From e6d8d88bcef5a388a0a7a0669d9418d25194f138 Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Thu, 22 Feb 2024 14:10:12 +0800 Subject: [PATCH 01/24] feat(expr): allow explicit cast serial to bigint (#15184) Signed-off-by: TennyZhuang --- e2e_test/batch/catalog/pg_cast.slt.part | 5 +-- src/common/src/types/serial.rs | 6 ++++ src/expr/impl/src/scalar/cast.rs | 1 + src/frontend/src/expr/type_inference/cast.rs | 33 ++++++++++---------- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/e2e_test/batch/catalog/pg_cast.slt.part b/e2e_test/batch/catalog/pg_cast.slt.part index b8ab68a5ed5cd..b1558d1e144c4 100644 --- a/e2e_test/batch/catalog/pg_cast.slt.part +++ b/e2e_test/batch/catalog/pg_cast.slt.part @@ -82,8 +82,9 @@ SELECT * FROM pg_catalog.pg_cast; 78 3802 701 e 79 3802 1700 e 80 3802 1043 a -81 1301 701 e -82 1301 1043 a +81 20 20 e +82 1301 701 e +83 1301 1043 a query TT rowsort SELECT s.typname, t.typname diff --git a/src/common/src/types/serial.rs b/src/common/src/types/serial.rs index 9bfbf5e4fcac7..5c84c95fa0f7a 100644 --- a/src/common/src/types/serial.rs +++ b/src/common/src/types/serial.rs @@ -26,6 +26,12 @@ use crate::util::row_id::RowId; #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Default, Hash)] pub struct Serial(i64); +impl From for i64 { + fn from(value: Serial) -> i64 { + value.0 + } +} + impl From for Serial { fn from(value: i64) -> Self { Self(value) diff --git a/src/expr/impl/src/scalar/cast.rs b/src/expr/impl/src/scalar/cast.rs index dc81e3ab77bac..bf8afc7712f93 100644 --- a/src/expr/impl/src/scalar/cast.rs +++ b/src/expr/impl/src/scalar/cast.rs @@ -87,6 +87,7 @@ pub fn jsonb_to_number>(v: JsonbRef<'_>) -> Result { #[function("cast(int4) -> int2")] #[function("cast(int8) -> int2")] #[function("cast(int8) -> int4")] +#[function("cast(serial) -> int8")] #[function("cast(float4) -> int2")] #[function("cast(float8) -> int2")] #[function("cast(float4) -> int4")] diff --git a/src/frontend/src/expr/type_inference/cast.rs b/src/frontend/src/expr/type_inference/cast.rs index aa7e1c8ee9192..1f1a96e92b826 100644 --- a/src/frontend/src/expr/type_inference/cast.rs +++ b/src/frontend/src/expr/type_inference/cast.rs @@ -216,22 +216,23 @@ pub static CAST_MAP: LazyLock = LazyLock::new(|| { use DataTypeName::*; const CAST_TABLE: &[(&str, DataTypeName)] = &[ // 123456789ABCDEF - (". e a", Boolean), // 0 - (" .iiiiii a", Int16), // 1 - ("ea.iiiii a", Int32), // 2 - (" aa.iiii a", Int64), // 3 - (" aaa.ii a", Decimal), // 4 - (" aaaa.i a", Float32), // 5 - (" aaaaa. a", Float64), // 6 - (" e. a", Int256), // 7 - (" .ii a", Date), // 8 - (" a.ia a", Timestamp), // 9 - (" aa.a a", Timestamptz), // A - (" .i a", Time), // B - (" a. a", Interval), // C - ("eeeeeee . a", Jsonb), // D - (" .a", Bytea), // E - ("eeeeeeeeeeeeeee.", Varchar), // F + (". e a ", Boolean), // 0 + (" .iiiiii a ", Int16), // 1 + ("ea.iiiii a ", Int32), // 2 + (" aa.iiii a ", Int64), // 3 + (" aaa.ii a ", Decimal), // 4 + (" aaaa.i a ", Float32), // 5 + (" aaaaa. a ", Float64), // 6 + (" e. a ", Int256), // 7 + (" .ii a ", Date), // 8 + (" a.ia a ", Timestamp), // 9 + (" aa.a a ", Timestamptz), // A + (" .i a ", Time), // B + (" a. a ", Interval), // C + ("eeeeeee . a ", Jsonb), // D + (" .a ", Bytea), // E + ("eeeeeeeeeeeeeee. 
", Varchar), // F + (" e .", Serial), ]; let mut map = BTreeMap::new(); for (row, source) in CAST_TABLE { From d6a1089d8f8cdef395aeace1bb33040f20d421f5 Mon Sep 17 00:00:00 2001 From: Mike Wang <52522981+mikechesterwang@users.noreply.github.com> Date: Fri, 23 Feb 2024 09:49:50 +0800 Subject: [PATCH 02/24] fix: broken link in README (#15145) Co-authored-by: TennyZhuang --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 44443cfab8282..1611af1815175 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ Don’t have Docker? Learn how to install RisingWave on Mac, Ubuntu, and other e ## Production deployments -For **single-node deployment**, please refer to [Docker Compose](https://docs.risingwave.com/docs/current/risingwave-trial/?method=docker-compose). +For **single-node deployment**, please refer to [Docker Compose](https://docs.risingwave.com/docs/current/risingwave-docker-compose/). For **distributed deployment**, please refer to [Kubernetes with Helm](https://docs.risingwave.com/docs/current/risingwave-k8s-helm/) or [Kubernetes with Operator](https://docs.risingwave.com/docs/current/risingwave-kubernetes/). From 34bb7e339e6bd1594e8c524cb581a0874fc58684 Mon Sep 17 00:00:00 2001 From: William Wen <44139337+wenym1@users.noreply.github.com> Date: Fri, 23 Feb 2024 09:50:53 +0800 Subject: [PATCH 03/24] chore: upgrade declared pg version to 13.14.0 (#15177) --- e2e_test/batch/catalog/version.slt.part | 4 ++-- e2e_test/batch/functions/setting.slt.part | 6 +++--- src/common/src/lib.rs | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/e2e_test/batch/catalog/version.slt.part b/e2e_test/batch/catalog/version.slt.part index b2ba9e2a877c5..dc3e0399b1e6a 100644 --- a/e2e_test/batch/catalog/version.slt.part +++ b/e2e_test/batch/catalog/version.slt.part @@ -1,4 +1,4 @@ query T -select substring(version() from 1 for 14); +select substring(version() from 1 for 16); ---- -PostgreSQL 9.5 +PostgreSQL 13.14 diff --git a/e2e_test/batch/functions/setting.slt.part b/e2e_test/batch/functions/setting.slt.part index 77d1d80e46590..233399d80a025 100644 --- a/e2e_test/batch/functions/setting.slt.part +++ b/e2e_test/batch/functions/setting.slt.part @@ -1,12 +1,12 @@ query T SELECT current_setting('server_version'); ---- -9.5.0 +13.14.0 query I -SELECT CAST(current_setting('server_version_num') AS INT) / 100 AS version; +SELECT current_setting('server_version_num') AS version; ---- -905 +130014 query T SELECT set_config('client_min_messages', 'warning', false); diff --git a/src/common/src/lib.rs b/src/common/src/lib.rs index 980897d5636e7..313c0bada6616 100644 --- a/src/common/src/lib.rs +++ b/src/common/src/lib.rs @@ -92,9 +92,9 @@ pub const UNKNOWN_GIT_SHA: &str = "unknown"; // The single source of truth of the pg parameters, Used in ConfigMap and current_cluster_version. // The version of PostgreSQL that Risingwave claims to be. -pub const PG_VERSION: &str = "9.5.0"; +pub const PG_VERSION: &str = "13.14.0"; /// The version of PostgreSQL that Risingwave claims to be. -pub const SERVER_VERSION_NUM: i32 = 90500; +pub const SERVER_VERSION_NUM: i32 = 130014; /// Shows the server-side character set encoding. At present, this parameter can be shown but not set, because the encoding is determined at database creation time. It is also the default value of `client_encoding`. 
pub const SERVER_ENCODING: &str = "UTF8"; /// see From e223b9f165f63ca5c63e4940a31547543a646073 Mon Sep 17 00:00:00 2001 From: Li0k Date: Fri, 23 Feb 2024 10:28:05 +0800 Subject: [PATCH 04/24] fix(storage): fix expired timeout sec (#15194) --- src/meta/node/src/lib.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/meta/node/src/lib.rs b/src/meta/node/src/lib.rs index 2e770fb841ada..8d7c4253631d5 100644 --- a/src/meta/node/src/lib.rs +++ b/src/meta/node/src/lib.rs @@ -254,7 +254,7 @@ pub fn start(opts: MetaNodeOpts) -> Pin + Send>> { const MIN_TIMEOUT_INTERVAL_SEC: u64 = 20; let compaction_task_max_progress_interval_secs = { - config + (config .storage .object_store .object_store_read_timeout_ms @@ -271,7 +271,8 @@ pub fn start(opts: MetaNodeOpts) -> Pin + Send>> { .object_store .object_store_streaming_upload_timeout_ms, ) - .max(config.meta.compaction_task_max_progress_interval_secs) + .max(config.meta.compaction_task_max_progress_interval_secs * 1000)) + / 1000 } + MIN_TIMEOUT_INTERVAL_SEC; let (mut join_handle, leader_lost_handle, shutdown_send) = rpc_serve( From 41e723b08c4de17db2a31ea5909df7c5ca6beb2a Mon Sep 17 00:00:00 2001 From: Li0k Date: Fri, 23 Feb 2024 10:30:35 +0800 Subject: [PATCH 05/24] feat(storage): improve block memory usage (#15024) --- src/storage/src/hummock/file_cache/store.rs | 9 +-- src/storage/src/hummock/sstable/block.rs | 79 ++++++++++++++------- src/storage/src/hummock/sstable/builder.rs | 20 ++++-- 3 files changed, 71 insertions(+), 37 deletions(-) diff --git a/src/storage/src/hummock/file_cache/store.rs b/src/storage/src/hummock/file_cache/store.rs index 3435227bd317b..c640ba8f1db58 100644 --- a/src/storage/src/hummock/file_cache/store.rs +++ b/src/storage/src/hummock/file_cache/store.rs @@ -701,13 +701,8 @@ mod tests { builder.add_for_test(construct_full_key_struct(0, b"k3", 3), b"v03"); builder.add_for_test(construct_full_key_struct(0, b"k4", 4), b"v04"); - Box::new( - Block::decode( - builder.build().to_vec().into(), - builder.uncompressed_block_size(), - ) - .unwrap(), - ) + let uncompress = builder.uncompressed_block_size(); + Box::new(Block::decode(builder.build().to_vec().into(), uncompress).unwrap()) } fn sstable_for_test() -> Sstable { diff --git a/src/storage/src/hummock/sstable/block.rs b/src/storage/src/hummock/sstable/block.rs index 3d0b4f8c0f770..fe465bba5b41f 100644 --- a/src/storage/src/hummock/sstable/block.rs +++ b/src/storage/src/hummock/sstable/block.rs @@ -215,20 +215,20 @@ impl Block { let mut decoder = lz4::Decoder::new(compressed_data.reader()) .map_err(HummockError::decode_error)?; let mut decoded = Vec::with_capacity(uncompressed_capacity); - decoder + let read_size = decoder .read_to_end(&mut decoded) .map_err(HummockError::decode_error)?; - debug_assert_eq!(decoded.capacity(), uncompressed_capacity); + assert_eq!(read_size, uncompressed_capacity); Bytes::from(decoded) } CompressionAlgorithm::Zstd => { let mut decoder = zstd::Decoder::new(compressed_data.reader()) .map_err(HummockError::decode_error)?; let mut decoded = Vec::with_capacity(uncompressed_capacity); - decoder + let read_size = decoder .read_to_end(&mut decoded) .map_err(HummockError::decode_error)?; - debug_assert_eq!(decoded.capacity(), uncompressed_capacity); + assert_eq!(read_size, uncompressed_capacity); Bytes::from(decoded) } }; @@ -445,6 +445,8 @@ impl Default for BlockBuilderOptions { pub struct BlockBuilder { /// Write buffer. buf: BytesMut, + /// Compress buffer + compress_buf: BytesMut, /// Entry interval between restart points. 
restart_count: usize, /// Restart points. @@ -465,8 +467,9 @@ pub struct BlockBuilder { impl BlockBuilder { pub fn new(options: BlockBuilderOptions) -> Self { Self { - // add more space to avoid re-allocate space. - buf: BytesMut::with_capacity(options.capacity + 256), + // add more space to avoid re-allocate space. (for restart_points and restart_points_type_index) + buf: BytesMut::with_capacity(Self::buf_reserve_size(&options)), + compress_buf: BytesMut::default(), restart_count: options.restart_interval, restart_points: Vec::with_capacity( options.capacity / DEFAULT_ENTRY_SIZE / options.restart_interval + 1, @@ -664,22 +667,35 @@ impl BlockBuilder { ); self.buf.put_u32_le(self.table_id.unwrap()); - if self.compression_algorithm != CompressionAlgorithm::None { - self.buf = Self::compress(&self.buf[..], self.compression_algorithm); - } + let result_buf = if self.compression_algorithm != CompressionAlgorithm::None { + self.compress_buf.clear(); + self.compress_buf = Self::compress( + &self.buf[..], + self.compression_algorithm, + std::mem::take(&mut self.compress_buf), + ); + + &mut self.compress_buf + } else { + &mut self.buf + }; - self.compression_algorithm.encode(&mut self.buf); - let checksum = xxhash64_checksum(&self.buf); - self.buf.put_u64_le(checksum); + self.compression_algorithm.encode(result_buf); + let checksum = xxhash64_checksum(result_buf); + result_buf.put_u64_le(checksum); assert!( - self.buf.len() < (u32::MAX) as usize, + result_buf.len() < (u32::MAX) as usize, "buf_len {} entry_count {} table {:?}", - self.buf.len(), + result_buf.len(), self.entry_count, self.table_id ); - self.buf.as_ref() + if self.compression_algorithm != CompressionAlgorithm::None { + self.compress_buf.as_ref() + } else { + self.buf.as_ref() + } } pub fn compress_block( @@ -693,21 +709,29 @@ impl BlockBuilder { let compression = CompressionAlgorithm::decode(&mut &buf[buf.len() - 9..buf.len() - 8])?; let compressed_data = &buf[..buf.len() - 9]; assert_eq!(compression, CompressionAlgorithm::None); - let mut writer = Self::compress(compressed_data, target_compression); + let mut compress_writer = Self::compress( + compressed_data, + target_compression, + BytesMut::with_capacity(buf.len()), + ); - target_compression.encode(&mut writer); - let checksum = xxhash64_checksum(&writer); - writer.put_u64_le(checksum); - Ok(writer.freeze()) + target_compression.encode(&mut compress_writer); + let checksum = xxhash64_checksum(&compress_writer); + compress_writer.put_u64_le(checksum); + Ok(compress_writer.freeze()) } - pub fn compress(buf: &[u8], compression_algorithm: CompressionAlgorithm) -> BytesMut { + pub fn compress( + buf: &[u8], + compression_algorithm: CompressionAlgorithm, + compress_writer: BytesMut, + ) -> BytesMut { match compression_algorithm { CompressionAlgorithm::None => unreachable!(), CompressionAlgorithm::Lz4 => { let mut encoder = lz4::EncoderBuilder::new() .level(4) - .build(BytesMut::with_capacity(buf.len()).writer()) + .build(compress_writer.writer()) .map_err(HummockError::encode_error) .unwrap(); encoder @@ -719,10 +743,9 @@ impl BlockBuilder { writer.into_inner() } CompressionAlgorithm::Zstd => { - let mut encoder = - zstd::Encoder::new(BytesMut::with_capacity(buf.len()).writer(), 4) - .map_err(HummockError::encode_error) - .unwrap(); + let mut encoder = zstd::Encoder::new(compress_writer.writer(), 4) + .map_err(HummockError::encode_error) + .unwrap(); encoder .write_all(buf) .map_err(HummockError::encode_error) @@ -762,6 +785,10 @@ impl BlockBuilder { pub fn table_id(&self) -> Option { 
self.table_id } + + fn buf_reserve_size(option: &BlockBuilderOptions) -> usize { + option.capacity + 1024 + 256 + } } #[cfg(test)] diff --git a/src/storage/src/hummock/sstable/builder.rs b/src/storage/src/hummock/sstable/builder.rs index 4fe331f677321..ebaa60e167056 100644 --- a/src/storage/src/hummock/sstable/builder.rs +++ b/src/storage/src/hummock/sstable/builder.rs @@ -240,7 +240,6 @@ impl SstableBuilder { self.add(full_key, value).await } - /// only for test pub fn current_block_size(&self) -> usize { self.block_builder.approximate_len() } @@ -344,6 +343,12 @@ impl SstableBuilder { || !user_key(&self.raw_key).eq(user_key(&self.last_full_key)); let table_id = full_key.user_key.table_id.table_id(); let is_new_table = self.last_table_id.is_none() || self.last_table_id.unwrap() != table_id; + let current_block_size = self.current_block_size(); + let is_block_full = current_block_size >= self.options.block_capacity + || (current_block_size > self.options.block_capacity / 4 * 3 + && current_block_size + self.raw_value.len() + self.raw_key.len() + > self.options.block_capacity); + if is_new_table { assert!( could_switch_block, @@ -356,9 +361,7 @@ impl SstableBuilder { if !self.block_builder.is_empty() { self.build_block().await?; } - } else if self.block_builder.approximate_len() >= self.options.block_capacity - && could_switch_block - { + } else if is_block_full && could_switch_block { self.build_block().await?; } self.last_table_stats.total_key_count += 1; @@ -704,6 +707,15 @@ impl SstableBuilder { data_len, block_meta.offset ) }); + + if data_len as usize > self.options.capacity * 2 { + tracing::warn!( + "WARN unexpected block size {} table {:?}", + data_len, + self.block_builder.table_id() + ); + } + self.block_builder.clear(); Ok(()) } From 65550a0e0c81282ffb4d5286bd23bada5519172b Mon Sep 17 00:00:00 2001 From: congyi wang <58715567+wcy-fdu@users.noreply.github.com> Date: Fri, 23 Feb 2024 10:31:26 +0800 Subject: [PATCH 06/24] fix(object_store): fs and hdfs object store should set atomic_write_dir (#15155) --- risedev.yml | 11 +++++++++++ src/object_store/src/object/opendal_engine/fs.rs | 6 ++++-- src/object_store/src/object/opendal_engine/hdfs.rs | 5 ++++- src/object_store/src/object/opendal_engine/mod.rs | 3 +++ src/object_store/src/object/opendal_engine/webhdfs.rs | 3 +++ 5 files changed, 25 insertions(+), 3 deletions(-) diff --git a/risedev.yml b/risedev.yml index 69b0c23b05dd3..22356f2e1ac89 100644 --- a/risedev.yml +++ b/risedev.yml @@ -164,6 +164,17 @@ profile: - use: compactor # - use: prometheus # - use: grafana + fs: + steps: + # - use: etcd + - use: meta-node + - use: compute-node + - use: frontend + - use: opendal + engine: fs + - use: compactor + # - use: prometheus + # - use: grafana webhdfs: steps: # - use: etcd diff --git a/src/object_store/src/object/opendal_engine/fs.rs b/src/object_store/src/object/opendal_engine/fs.rs index 23d7dcbd503e8..ece3555d5b777 100644 --- a/src/object_store/src/object/opendal_engine/fs.rs +++ b/src/object_store/src/object/opendal_engine/fs.rs @@ -17,15 +17,17 @@ use opendal::services::Fs; use opendal::Operator; use super::{EngineType, OpendalObjectStore}; +use crate::object::opendal_engine::ATOMIC_WRITE_DIR; use crate::object::ObjectResult; + impl OpendalObjectStore { /// create opendal fs engine. pub fn new_fs_engine(root: String) -> ObjectResult { // Create fs backend builder. 
let mut builder = Fs::default(); - builder.root(&root); - + let atomic_write_dir = format!("{}/{}", root, ATOMIC_WRITE_DIR); + builder.atomic_write_dir(&atomic_write_dir); let op: Operator = Operator::new(builder)? .layer(RetryLayer::default()) .finish(); diff --git a/src/object_store/src/object/opendal_engine/hdfs.rs b/src/object_store/src/object/opendal_engine/hdfs.rs index b52be4094df80..12ee292a85416 100644 --- a/src/object_store/src/object/opendal_engine/hdfs.rs +++ b/src/object_store/src/object/opendal_engine/hdfs.rs @@ -17,7 +17,9 @@ use opendal::services::Hdfs; use opendal::Operator; use super::{EngineType, OpendalObjectStore}; +use crate::object::opendal_engine::ATOMIC_WRITE_DIR; use crate::object::ObjectResult; + impl OpendalObjectStore { /// create opendal hdfs engine. pub fn new_hdfs_engine(namenode: String, root: String) -> ObjectResult { @@ -26,7 +28,8 @@ impl OpendalObjectStore { // Set the name node for hdfs. builder.name_node(&namenode); builder.root(&root); - + let atomic_write_dir = format!("{}/{}", root, ATOMIC_WRITE_DIR); + builder.atomic_write_dir(&atomic_write_dir); let op: Operator = Operator::new(builder)? .layer(LoggingLayer::default()) .layer(RetryLayer::default()) diff --git a/src/object_store/src/object/opendal_engine/mod.rs b/src/object_store/src/object/opendal_engine/mod.rs index ccaba375a1302..c1ab929d5586f 100644 --- a/src/object_store/src/object/opendal_engine/mod.rs +++ b/src/object_store/src/object/opendal_engine/mod.rs @@ -31,3 +31,6 @@ pub mod opendal_s3; pub mod oss; pub mod fs; + +// To make sure the the operation is consistent, we should specially set `atomic_write_dir` for fs, hdfs and webhdfs services. +const ATOMIC_WRITE_DIR: &str = "atomic_write_dir/"; diff --git a/src/object_store/src/object/opendal_engine/webhdfs.rs b/src/object_store/src/object/opendal_engine/webhdfs.rs index ff61b39ec9e79..1f6b87b44fd5e 100644 --- a/src/object_store/src/object/opendal_engine/webhdfs.rs +++ b/src/object_store/src/object/opendal_engine/webhdfs.rs @@ -17,6 +17,7 @@ use opendal::services::Webhdfs; use opendal::Operator; use super::{EngineType, OpendalObjectStore}; +use crate::object::opendal_engine::ATOMIC_WRITE_DIR; use crate::object::ObjectResult; impl OpendalObjectStore { @@ -30,6 +31,8 @@ impl OpendalObjectStore { // NOTE: the root must be absolute path. builder.root(&root); + let atomic_write_dir = format!("{}/{}", root, ATOMIC_WRITE_DIR); + builder.atomic_write_dir(&atomic_write_dir); let op: Operator = Operator::new(builder)? .layer(LoggingLayer::default()) .layer(RetryLayer::default()) From b5e3a22b270e26c439f354f021be201ebd3ab729 Mon Sep 17 00:00:00 2001 From: congyi wang <58715567+wcy-fdu@users.noreply.github.com> Date: Fri, 23 Feb 2024 10:31:35 +0800 Subject: [PATCH 07/24] refactor(object store): use AssumeRoleWithWebIdentity for opendal s3 (#15182) --- .../src/object/opendal_engine/opendal_s3.rs | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/object_store/src/object/opendal_engine/opendal_s3.rs b/src/object_store/src/object/opendal_engine/opendal_s3.rs index 425d0a7576691..c10aff55d342b 100644 --- a/src/object_store/src/object/opendal_engine/opendal_s3.rs +++ b/src/object_store/src/object/opendal_engine/opendal_s3.rs @@ -31,30 +31,11 @@ impl OpendalObjectStore { // Create s3 builder. let mut builder = S3::default(); builder.bucket(&bucket); - // For AWS S3, there is no need to set an endpoint; for other S3 compatible object stores, it is necessary to set this field. 
if let Ok(endpoint_url) = std::env::var("RW_S3_ENDPOINT") { builder.endpoint(&endpoint_url); } - if let Ok(region) = std::env::var("AWS_REGION") { - builder.region(®ion); - } else { - tracing::error!("aws s3 region is not set, bucket {}", bucket); - } - - if let Ok(access) = std::env::var("AWS_ACCESS_KEY_ID") { - builder.access_key_id(&access); - } else { - tracing::error!("access key id of aws s3 is not set, bucket {}", bucket); - } - - if let Ok(secret) = std::env::var("AWS_SECRET_ACCESS_KEY") { - builder.secret_access_key(&secret); - } else { - tracing::error!("secret access key of aws s3 is not set, bucket {}", bucket); - } - if std::env::var("RW_IS_FORCE_PATH_STYLE").is_err() { builder.enable_virtual_host_style(); } From d8cca2ab7f34e63733f91fb0431a50ef42f0da40 Mon Sep 17 00:00:00 2001 From: Xinhao Xu <84456268+xxhZs@users.noreply.github.com> Date: Fri, 23 Feb 2024 10:42:07 +0800 Subject: [PATCH 08/24] feat(test): Add starrocks redis doris cassandra e2e test (#14142) --- ci/docker-compose.yml | 123 +++++++++++++++++++-- ci/scripts/e2e-cassandra-sink-test.sh | 65 +++++++++++ ci/scripts/e2e-clickhouse-sink-test.sh | 2 +- ci/scripts/e2e-deltalake-sink-rust-test.sh | 3 +- ci/scripts/e2e-doris-sink-test.sh | 59 ++++++++++ ci/scripts/e2e-pulsar-sink-test.sh | 2 +- ci/scripts/e2e-redis-sink-test.sh | 48 ++++++++ ci/scripts/e2e-starrocks-sink-test.sh | 58 ++++++++++ ci/workflows/main-cron.yml | 88 +++++++++++++++ ci/workflows/pull-request.yml | 69 ++++++++++++ e2e_test/sink/cassandra_sink.slt | 33 ++++++ e2e_test/sink/doris_sink.slt | 34 ++++++ e2e_test/sink/redis_sink.slt | 41 +++++++ e2e_test/sink/starrocks_sink.slt | 36 ++++++ risedev.yml | 22 +--- src/connector/src/sink/starrocks.rs | 14 +-- src/connector/with_options_sink.yaml | 2 + 17 files changed, 660 insertions(+), 39 deletions(-) create mode 100755 ci/scripts/e2e-cassandra-sink-test.sh create mode 100755 ci/scripts/e2e-doris-sink-test.sh create mode 100755 ci/scripts/e2e-redis-sink-test.sh create mode 100755 ci/scripts/e2e-starrocks-sink-test.sh create mode 100644 e2e_test/sink/cassandra_sink.slt create mode 100644 e2e_test/sink/doris_sink.slt create mode 100644 e2e_test/sink/redis_sink.slt create mode 100644 e2e_test/sink/starrocks_sink.slt diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index 4a9f2970b84c7..db017be647376 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -88,10 +88,27 @@ services: - message_queue - elasticsearch - clickhouse-server - - pulsar + - redis-server + - pulsar-server + - cassandra-server + - starrocks-fe-server + - starrocks-be-server volumes: - ..:/risingwave + sink-doris-env: + image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231109 + depends_on: + - doris-fe-server + - doris-be-server + volumes: + - ..:/risingwave + command: > + sh -c "sudo sysctl -w vm.max_map_count=2000000" + networks: + mynetwork: + ipv4_address: 172.121.0.4 + rw-build-env: image: public.ecr.aws/x5u3w5h6/rw-build-env:v20240213 volumes: @@ -159,10 +176,96 @@ services: expose: - 9009 -# Temporary workaround for json schema registry test since redpanda only supports -# protobuf/avro schema registry. Should be removed after the support. 
-# Related tracking issue: -# https://github.com/redpanda-data/redpanda/issues/1878 + redis-server: + container_name: redis-server + image: 'redis:latest' + expose: + - 6379 + ports: + - 6378:6379 + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 30s + retries: 50 + + doris-fe-server: + platform: linux/amd64 + image: apache/doris:2.0.0_alpha-fe-x86_64 + hostname: doris-fe-server + command: > + sh -c "sudo sysctl -w vm.max_map_count=2000000" + environment: + - FE_SERVERS=fe1:172.121.0.2:9010 + - FE_ID=1 + ports: + - "8030:8030" + - "9030:9030" + networks: + mynetwork: + ipv4_address: 172.121.0.2 + + doris-be-server: + platform: linux/amd64 + image: apache/doris:2.0.0_alpha-be-x86_64 + hostname: doris-be-server + command: > + sh -c "sudo sysctl -w vm.max_map_count=2000000" + environment: + - FE_SERVERS=fe1:172.121.0.2:9010 + - BE_ADDR=172.121.0.3:9050 + depends_on: + - doris-fe-server + ports: + - "9050:9050" + networks: + mynetwork: + ipv4_address: 172.121.0.3 + + cassandra-server: + container_name: cassandra-server + image: cassandra:4.0 + ports: + - 9042:9042 + environment: + - CASSANDRA_CLUSTER_NAME=cloudinfra + + starrocks-fe-server: + container_name: starrocks-fe-server + image: starrocks/fe-ubuntu:3.1.7 + hostname: starrocks-fe-server + command: + /opt/starrocks/fe/bin/start_fe.sh + ports: + - 28030:8030 + - 29020:9020 + - 29030:9030 + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9030"] + interval: 5s + timeout: 5s + retries: 30 + + starrocks-be-server: + image: starrocks/be-ubuntu:3.1.7 + command: + - /bin/bash + - -c + - | + sleep 15s; mysql --connect-timeout 2 -h starrocks-fe-server -P9030 -uroot -e "alter system add backend \"starrocks-be-server:9050\";" + /opt/starrocks/be/bin/start_be.sh + ports: + - 28040:8040 + - 29050:9050 + hostname: starrocks-be-server + container_name: starrocks-be-server + depends_on: + - starrocks-fe-server + +# # Temporary workaround for json schema registry test since redpanda only supports +# # protobuf/avro schema registry. Should be removed after the support. +# # Related tracking issue: +# # https://github.com/redpanda-data/redpanda/issues/1878 zookeeper: container_name: zookeeper image: confluentinc/cp-zookeeper:latest @@ -201,8 +304,8 @@ services: KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9093,PLAINTEXT_INTERNAL://localhost:29093 KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 - pulsar: - container_name: pulsar + pulsar-server: + container_name: pulsar-server image: apachepulsar/pulsar:latest command: bin/pulsar standalone ports: @@ -216,3 +319,9 @@ services: interval: 5s timeout: 5s retries: 5 +networks: + mynetwork: + ipam: + config: + - subnet: 172.121.80.0/16 + default: diff --git a/ci/scripts/e2e-cassandra-sink-test.sh b/ci/scripts/e2e-cassandra-sink-test.sh new file mode 100755 index 0000000000000..c393d510d19a2 --- /dev/null +++ b/ci/scripts/e2e-cassandra-sink-test.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.sh + +# prepare environment +export CONNECTOR_LIBS_PATH="./connector-node/libs" + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? 
) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +download_and_prepare_rw "$profile" source + +echo "--- Download connector node package" +buildkite-agent artifact download risingwave-connector.tar.gz ./ +mkdir ./connector-node +tar xf ./risingwave-connector.tar.gz -C ./connector-node + +echo "--- starting risingwave cluster" +cargo make ci-start ci-sink-test +sleep 1 + +echo "--- create cassandra table" +curl https://downloads.apache.org/cassandra/4.1.3/apache-cassandra-4.1.3-bin.tar.gz --output apache-cassandra-4.1.3-bin.tar.gz +tar xfvz apache-cassandra-4.1.3-bin.tar.gz +cd apache-cassandra-4.1.3/bin +export CQLSH_HOST=cassandra-server +export CQLSH_PORT=9042 +./cqlsh -e "CREATE KEYSPACE demo WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};use demo; +CREATE table demo_bhv_table(v1 int primary key,v2 smallint,v3 bigint,v4 float,v5 double,v6 text,v7 date,v8 timestamp,v9 boolean);" + +echo "--- testing sinks" +cd ../../ +sqllogictest -p 4566 -d dev './e2e_test/sink/cassandra_sink.slt' +sleep 1 +cd apache-cassandra-4.1.3/bin +./cqlsh -e "COPY demo.demo_bhv_table TO './query_result.csv' WITH HEADER = false AND ENCODING = 'UTF-8';" + +if cat ./query_result.csv | awk -F "," '{ + exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01 01:01:01.000+0000" && $9 == "False\r"); }'; then + echo "Cassandra sink check passed" +else + cat ./query_result.csv + echo "The output is not as expected." + exit 1 +fi + +echo "--- Kill cluster" +cd ../../ +cargo make ci-kill \ No newline at end of file diff --git a/ci/scripts/e2e-clickhouse-sink-test.sh b/ci/scripts/e2e-clickhouse-sink-test.sh index 3464bd3c3c14d..c14d83e8c4281 100755 --- a/ci/scripts/e2e-clickhouse-sink-test.sh +++ b/ci/scripts/e2e-clickhouse-sink-test.sh @@ -24,7 +24,7 @@ shift $((OPTIND -1)) download_and_prepare_rw "$profile" source echo "--- starting risingwave cluster" -cargo make ci-start ci-clickhouse-test +cargo make ci-start ci-sink-test sleep 1 diff --git a/ci/scripts/e2e-deltalake-sink-rust-test.sh b/ci/scripts/e2e-deltalake-sink-rust-test.sh index 71ff1eede8e4d..cc0c287e8b572 100755 --- a/ci/scripts/e2e-deltalake-sink-rust-test.sh +++ b/ci/scripts/e2e-deltalake-sink-rust-test.sh @@ -32,8 +32,7 @@ mkdir ./connector-node tar xf ./risingwave-connector.tar.gz -C ./connector-node echo "--- starting risingwave cluster" -mkdir -p .risingwave/log -cargo make ci-start ci-deltalake-test +cargo make ci-start ci-sink-test sleep 1 # prepare minio deltalake sink diff --git a/ci/scripts/e2e-doris-sink-test.sh b/ci/scripts/e2e-doris-sink-test.sh new file mode 100755 index 0000000000000..30bfdaf129e26 --- /dev/null +++ b/ci/scripts/e2e-doris-sink-test.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.sh + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? 
) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +download_and_prepare_rw "$profile" source + +echo "--- starting risingwave cluster" +cargo make ci-start ci-sink-test +sleep 1 + +echo "--- create doris table" +apt-get update -y && apt-get install -y mysql-client +sleep 2 +mysql -uroot -P 9030 -h doris-fe-server -e "CREATE database demo;use demo; +CREATE table demo_bhv_table(v1 int,v2 smallint,v3 bigint,v4 float,v5 double,v6 string,v7 datev2,v8 datetime,v9 boolean) UNIQUE KEY(\`v1\`) +DISTRIBUTED BY HASH(\`v1\`) BUCKETS 1 +PROPERTIES ( + \"replication_allocation\" = \"tag.location.default: 1\" +); +CREATE USER 'users'@'%' IDENTIFIED BY '123456'; +GRANT ALL ON *.* TO 'users'@'%';" +sleep 2 + +echo "--- testing sinks" +sqllogictest -p 4566 -d dev './e2e_test/sink/doris_sink.slt' +sleep 1 +mysql -uroot -P 9030 -h doris-fe-server -e "select * from demo.demo_bhv_table" > ./query_result.csv + + +if cat ./query_result.csv | sed '1d; s/\t/,/g' | awk -F "," '{ + exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01 01:01:01" && $9 == 0); }'; then + echo "Doris sink check passed" +else + cat ./query_result.csv + echo "The output is not as expected." + exit 1 +fi + +echo "--- Kill cluster" +cargo make ci-kill \ No newline at end of file diff --git a/ci/scripts/e2e-pulsar-sink-test.sh b/ci/scripts/e2e-pulsar-sink-test.sh index ee8848832f940..f942ad945b3e9 100755 --- a/ci/scripts/e2e-pulsar-sink-test.sh +++ b/ci/scripts/e2e-pulsar-sink-test.sh @@ -21,7 +21,7 @@ shift $((OPTIND -1)) download_and_prepare_rw "$profile" source echo "--- starting risingwave cluster" -cargo make ci-start ci-pulsar-test +cargo make ci-start ci-sink-test sleep 1 echo "--- waiting until pulsar is healthy" diff --git a/ci/scripts/e2e-redis-sink-test.sh b/ci/scripts/e2e-redis-sink-test.sh new file mode 100755 index 0000000000000..cf64662db4051 --- /dev/null +++ b/ci/scripts/e2e-redis-sink-test.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.sh + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? ) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +download_and_prepare_rw "$profile" source + +echo "--- starting risingwave cluster" +cargo make ci-start ci-sink-test +apt-get update -y && apt-get install -y redis-server +sleep 1 + +echo "--- testing sinks" +sqllogictest -p 4566 -d dev './e2e_test/sink/redis_sink.slt' +sleep 1 + +redis-cli -h redis-server -p 6379 get {\"v1\":1} >> ./query_result.txt +redis-cli -h redis-server -p 6379 get V1:1 >> ./query_result.txt + +# check sink destination using shell +if cat ./query_result.txt | tr '\n' '\0' | xargs -0 -n1 bash -c '[[ "$0" == "{\"v1\":1,\"v2\":1,\"v3\":1,\"v4\":1.100000023841858,\"v5\":1.2,\"v6\":\"test\",\"v7\":734869,\"v8\":\"2013-01-01T01:01:01.000000Z\",\"v9\":false}" || "$0" == "V2:1,V3:1" ]]'; then + echo "Redis sink check passed" +else + cat ./query_result.txt + echo "The output is not as expected." 
+ exit 1 +fi + +echo "--- Kill cluster" +cargo make ci-kill \ No newline at end of file diff --git a/ci/scripts/e2e-starrocks-sink-test.sh b/ci/scripts/e2e-starrocks-sink-test.sh new file mode 100755 index 0000000000000..256f4448f9198 --- /dev/null +++ b/ci/scripts/e2e-starrocks-sink-test.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.sh + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? ) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +download_and_prepare_rw "$profile" source + +echo "--- starting risingwave cluster" +cargo make ci-start ci-sink-test +sleep 1 + + +echo "--- create starrocks table" +apt-get update -y && apt-get install -y mysql-client +sleep 2 +mysql -uroot -P 9030 -h starrocks-fe-server -e "CREATE database demo;use demo; +CREATE table demo_bhv_table(v1 int,v2 smallint,v3 bigint,v4 float,v5 double,v6 string,v7 date,v8 datetime,v9 boolean,v10 json) ENGINE=OLAP +PRIMARY KEY(\`v1\`) +DISTRIBUTED BY HASH(\`v1\`) properties(\"replication_num\" = \"1\"); +CREATE USER 'users'@'%' IDENTIFIED BY '123456'; +GRANT ALL ON *.* TO 'users'@'%';" +sleep 2 + +echo "--- testing sinks" +sqllogictest -p 4566 -d dev './e2e_test/sink/starrocks_sink.slt' +sleep 1 +mysql -uroot -P 9030 -h starrocks-fe-server -e "select * from demo.demo_bhv_table" > ./query_result.csv + + +if cat ./query_result.csv | sed '1d; s/\t/,/g' | awk -F "," '{ + exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01 01:01:01" && $9 == 0 && $10 = "{"v101": 100}"); }'; then + echo "Starrocks sink check passed" +else + cat ./query_result.csv + echo "The output is not as expected." 
+ exit 1 +fi + +echo "--- Kill cluster" +cargo make ci-kill \ No newline at end of file diff --git a/ci/workflows/main-cron.yml b/ci/workflows/main-cron.yml index 835c46fb01e60..934458bcca1bc 100644 --- a/ci/workflows/main-cron.yml +++ b/ci/workflows/main-cron.yml @@ -815,6 +815,94 @@ steps: timeout_in_minutes: 10 retry: *auto-retry + - label: "end-to-end redis sink test" + key: "e2e-redis-sink-tests" + command: "ci/scripts/e2e-redis-sink-test.sh -p ci-release" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-redis-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-redis-sink-tests?(,|$$)/ + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "set vm_max_map_count_2000000" + key: "set-vm_max_map_count" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-doris-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-doris-sink-tests?(,|$$)/ + command: "sudo sysctl -w vm.max_map_count=2000000" + depends_on: + - "build" + - "build-other" + + - label: "end-to-end doris sink test" + key: "e2e-doris-sink-tests" + command: "ci/scripts/e2e-doris-sink-test.sh -p ci-release" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-doris-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-doris-sink-tests?(,|$$)/ + depends_on: + - "build" + - "build-other" + - "set-vm_max_map_count" + plugins: + - docker-compose#v4.9.0: + run: sink-doris-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "end-to-end starrocks sink test" + key: "e2e-starrocks-sink-tests" + command: "ci/scripts/e2e-starrocks-sink-test.sh -p ci-release" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-starrocks-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-starrocks-sink-tests?(,|$$)/ + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "end-to-end cassandra sink test" + key: "e2e-cassandra-sink-tests" + command: "ci/scripts/e2e-cassandra-sink-test.sh -p ci-release" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-cassandra-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-cassandra-sink-tests?(,|$$)/ + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + - label: "end-to-end clickhouse sink test" key: "e2e-clickhouse-sink-tests" command: "ci/scripts/e2e-clickhouse-sink-test.sh -p ci-release" diff --git a/ci/workflows/pull-request.yml b/ci/workflows/pull-request.yml index c48de6df64f1c..a67f915d943cc 100644 --- 
a/ci/workflows/pull-request.yml +++ b/ci/workflows/pull-request.yml @@ -292,6 +292,75 @@ steps: timeout_in_minutes: 10 retry: *auto-retry + - label: "end-to-end redis sink test" + if: build.pull_request.labels includes "ci/run-e2e-redis-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-redis-sink-tests?(,|$$)/ + command: "ci/scripts/e2e-redis-sink-test.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "set vm_max_map_count_2000000" + key: "set-vm_max_map_count" + if: build.pull_request.labels includes "ci/run-e2e-doris-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-doris-sink-tests?(,|$$)/ + command: "sudo sysctl -w vm.max_map_count=2000000" + depends_on: + - "build" + - "build-other" + + - label: "end-to-end doris sink test" + if: build.pull_request.labels includes "ci/run-e2e-doris-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-doris-sink-tests?(,|$$)/ + command: "ci/scripts/e2e-doris-sink-test.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + - "set-vm_max_map_count" + plugins: + - docker-compose#v4.9.0: + run: sink-doris-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "end-to-end starrocks sink test" + if: build.pull_request.labels includes "ci/run-e2e-starrocks-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-starrocks-sink-tests?(,|$$)/ + command: "ci/scripts/e2e-starrocks-sink-test.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "end-to-end cassandra sink test" + if: build.pull_request.labels includes "ci/run-e2e-cassandra-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-cassandra-sink-tests?(,|$$)/ + command: "ci/scripts/e2e-cassandra-sink-test.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + - label: "e2e java-binding test" if: build.pull_request.labels includes "ci/run-java-binding-tests" || build.env("CI_STEPS") =~ /(^|,)java-binding-tests?(,|$$)/ command: "ci/scripts/java-binding-test.sh -p ci-dev" diff --git a/e2e_test/sink/cassandra_sink.slt b/e2e_test/sink/cassandra_sink.slt new file mode 100644 index 0000000000000..7091e8da70783 --- /dev/null +++ b/e2e_test/sink/cassandra_sink.slt @@ -0,0 +1,33 @@ +statement ok +CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamptz, v9 boolean); + +statement ok +CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6; + +statement ok +CREATE SINK s6 +FROM + mv6 WITH ( + connector = 'cassandra', + type = 'append-only', + force_append_only='true', + cassandra.url = 'cassandra-server:9042', + cassandra.keyspace = 'demo', + cassandra.table = 'demo_bhv_table', + cassandra.datacenter = 'datacenter1', +); + +statement ok +INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01+00:00' , false); + +statement ok +FLUSH; + +statement ok +DROP SINK s6; + +statement ok +DROP 
MATERIALIZED VIEW mv6; + +statement ok +DROP TABLE t6; \ No newline at end of file diff --git a/e2e_test/sink/doris_sink.slt b/e2e_test/sink/doris_sink.slt new file mode 100644 index 0000000000000..2c552bbb26143 --- /dev/null +++ b/e2e_test/sink/doris_sink.slt @@ -0,0 +1,34 @@ +statement ok +CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamp, v9 boolean); + +statement ok +CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6; + +statement ok +CREATE SINK s6 +FROM + mv6 WITH ( + connector = 'doris', + type = 'append-only', + doris.url = 'http://doris-fe-server:8030', + doris.user = 'users', + doris.password = '123456', + doris.database = 'demo', + doris.table='demo_bhv_table', + force_append_only='true' +); + +statement ok +INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01' , false); + +statement ok +FLUSH; + +statement ok +DROP SINK s6; + +statement ok +DROP MATERIALIZED VIEW mv6; + +statement ok +DROP TABLE t6; \ No newline at end of file diff --git a/e2e_test/sink/redis_sink.slt b/e2e_test/sink/redis_sink.slt new file mode 100644 index 0000000000000..7475a80ae696e --- /dev/null +++ b/e2e_test/sink/redis_sink.slt @@ -0,0 +1,41 @@ +statement ok +CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamptz, v9 boolean); + +statement ok +CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6; + +statement ok +CREATE SINK s61 +FROM + mv6 WITH ( + primary_key = 'v1', + connector = 'redis', + redis.url= 'redis://redis-server:6379/', +)FORMAT PLAIN ENCODE JSON(force_append_only='true'); + +statement ok +CREATE SINK s62 +FROM + mv6 WITH ( + primary_key = 'v1', + connector = 'redis', + redis.url= 'redis://redis-server:6379/', +)FORMAT PLAIN ENCODE TEMPLATE(force_append_only='true', key_format = 'V1:{v1}', value_format = 'V2:{v2},V3:{v3}'); + +statement ok +INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01+00:00' , false); + +statement ok +FLUSH; + +statement ok +DROP SINK s61; + +statement ok +DROP SINK s62; + +statement ok +DROP MATERIALIZED VIEW mv6; + +statement ok +DROP TABLE t6; \ No newline at end of file diff --git a/e2e_test/sink/starrocks_sink.slt b/e2e_test/sink/starrocks_sink.slt new file mode 100644 index 0000000000000..a1ee1b0ffe039 --- /dev/null +++ b/e2e_test/sink/starrocks_sink.slt @@ -0,0 +1,36 @@ +statement ok +CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamp, v9 boolean, v10 jsonb); + +statement ok +CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6; + +statement ok +CREATE SINK s6 +FROM + mv6 WITH ( + connector = 'starrocks', + type = 'upsert', + starrocks.host = 'starrocks-fe-server', + starrocks.mysqlport = '9030', + starrocks.httpport = '8030', + starrocks.user = 'users', + starrocks.password = '123456', + starrocks.database = 'demo', + starrocks.table = 'demo_bhv_table', + primary_key = 'v1' +); + +statement ok +INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01' , false, '{"v101":100}'); + +statement ok +FLUSH; + +statement ok +DROP SINK s6; + +statement ok +DROP MATERIALIZED VIEW mv6; + +statement ok +DROP TABLE t6; \ No newline at end of file diff --git a/risedev.yml b/risedev.yml index 22356f2e1ac89..cb352daab6cf9 100644 --- a/risedev.yml +++ b/risedev.yml @@ -883,27 +883,7 @@ profile: - use: frontend - use: compactor - ci-deltalake-test: - config-path: src/config/ci.toml - steps: - - use: minio - - 
use: meta-node - - use: compute-node - enable-tiered-cache: true - - use: frontend - - use: compactor - - ci-clickhouse-test: - config-path: src/config/ci.toml - steps: - - use: minio - - use: meta-node - - use: compute-node - enable-tiered-cache: true - - use: frontend - - use: compactor - - ci-pulsar-test: + ci-sink-test: config-path: src/config/ci.toml steps: - use: minio diff --git a/src/connector/src/sink/starrocks.rs b/src/connector/src/sink/starrocks.rs index 11594133695d4..c5a0740b0736f 100644 --- a/src/connector/src/sink/starrocks.rs +++ b/src/connector/src/sink/starrocks.rs @@ -52,10 +52,10 @@ pub struct StarrocksCommon { #[serde(rename = "starrocks.host")] pub host: String, /// The port to the MySQL server of StarRocks FE. - #[serde(rename = "starrocks.mysqlport")] + #[serde(rename = "starrocks.mysqlport", alias = "starrocks.query_port")] pub mysql_port: String, /// The port to the HTTP server of StarRocks FE. - #[serde(rename = "starrocks.httpport")] + #[serde(rename = "starrocks.httpport", alias = "starrocks.http_port")] pub http_port: String, /// The user name used to access the StarRocks database. #[serde(rename = "starrocks.user")] @@ -175,7 +175,7 @@ impl StarrocksSink { Ok(starrocks_data_type.contains("varchar")) } risingwave_common::types::DataType::Time => Err(SinkError::Starrocks( - "starrocks can not support Time".to_string(), + "TIME is not supported for Starrocks sink. Please convert to VARCHAR or other supported types.".to_string(), )), risingwave_common::types::DataType::Timestamp => { Ok(starrocks_data_type.contains("datetime")) @@ -184,24 +184,24 @@ impl StarrocksSink { "TIMESTAMP WITH TIMEZONE is not supported for Starrocks sink as Starrocks doesn't store time values with timezone information. Please convert to TIMESTAMP first.".to_string(), )), risingwave_common::types::DataType::Interval => Err(SinkError::Starrocks( - "starrocks can not support Interval".to_string(), + "INTERVAL is not supported for Starrocks sink. Please convert to VARCHAR or other supported types.".to_string(), )), // todo! Validate the type struct and list risingwave_common::types::DataType::Struct(_) => Err(SinkError::Starrocks( - "starrocks can not support import struct".to_string(), + "STRUCT is not supported for Starrocks sink.".to_string(), )), risingwave_common::types::DataType::List(_) => { Ok(starrocks_data_type.contains("unknown")) } risingwave_common::types::DataType::Bytea => Err(SinkError::Starrocks( - "starrocks can not support Bytea".to_string(), + "BYTEA is not supported for Starrocks sink. Please convert to VARCHAR or other supported types.".to_string(), )), risingwave_common::types::DataType::Jsonb => Ok(starrocks_data_type.contains("json")), risingwave_common::types::DataType::Serial => { Ok(starrocks_data_type.contains("bigint")) } risingwave_common::types::DataType::Int256 => Err(SinkError::Starrocks( - "starrocks can not support Int256".to_string(), + "INT256 is not supported for Starrocks sink.".to_string(), )), } } diff --git a/src/connector/with_options_sink.yaml b/src/connector/with_options_sink.yaml index 74cb5c21e9c7f..2b23913a1fc32 100644 --- a/src/connector/with_options_sink.yaml +++ b/src/connector/with_options_sink.yaml @@ -466,10 +466,12 @@ StarrocksConfig: field_type: String comments: The port to the MySQL server of StarRocks FE. required: true + alias: starrocks.query_port - name: starrocks.httpport field_type: String comments: The port to the HTTP server of StarRocks FE. 
required: true + alias: starrocks.http_port - name: starrocks.user field_type: String comments: The user name used to access the StarRocks database. From 316f180b0097b75c5e6ce0785110561a1aa6dc58 Mon Sep 17 00:00:00 2001 From: Xinhao Xu <84456268+xxhZs@users.noreply.github.com> Date: Fri, 23 Feb 2024 10:42:20 +0800 Subject: [PATCH 09/24] feat(sink demo): Add http sink demo (#15149) --- integration_tests/http-sink/README.md | 34 +++++++++++++++++ integration_tests/http-sink/create_mv.sql | 6 +++ integration_tests/http-sink/create_sink.sql | 11 ++++++ integration_tests/http-sink/create_source.sql | 18 +++++++++ .../http-sink/docker-compose.yml | 37 +++++++++++++++++++ .../risingwave-connector-service/pom.xml | 1 - .../flink/http/HttpFlinkMockSinkFactory.java | 2 + 7 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 integration_tests/http-sink/README.md create mode 100644 integration_tests/http-sink/create_mv.sql create mode 100644 integration_tests/http-sink/create_sink.sql create mode 100644 integration_tests/http-sink/create_source.sql create mode 100644 integration_tests/http-sink/docker-compose.yml diff --git a/integration_tests/http-sink/README.md b/integration_tests/http-sink/README.md new file mode 100644 index 0000000000000..d956cb4ea95a4 --- /dev/null +++ b/integration_tests/http-sink/README.md @@ -0,0 +1,34 @@ +# Demo: Sinking to Http + +In this demo, we want to showcase how RisingWave is able to sink data to Http. This feature is depended on https://github.com/getindata/flink-http-connector. + +It has a few limitations: +1. It offers only two options for HTTP method, i.e, PUT and POST. +2. It can only execute one request-reply round to the service (session-less). +3. It cannot handle status codes in the SQL API. + +Therefore, we suggest you to try Python UDF at first. + +### Demo: +1. Launch the cluster: + +```sh +docker-compose up -d +``` + +The cluster contains a RisingWave cluster and its necessary dependencies, a datagen that generates the data. + +2. Build an Http Server that can be built on its own + +3. Execute the SQL queries in sequence: + +- create_source.sql +- create_mv.sql +- create_sink.sql + +4. 
Check the contents in Http Server: +On the Http Server side it will receive the json string, something like: +``` +{"user_id":5,"target_id":"siFqrkdlCn"} +``` +The number of json is 1000 diff --git a/integration_tests/http-sink/create_mv.sql b/integration_tests/http-sink/create_mv.sql new file mode 100644 index 0000000000000..8a291a3c95ea7 --- /dev/null +++ b/integration_tests/http-sink/create_mv.sql @@ -0,0 +1,6 @@ +CREATE MATERIALIZED VIEW bhv_mv AS +SELECT + user_id, + target_id +FROM + user_behaviors; diff --git a/integration_tests/http-sink/create_sink.sql b/integration_tests/http-sink/create_sink.sql new file mode 100644 index 0000000000000..0644d1d51934b --- /dev/null +++ b/integration_tests/http-sink/create_sink.sql @@ -0,0 +1,11 @@ +CREATE sink bhv_http_sink FROM bhv_mv WITH ( + connector = 'http', + url = 'http://localhost:8080/endpoint', + format = 'json', + type = 'append-only', + force_append_only='true', + primary_key = 'user_id', + gid.connector.http.sink.header.Origin = '*', + "gid.connector.http.sink.header.X-Content-Type-Options" = 'nosniff', + "gid.connector.http.sink.header.Content-Type" = 'application/json' +); \ No newline at end of file diff --git a/integration_tests/http-sink/create_source.sql b/integration_tests/http-sink/create_source.sql new file mode 100644 index 0000000000000..c28c10f3616da --- /dev/null +++ b/integration_tests/http-sink/create_source.sql @@ -0,0 +1,18 @@ +CREATE table user_behaviors ( + user_id int, + target_id VARCHAR, + target_type VARCHAR, + event_timestamp TIMESTAMP, + behavior_type VARCHAR, + parent_target_type VARCHAR, + parent_target_id VARCHAR, + PRIMARY KEY(user_id) +) WITH ( + connector = 'datagen', + fields.user_id.kind = 'sequence', + fields.user_id.start = '1', + fields.user_id.end = '1000', + fields.user_name.kind = 'random', + fields.user_name.length = '10', + datagen.rows.per.second = '10' +) FORMAT PLAIN ENCODE JSON; \ No newline at end of file diff --git a/integration_tests/http-sink/docker-compose.yml b/integration_tests/http-sink/docker-compose.yml new file mode 100644 index 0000000000000..8fba5ff352dc0 --- /dev/null +++ b/integration_tests/http-sink/docker-compose.yml @@ -0,0 +1,37 @@ +--- +version: "3" +services: + risingwave-standalone: + extends: + file: ../../docker/docker-compose.yml + service: risingwave-standalone + etcd-0: + extends: + file: ../../docker/docker-compose.yml + service: etcd-0 + grafana-0: + extends: + file: ../../docker/docker-compose.yml + service: grafana-0 + minio-0: + extends: + file: ../../docker/docker-compose.yml + service: minio-0 + prometheus-0: + extends: + file: ../../docker/docker-compose.yml + service: prometheus-0 +volumes: + risingwave-standalone: + external: false + etcd-0: + external: false + grafana-0: + external: false + minio-0: + external: false + prometheus-0: + external: false + message_queue: + external: false +name: risingwave-compose diff --git a/java/connector-node/risingwave-connector-service/pom.xml b/java/connector-node/risingwave-connector-service/pom.xml index 047c523c1c7db..d51d67497ce05 100644 --- a/java/connector-node/risingwave-connector-service/pom.xml +++ b/java/connector-node/risingwave-connector-service/pom.xml @@ -99,7 +99,6 @@ com.risingwave risingwave-sink-mock-flink-http-sink - provided diff --git a/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java 
b/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java index a969dddd620f7..d316eeae74bed 100644 --- a/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java +++ b/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java @@ -26,6 +26,8 @@ /** * The `FlinkMockSinkFactory` implementation of the http sink is responsible for creating the http * counterpart of the `DynamicTableSinkFactory`. And `validate` don't need to do anything. + * + *
This feature is depended on https://github.com/getindata/flink-http-connector */ public class HttpFlinkMockSinkFactory implements FlinkMockSinkFactory { @Override From 91d97acbfaae47f95b9ae40984a74ab14b948d49 Mon Sep 17 00:00:00 2001 From: Runji Wang Date: Fri, 23 Feb 2024 11:30:38 +0800 Subject: [PATCH 10/24] refactor(frontend): use `#[derive(Fields)]` in statement handlers (#15130) Signed-off-by: Runji Wang --- src/common/fields-derive/src/lib.rs | 58 +++- src/frontend/src/handler/cancel_job.rs | 24 +- src/frontend/src/handler/describe.rs | 118 +++---- src/frontend/src/handler/explain.rs | 30 +- src/frontend/src/handler/mod.rs | 44 ++- src/frontend/src/handler/show.rs | 314 +++++++++++------- src/frontend/src/handler/transaction.rs | 21 +- src/frontend/src/handler/util.rs | 65 +--- src/frontend/src/handler/variable.rs | 104 +++--- src/frontend/src/session.rs | 40 +-- src/frontend/src/utils/infer_stmt_row_desc.rs | 253 -------------- src/frontend/src/utils/mod.rs | 1 - 12 files changed, 449 insertions(+), 623 deletions(-) delete mode 100644 src/frontend/src/utils/infer_stmt_row_desc.rs diff --git a/src/common/fields-derive/src/lib.rs b/src/common/fields-derive/src/lib.rs index 86fa229a5adcd..b38f579751683 100644 --- a/src/common/fields-derive/src/lib.rs +++ b/src/common/fields-derive/src/lib.rs @@ -16,7 +16,7 @@ use proc_macro2::TokenStream; use quote::quote; use syn::{Data, DeriveInput, Result}; -#[proc_macro_derive(Fields, attributes(primary_key))] +#[proc_macro_derive(Fields, attributes(primary_key, fields))] pub fn fields(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream { inner(tokens.into()).into() } @@ -46,6 +46,16 @@ fn gen(tokens: TokenStream) -> Result { )); }; + let style = get_style(&input); + if let Some(style) = &style { + if !["Title Case", "TITLE CASE", "snake_case"].contains(&style.value().as_str()) { + return Err(syn::Error::new_spanned( + style, + "only `Title Case`, `TITLE CASE`, and `snake_case` are supported", + )); + } + } + let fields_rw: Vec = struct_ .fields .iter() @@ -55,6 +65,12 @@ fn gen(tokens: TokenStream) -> Result { if name.starts_with("r#") { name = name[2..].to_string(); } + // cast style + match style.as_ref().map_or(String::new(), |f| f.value()).as_str() { + "Title Case" => name = to_title_case(&name), + "TITLE CASE" => name = to_title_case(&name).to_uppercase(), + _ => {} + } let ty = &field.ty; quote! { (#name, <#ty as ::risingwave_common::types::WithDataType>::default_data_type()) @@ -132,6 +148,46 @@ fn get_primary_key(input: &syn::DeriveInput) -> Option> { None } +/// Get name style from `#[fields(style = "xxx")]` attribute. +fn get_style(input: &syn::DeriveInput) -> Option { + let style = input.attrs.iter().find_map(|attr| match &attr.meta { + syn::Meta::List(list) if list.path.is_ident("fields") => { + let name_value: syn::MetaNameValue = syn::parse2(list.tokens.clone()).ok()?; + if name_value.path.is_ident("style") { + Some(name_value.value) + } else { + None + } + } + _ => None, + })?; + match style { + syn::Expr::Lit(lit) => match lit.lit { + syn::Lit::Str(s) => Some(s), + _ => None, + }, + _ => None, + } +} + +/// Convert `snake_case` to `Title Case`. 
+fn to_title_case(s: &str) -> String { + let mut title = String::new(); + let mut next_upper = true; + for c in s.chars() { + if c == '_' { + title.push(' '); + next_upper = true; + } else if next_upper { + title.push(c.to_uppercase().next().unwrap()); + next_upper = false; + } else { + title.push(c); + } + } + title +} + #[cfg(test)] mod tests { use indoc::indoc; diff --git a/src/frontend/src/handler/cancel_job.rs b/src/frontend/src/handler/cancel_job.rs index f124a2a030bd1..278e01e3e1bc0 100644 --- a/src/frontend/src/handler/cancel_job.rs +++ b/src/frontend/src/handler/cancel_job.rs @@ -12,14 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -use itertools::Itertools; -use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::{PgResponse, StatementType}; -use pgwire::types::Row; -use risingwave_common::types::DataType; +use risingwave_common::types::Fields; use risingwave_pb::meta::cancel_creating_jobs_request::{CreatingJobIds, PbJobs}; use risingwave_sqlparser::ast::JobIdents; +use super::RwPgResponseBuilderExt; use crate::error::Result; use crate::handler::{HandlerArgs, RwPgResponse}; @@ -36,16 +34,14 @@ pub(super) async fn handle_cancel( .await?; let rows = canceled_jobs .into_iter() - .map(|id| Row::new(vec![Some(id.to_string().into())])) - .collect_vec(); + .map(|id| CancelRow { id: id.to_string() }); Ok(PgResponse::builder(StatementType::CANCEL_COMMAND) - .values( - rows.into(), - vec![PgFieldDescriptor::new( - "Id".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - )], - ) + .rows(rows) .into()) } + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct CancelRow { + id: String, +} diff --git a/src/frontend/src/handler/describe.rs b/src/frontend/src/handler/describe.rs index ef1a601cca590..36cff2e20e2b6 100644 --- a/src/frontend/src/handler/describe.rs +++ b/src/frontend/src/handler/describe.rs @@ -17,17 +17,16 @@ use std::fmt::Display; use itertools::Itertools; use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::{PgResponse, StatementType}; -use pgwire::types::Row; use risingwave_common::catalog::{ColumnCatalog, ColumnDesc}; -use risingwave_common::types::DataType; +use risingwave_common::types::Fields; use risingwave_sqlparser::ast::{display_comma_separated, ObjectName}; -use super::RwPgResponse; +use super::show::ShowColumnRow; +use super::{fields_to_descriptors, RwPgResponse}; use crate::binder::{Binder, Relation}; use crate::catalog::CatalogError; use crate::error::Result; -use crate::handler::util::col_descs_to_rows; -use crate::handler::HandlerArgs; +use crate::handler::{HandlerArgs, RwPgResponseBuilderExt}; pub fn handle_describe(handler_args: HandlerArgs, object_name: ObjectName) -> Result { let session = handler_args.session; @@ -156,7 +155,10 @@ pub fn handle_describe(handler_args: HandlerArgs, object_name: ObjectName) -> Re }; // Convert all column descs to rows - let mut rows = col_descs_to_rows(columns); + let mut rows = columns + .into_iter() + .flat_map(ShowColumnRow::from_catalog) + .collect_vec(); fn concat(display_elems: impl IntoIterator) -> String where @@ -170,96 +172,68 @@ pub fn handle_describe(handler_args: HandlerArgs, object_name: ObjectName) -> Re // Convert primary key to rows if !pk_columns.is_empty() { - rows.push(Row::new(vec![ - Some("primary key".into()), - Some(concat(pk_columns.iter().map(|x| &x.name)).into()), - None, // Is Hidden - None, // Description - ])); + rows.push(ShowColumnRow { + name: "primary 
key".into(), + r#type: concat(pk_columns.iter().map(|x| &x.name)), + is_hidden: None, + description: None, + }); } // Convert distribution keys to rows if !dist_columns.is_empty() { - rows.push(Row::new(vec![ - Some("distribution key".into()), - Some(concat(dist_columns.iter().map(|x| &x.name)).into()), - None, // Is Hidden - None, // Description - ])); + rows.push(ShowColumnRow { + name: "distribution key".into(), + r#type: concat(dist_columns.iter().map(|x| &x.name)), + is_hidden: None, + description: None, + }); } // Convert all indexes to rows rows.extend(indices.iter().map(|index| { let index_display = index.display(); - Row::new(vec![ - Some(index.name.clone().into()), - if index_display.include_columns.is_empty() { - Some( - format!( - "index({}) distributed by({})", - display_comma_separated(&index_display.index_columns_with_ordering), - display_comma_separated(&index_display.distributed_by_columns), - ) - .into(), + ShowColumnRow { + name: index.name.clone(), + r#type: if index_display.include_columns.is_empty() { + format!( + "index({}) distributed by({})", + display_comma_separated(&index_display.index_columns_with_ordering), + display_comma_separated(&index_display.distributed_by_columns), ) } else { - Some( - format!( - "index({}) include({}) distributed by({})", - display_comma_separated(&index_display.index_columns_with_ordering), - display_comma_separated(&index_display.include_columns), - display_comma_separated(&index_display.distributed_by_columns), - ) - .into(), + format!( + "index({}) include({}) distributed by({})", + display_comma_separated(&index_display.index_columns_with_ordering), + display_comma_separated(&index_display.include_columns), + display_comma_separated(&index_display.distributed_by_columns), ) }, - // Is Hidden - None, - // Description + is_hidden: None, // TODO: index description - None, - ]) + description: None, + } })); - rows.push(Row::new(vec![ - Some("table description".into()), - Some(relname.into()), - None, // Is Hidden - description.map(Into::into), // Description - ])); + rows.push(ShowColumnRow { + name: "table description".into(), + r#type: relname, + is_hidden: None, + description: description.map(Into::into), + }); // TODO: table name and description as title of response // TODO: recover the original user statement Ok(PgResponse::builder(StatementType::DESCRIBE) - .values( - rows.into(), - vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Type".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Hidden".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Description".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ) + .rows(rows) .into()) } +pub fn infer_describe() -> Vec { + fields_to_descriptors(ShowColumnRow::fields()) +} + #[cfg(test)] mod tests { use std::collections::HashMap; diff --git a/src/frontend/src/handler/explain.rs b/src/frontend/src/handler/explain.rs index c25bf7678bd04..b966cca8f50cf 100644 --- a/src/frontend/src/handler/explain.rs +++ b/src/frontend/src/handler/explain.rs @@ -12,12 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use itertools::Itertools; -use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::{PgResponse, StatementType}; -use pgwire::types::Row; use risingwave_common::bail_not_implemented; -use risingwave_common::types::DataType; +use risingwave_common::types::Fields; use risingwave_sqlparser::ast::{ExplainOptions, ExplainType, Statement}; use thiserror_ext::AsReport; @@ -27,7 +24,7 @@ use super::create_sink::{gen_sink_plan, get_partition_compute_info}; use super::create_table::ColumnIdGenerator; use super::query::gen_batch_plan_by_statement; use super::util::SourceSchemaCompatExt; -use super::RwPgResponse; +use super::{RwPgResponse, RwPgResponseBuilderExt}; use crate::error::{ErrorCode, Result}; use crate::handler::create_table::handle_create_table_plan; use crate::handler::HandlerArgs; @@ -254,20 +251,17 @@ pub async fn handle_explain( } } - let rows = blocks - .iter() - .flat_map(|b| b.lines().map(|l| l.to_owned())) - .map(|l| Row::new(vec![Some(l.into())])) - .collect_vec(); + let rows = blocks.iter().flat_map(|b| b.lines()).map(|l| ExplainRow { + query_plan: l.into(), + }); Ok(PgResponse::builder(StatementType::EXPLAIN) - .values( - rows.into(), - vec![PgFieldDescriptor::new( - "QUERY PLAN".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - )], - ) + .rows(rows) .into()) } + +#[derive(Fields)] +#[fields(style = "TITLE CASE")] +struct ExplainRow { + query_plan: String, +} diff --git a/src/frontend/src/handler/mod.rs b/src/frontend/src/handler/mod.rs index 3cdc4b191da92..827f28f87319e 100644 --- a/src/frontend/src/handler/mod.rs +++ b/src/frontend/src/handler/mod.rs @@ -18,11 +18,15 @@ use std::task::{Context, Poll}; use futures::stream::{self, BoxStream}; use futures::{Stream, StreamExt}; +use itertools::Itertools; +use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::StatementType::{self, ABORT, BEGIN, COMMIT, ROLLBACK, START_TRANSACTION}; use pgwire::pg_response::{PgResponse, PgResponseBuilder, RowSetResult}; use pgwire::pg_server::BoxedError; use pgwire::types::{Format, Row}; use risingwave_common::bail_not_implemented; +use risingwave_common::types::Fields; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_sqlparser::ast::*; use self::util::{DataChunkToRowSetAdapter, SourceSchemaCompatExt}; @@ -59,7 +63,7 @@ pub mod create_table; pub mod create_table_as; pub mod create_user; pub mod create_view; -mod describe; +pub mod describe; mod drop_connection; mod drop_database; pub mod drop_function; @@ -78,7 +82,7 @@ pub mod handle_privilege; mod kill_process; pub mod privilege; pub mod query; -mod show; +pub mod show; mod transaction; pub mod util; pub mod variable; @@ -90,6 +94,42 @@ pub type RwPgResponseBuilder = PgResponseBuilder; /// The [`PgResponse`] used by RisingWave. pub type RwPgResponse = PgResponse; +#[easy_ext::ext(RwPgResponseBuilderExt)] +impl RwPgResponseBuilder { + /// Append rows to the response. 
+ pub fn rows(self, rows: impl IntoIterator) -> Self { + let fields = T::fields(); + self.values( + rows.into_iter() + .map(|row| { + Row::new( + row.into_owned_row() + .into_iter() + .zip_eq_fast(&fields) + .map(|(datum, (_, ty))| { + datum.map(|scalar| { + scalar.as_scalar_ref_impl().text_format(ty).into() + }) + }) + .collect(), + ) + }) + .collect_vec() + .into(), + fields_to_descriptors(fields), + ) + } +} + +pub fn fields_to_descriptors( + fields: Vec<(&str, risingwave_common::types::DataType)>, +) -> Vec { + fields + .iter() + .map(|(name, ty)| PgFieldDescriptor::new(name.to_string(), ty.to_oid(), ty.type_len())) + .collect() +} + pub enum PgResponseStream { LocalQuery(DataChunkToRowSetAdapter), DistributedQuery(DataChunkToRowSetAdapter), diff --git a/src/frontend/src/handler/show.rs b/src/frontend/src/handler/show.rs index 4a98b6c7cd33d..226a219a11887 100644 --- a/src/frontend/src/handler/show.rs +++ b/src/frontend/src/handler/show.rs @@ -19,27 +19,24 @@ use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_protocol::truncated_fmt; use pgwire::pg_response::{PgResponse, StatementType}; use pgwire::pg_server::Session; -use pgwire::types::Row; use risingwave_common::bail_not_implemented; use risingwave_common::catalog::{ColumnCatalog, ColumnDesc, DEFAULT_SCHEMA_NAME}; -use risingwave_common::types::DataType; +use risingwave_common::types::{DataType, Fields}; use risingwave_common::util::addr::HostAddr; use risingwave_connector::source::kafka::PRIVATELINK_CONNECTION; use risingwave_expr::scalar::like::{i_like_default, like_default}; use risingwave_pb::catalog::connection; use risingwave_sqlparser::ast::{ - Ident, ObjectName, ShowCreateType, ShowObject, ShowStatementFilter, + display_comma_separated, Ident, ObjectName, ShowCreateType, ShowObject, ShowStatementFilter, }; use serde_json; -use super::RwPgResponse; +use super::{fields_to_descriptors, RwPgResponse, RwPgResponseBuilderExt}; use crate::binder::{Binder, Relation}; use crate::catalog::{CatalogError, IndexCatalog}; use crate::error::Result; -use crate::handler::util::{col_descs_to_rows, indexes_to_rows}; use crate::handler::HandlerArgs; use crate::session::SessionImpl; -use crate::utils::infer_stmt_row_desc::infer_show_object; pub fn get_columns_from_table( session: &SessionImpl, @@ -109,6 +106,136 @@ fn schema_or_default(schema: &Option) -> String { .map_or_else(|| DEFAULT_SCHEMA_NAME.to_string(), |s| s.real_value()) } +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowObjectRow { + name: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +pub struct ShowColumnRow { + pub name: String, + pub r#type: String, + pub is_hidden: Option, + pub description: Option, +} + +impl ShowColumnRow { + pub fn from_catalog(col: ColumnCatalog) -> Vec { + col.column_desc + .flatten() + .into_iter() + .map(|c| { + let type_name = if let DataType::Struct { .. 
} = c.data_type { + c.type_name.clone() + } else { + c.data_type.to_string() + }; + ShowColumnRow { + name: c.name, + r#type: type_name, + is_hidden: Some(col.is_hidden.to_string()), + description: c.description, + } + }) + .collect() + } +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowConnectionRow { + name: String, + r#type: String, + properties: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowFunctionRow { + name: String, + arguments: String, + return_type: String, + language: String, + link: Option, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowIndexRow { + name: String, + on: String, + key: String, + include: String, + distributed_by: String, +} + +impl From> for ShowIndexRow { + fn from(index: Arc) -> Self { + let index_display = index.display(); + ShowIndexRow { + name: index.name.clone(), + on: index.primary_table.name.clone(), + key: display_comma_separated(&index_display.index_columns_with_ordering).to_string(), + include: display_comma_separated(&index_display.include_columns).to_string(), + distributed_by: display_comma_separated(&index_display.distributed_by_columns) + .to_string(), + } + } +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowClusterRow { + addr: String, + state: String, + parallel_units: String, + is_streaming: String, + is_serving: String, + is_unschedulable: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowJobRow { + id: i64, + statement: String, + progress: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowProcessListRow { + id: String, + user: String, + host: String, + database: String, + time: Option, + info: Option, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowCreateObjectRow { + name: String, + create_sql: String, +} + +/// Infer the row description for different show objects. +pub fn infer_show_object(objects: &ShowObject) -> Vec { + fields_to_descriptors(match objects { + ShowObject::Columns { .. } => ShowColumnRow::fields(), + ShowObject::Connection { .. } => ShowConnectionRow::fields(), + ShowObject::Function { .. } => ShowFunctionRow::fields(), + ShowObject::Indexes { .. 
} => ShowIndexRow::fields(), + ShowObject::Cluster => ShowClusterRow::fields(), + ShowObject::Jobs => ShowJobRow::fields(), + ShowObject::ProcessList => ShowProcessListRow::fields(), + _ => ShowObjectRow::fields(), + }) +} + pub async fn handle_show_object( handler_args: HandlerArgs, command: ShowObject, @@ -119,7 +246,6 @@ pub async fn handle_show_object( if let Some(ShowStatementFilter::Where(..)) = filter { bail_not_implemented!("WHERE clause in SHOW statement"); } - let row_desc = infer_show_object(&command); let catalog_reader = session.env().catalog_reader(); @@ -178,18 +304,15 @@ pub async fn handle_show_object( .into()); }; - let rows = col_descs_to_rows(columns); - return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(columns.into_iter().flat_map(ShowColumnRow::from_catalog)) .into()); } ShowObject::Indexes { table } => { let indexes = get_indexes_from_table(&session, table)?; - let rows = indexes_to_rows(indexes); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(indexes.into_iter().map(ShowIndexRow::from)) .into()); } ShowObject::Connection { schema } => { @@ -200,7 +323,7 @@ pub async fn handle_show_object( .iter_connections() .map(|c| { let name = c.name.clone(); - let conn_type = match &c.info { + let r#type = match &c.info { connection::Info::PrivateLinkService(_) => { PRIVATELINK_CONNECTION.to_string() }, @@ -230,105 +353,81 @@ pub async fn handle_show_object( ) } }; - Row::new(vec![ - Some(name.into()), - Some(conn_type.into()), - Some(properties.into()), - ]) - }) - .collect_vec(); + ShowConnectionRow { + name, + r#type, + properties, + } + }); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(rows) .into()); } ShowObject::Function { schema } => { - let rows = catalog_reader - .read_guard() + let reader = catalog_reader.read_guard(); + let rows = reader .get_schema_by_name(session.database(), &schema_or_default(&schema))? 
.iter_function() - .map(|t| { - Row::new(vec![ - Some(t.name.clone().into()), - Some(t.arg_types.iter().map(|t| t.to_string()).join(", ").into()), - Some(t.return_type.to_string().into()), - Some(t.language.clone().into()), - t.link.clone().map(Into::into), - ]) - }) - .collect_vec(); + .map(|t| ShowFunctionRow { + name: t.name.clone(), + arguments: t.arg_types.iter().map(|t| t.to_string()).join(", "), + return_type: t.return_type.to_string(), + language: t.language.clone(), + link: t.link.clone(), + }); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(rows) .into()); } ShowObject::Cluster => { let workers = session.env().worker_node_manager().list_worker_nodes(); - let rows = workers - .into_iter() - .map(|worker| { - let addr: HostAddr = worker.host.as_ref().unwrap().into(); - let property = worker.property.as_ref().unwrap(); - Row::new(vec![ - Some(addr.to_string().into()), - Some(worker.get_state().unwrap().as_str_name().into()), - Some( - worker - .parallel_units - .into_iter() - .map(|pu| pu.id) - .join(", ") - .into(), - ), - Some(property.is_streaming.to_string().into()), - Some(property.is_serving.to_string().into()), - Some(property.is_unschedulable.to_string().into()), - ]) - }) - .collect_vec(); + let rows = workers.into_iter().map(|worker| { + let addr: HostAddr = worker.host.as_ref().unwrap().into(); + let property = worker.property.as_ref().unwrap(); + ShowClusterRow { + addr: addr.to_string(), + state: worker.get_state().unwrap().as_str_name().to_string(), + parallel_units: worker.parallel_units.into_iter().map(|pu| pu.id).join(", "), + is_streaming: property.is_streaming.to_string(), + is_serving: property.is_serving.to_string(), + is_unschedulable: property.is_unschedulable.to_string(), + } + }); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(rows) .into()); } ShowObject::Jobs => { let resp = session.env().meta_client().list_ddl_progress().await?; - let rows = resp - .into_iter() - .map(|job| { - Row::new(vec![ - Some(job.id.to_string().into()), - Some(job.statement.into()), - Some(job.progress.into()), - ]) - }) - .collect_vec(); + let rows = resp.into_iter().map(|job| ShowJobRow { + id: job.id as i64, + statement: job.statement, + progress: job.progress, + }); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(rows) .into()); } ShowObject::ProcessList => { - let rows = { - let sessions_map = session.env().sessions_map(); - sessions_map - .read() - .values() - .map(|s| { - Row::new(vec![ - // Since process id and the secret id in the session id are the same in RisingWave, just display the process id. - Some(format!("{}", s.id().0).into()), - Some(s.user_name().to_owned().into()), - Some(format!("{}", s.peer_addr()).into()), - Some(s.database().to_owned().into()), - s.elapse_since_running_sql() - .map(|mills| format!("{}ms", mills).into()), - s.running_sql().map(|sql| { - format!("{}", truncated_fmt::TruncatedFmt(&sql, 1024)).into() - }), - ]) - }) - .collect_vec() - }; + let sessions_map = session.env().sessions_map().read(); + let rows = sessions_map.values().map(|s| { + ShowProcessListRow { + // Since process id and the secret id in the session id are the same in RisingWave, just display the process id. 
+ id: format!("{}", s.id().0), + user: s.user_name().to_owned(), + host: format!("{}", s.peer_addr()), + database: s.database().to_owned(), + time: s + .elapse_since_running_sql() + .map(|mills| format!("{}ms", mills)), + info: s + .running_sql() + .map(|sql| format!("{}", truncated_fmt::TruncatedFmt(&sql, 1024))), + } + }); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(rows) .into()); } }; @@ -341,21 +440,17 @@ pub async fn handle_show_object( Some(ShowStatementFilter::Where(..)) => unreachable!(), None => true, }) - .map(|n| Row::new(vec![Some(n.into())])) - .collect_vec(); + .map(|name| ShowObjectRow { name }); Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values( - rows.into(), - vec![PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - )], - ) + .rows(rows) .into()) } +pub fn infer_show_create_object() -> Vec { + fields_to_descriptors(ShowCreateObjectRow::fields()) +} + pub fn handle_show_create_object( handle_args: HandlerArgs, show_create_type: ShowCreateType, @@ -415,21 +510,10 @@ pub fn handle_show_create_object( let name = format!("{}.{}", schema_name, object_name); Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values( - vec![Row::new(vec![Some(name.into()), Some(sql.into())])].into(), - vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Create Sql".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ) + .rows([ShowCreateObjectRow { + name, + create_sql: sql, + }]) .into()) } diff --git a/src/frontend/src/handler/transaction.rs b/src/frontend/src/handler/transaction.rs index 452cfe0ed9299..8ab7af36c29ca 100644 --- a/src/frontend/src/handler/transaction.rs +++ b/src/frontend/src/handler/transaction.rs @@ -13,14 +13,13 @@ // limitations under the License. use pgwire::pg_response::StatementType; -use pgwire::types::Row; use risingwave_common::bail_not_implemented; +use risingwave_common::types::Fields; use risingwave_sqlparser::ast::{TransactionAccessMode, TransactionMode, Value}; -use super::{HandlerArgs, RwPgResponse}; +use super::{HandlerArgs, RwPgResponse, RwPgResponseBuilderExt}; use crate::error::Result; use crate::session::transaction::AccessMode; -use crate::utils::infer_stmt_row_desc::infer_show_variable; macro_rules! 
not_impl { ($body:expr) => { @@ -118,16 +117,20 @@ pub async fn handle_set( .into()) } +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowVariableRow { + name: String, +} + pub fn handle_show_isolation_level(handler_args: HandlerArgs) -> Result { let config_reader = handler_args.session.config(); - let parameter_name = "transaction_isolation"; - let row_desc = infer_show_variable(parameter_name); - let rows = vec![Row::new(vec![Some( - config_reader.get(parameter_name)?.into(), - )])]; + let rows = [ShowVariableRow { + name: config_reader.get("transaction_isolation")?, + }]; Ok(RwPgResponse::builder(StatementType::SHOW_VARIABLE) - .values(rows.into(), row_desc) + .rows(rows) .into()) } diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs index 6e91cf53f0b32..1e49ee8baf540 100644 --- a/src/frontend/src/handler/util.rs +++ b/src/frontend/src/handler/util.rs @@ -27,14 +27,13 @@ use pgwire::pg_server::BoxedError; use pgwire::types::{Format, FormatIterator, Row}; use pin_project_lite::pin_project; use risingwave_common::array::DataChunk; -use risingwave_common::catalog::{ColumnCatalog, Field}; +use risingwave_common::catalog::Field; use risingwave_common::row::Row as _; use risingwave_common::types::{DataType, ScalarRefImpl, Timestamptz}; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_connector::source::KAFKA_CONNECTOR; -use risingwave_sqlparser::ast::{display_comma_separated, CompatibleSourceSchema, ConnectorSchema}; +use risingwave_sqlparser::ast::{CompatibleSourceSchema, ConnectorSchema}; -use crate::catalog::IndexCatalog; use crate::error::{ErrorCode, Result as RwResult}; use crate::handler::create_source::UPSTREAM_SOURCE_KEY; use crate::session::{current, SessionImpl}; @@ -172,66 +171,6 @@ fn to_pg_rows( .try_collect() } -/// Convert column descs to rows which conclude name and type -pub fn col_descs_to_rows(columns: Vec) -> Vec { - columns - .iter() - .flat_map(|col| { - col.column_desc - .flatten() - .into_iter() - .map(|c| { - let type_name = if let DataType::Struct { .. } = c.data_type { - c.type_name.clone() - } else { - c.data_type.to_string() - }; - Row::new(vec![ - Some(c.name.into()), - Some(type_name.into()), - Some(col.is_hidden.to_string().into()), - c.description.map(Into::into), - ]) - }) - .collect_vec() - }) - .collect_vec() -} - -pub fn indexes_to_rows(indexes: Vec>) -> Vec { - indexes - .iter() - .map(|index| { - let index_display = index.display(); - Row::new(vec![ - Some(index.name.clone().into()), - Some(index.primary_table.name.clone().into()), - Some( - format!( - "{}", - display_comma_separated(&index_display.index_columns_with_ordering) - ) - .into(), - ), - Some( - format!( - "{}", - display_comma_separated(&index_display.include_columns) - ) - .into(), - ), - Some( - format!( - "{}", - display_comma_separated(&index_display.distributed_by_columns) - ) - .into(), - ), - ]) - }) - .collect_vec() -} - /// Convert from [`Field`] to [`PgFieldDescriptor`]. 
pub fn to_pg_field(f: &Field) -> PgFieldDescriptor { PgFieldDescriptor::new( diff --git a/src/frontend/src/handler/variable.rs b/src/frontend/src/handler/variable.rs index 9b4828b232837..96fd232215ccd 100644 --- a/src/frontend/src/handler/variable.rs +++ b/src/frontend/src/handler/variable.rs @@ -14,19 +14,18 @@ use anyhow::Context; use itertools::Itertools; +use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_protocol::ParameterStatus; use pgwire::pg_response::{PgResponse, StatementType}; -use pgwire::types::Row; use risingwave_common::session_config::{ConfigReporter, SESSION_CONFIG_LIST_SEP}; use risingwave_common::system_param::reader::SystemParamsRead; -use risingwave_common::types::{DataType, ScalarRefImpl}; +use risingwave_common::types::Fields; use risingwave_sqlparser::ast::{Ident, SetTimeZoneValue, SetVariableValue, Value}; use risingwave_sqlparser::keywords::Keyword; -use super::RwPgResponse; +use super::{fields_to_descriptors, RwPgResponse, RwPgResponseBuilderExt}; use crate::error::Result; use crate::handler::HandlerArgs; -use crate::utils::infer_stmt_row_desc::infer_show_variable; /// convert `SetVariableValue` to string while remove the quotes on literals. pub(crate) fn set_var_to_param_str(value: &SetVariableValue) -> Option { @@ -117,40 +116,36 @@ pub(super) async fn handle_show( ) -> Result { // TODO: Verify that the name used in `show` command is indeed always case-insensitive. let name = variable.iter().map(|e| e.real_value()).join(" "); - let row_desc = infer_show_variable(&name); - let rows = if name.eq_ignore_ascii_case("PARAMETERS") { - handle_show_system_params(handler_args).await? + if name.eq_ignore_ascii_case("PARAMETERS") { + handle_show_system_params(handler_args).await } else if name.eq_ignore_ascii_case("ALL") { - handle_show_all(handler_args.clone())? 
+ handle_show_all(handler_args.clone()) } else { let config_reader = handler_args.session.config(); - vec![Row::new(vec![Some(config_reader.get(&name)?.into())])] - }; - - Ok(PgResponse::builder(StatementType::SHOW_VARIABLE) - .values(rows.into(), row_desc) - .into()) + Ok(PgResponse::builder(StatementType::SHOW_VARIABLE) + .rows([ShowVariableRow { + name: config_reader.get(&name)?, + }]) + .into()) + } } -fn handle_show_all(handler_args: HandlerArgs) -> Result> { +fn handle_show_all(handler_args: HandlerArgs) -> Result { let config_reader = handler_args.session.config(); let all_variables = config_reader.show_all(); - let rows = all_variables - .iter() - .map(|info| { - Row::new(vec![ - Some(info.name.clone().into()), - Some(info.setting.clone().into()), - Some(info.description.clone().into()), - ]) - }) - .collect_vec(); - Ok(rows) + let rows = all_variables.iter().map(|info| ShowVariableAllRow { + name: info.name.clone(), + setting: info.setting.clone(), + description: info.description.clone(), + }); + Ok(PgResponse::builder(StatementType::SHOW_VARIABLE) + .rows(rows) + .into()) } -async fn handle_show_system_params(handler_args: HandlerArgs) -> Result> { +async fn handle_show_system_params(handler_args: HandlerArgs) -> Result { let params = handler_args .session .env() @@ -160,17 +155,46 @@ async fn handle_show_system_params(handler_args: HandlerArgs) -> Result let rows = params .get_all() .into_iter() - .map(|info| { - let is_mutable_bytes = ScalarRefImpl::Bool(info.mutable) - .text_format(&DataType::Boolean) - .into(); - Row::new(vec![ - Some(info.name.into()), - Some(info.value.into()), - Some(info.description.into()), - Some(is_mutable_bytes), - ]) - }) - .collect_vec(); - Ok(rows) + .map(|info| ShowVariableParamsRow { + name: info.name.into(), + value: info.value, + description: info.description.into(), + mutable: info.mutable, + }); + Ok(PgResponse::builder(StatementType::SHOW_VARIABLE) + .rows(rows) + .into()) +} + +pub fn infer_show_variable(name: &str) -> Vec { + fields_to_descriptors(if name.eq_ignore_ascii_case("ALL") { + ShowVariableAllRow::fields() + } else if name.eq_ignore_ascii_case("PARAMETERS") { + ShowVariableParamsRow::fields() + } else { + ShowVariableRow::fields() + }) +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowVariableRow { + name: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowVariableAllRow { + name: String, + setting: String, + description: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowVariableParamsRow { + name: String, + value: String, + description: String, + mutable: bool, } diff --git a/src/frontend/src/session.rs b/src/frontend/src/session.rs index 460c978535833..67a5da01e1213 100644 --- a/src/frontend/src/session.rs +++ b/src/frontend/src/session.rs @@ -81,11 +81,14 @@ use crate::catalog::{ check_schema_writable, CatalogError, DatabaseId, OwnedByUserCatalog, SchemaId, }; use crate::error::{ErrorCode, Result, RwError}; +use crate::handler::describe::infer_describe; use crate::handler::extended_handle::{ handle_bind, handle_execute, handle_parse, Portal, PrepareStatement, }; use crate::handler::privilege::ObjectCheckItem; +use crate::handler::show::{infer_show_create_object, infer_show_object}; use crate::handler::util::to_pg_field; +use crate::handler::variable::infer_show_variable; use crate::handler::{handle, RwPgResponse}; use crate::health_service::HealthServiceImpl; use crate::meta_client::{FrontendMetaClient, FrontendMetaClientImpl}; @@ -102,7 +105,6 @@ use 
crate::user::user_authentication::md5_hash_with_salt; use crate::user::user_manager::UserInfoManager; use crate::user::user_service::{UserInfoReader, UserInfoWriter, UserInfoWriterImpl}; use crate::user::UserId; -use crate::utils::infer_stmt_row_desc::{infer_show_object, infer_show_variable}; use crate::{FrontendOpts, PgResponseStream}; pub(crate) mod current; @@ -1242,18 +1244,7 @@ fn infer(bound: Option, stmt: Statement) -> Result Ok(infer_show_object(&show_object)), - Statement::ShowCreateObject { .. } => Ok(vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Create Sql".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ]), + Statement::ShowCreateObject { .. } => Ok(infer_show_create_object()), Statement::ShowTransactionIsolationLevel => { let name = "transaction_isolation"; Ok(infer_show_variable(name)) @@ -1262,28 +1253,7 @@ fn infer(bound: Option, stmt: Statement) -> Result Ok(vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Type".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Hidden".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Description".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ]), + Statement::Describe { name: _ } => Ok(infer_describe()), Statement::Explain { .. } => Ok(vec![PgFieldDescriptor::new( "QUERY PLAN".to_owned(), DataType::Varchar.to_oid(), diff --git a/src/frontend/src/utils/infer_stmt_row_desc.rs b/src/frontend/src/utils/infer_stmt_row_desc.rs deleted file mode 100644 index 690b2bf81872f..0000000000000 --- a/src/frontend/src/utils/infer_stmt_row_desc.rs +++ /dev/null @@ -1,253 +0,0 @@ -// Copyright 2024 RisingWave Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use pgwire::pg_field_descriptor::PgFieldDescriptor; -use risingwave_common::types::DataType; -use risingwave_sqlparser::ast::ShowObject; - -/// `infer_stmt_row_desc` is used to infer the row description for different show objects. -pub fn infer_show_object(objects: &ShowObject) -> Vec { - match objects { - ShowObject::Columns { .. } => vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Type".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Hidden".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Description".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::Connection { .. 
} => vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Type".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Properties".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::Function { .. } => vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Arguments".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Return Type".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Language".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Link".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::Indexes { .. } => vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "On".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Key".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Include".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Distributed By".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::Cluster => vec![ - PgFieldDescriptor::new( - "Addr".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "State".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Parallel Units".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Streaming".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Serving".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Unschedulable".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::Jobs => vec![ - PgFieldDescriptor::new( - "Id".to_owned(), - DataType::Int64.to_oid(), - DataType::Int64.type_len(), - ), - PgFieldDescriptor::new( - "Statement".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Progress".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::ProcessList => vec![ - PgFieldDescriptor::new( - "Id".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "User".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Host".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Database".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Time".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Info".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - _ => vec![PgFieldDescriptor::new( - "Name".to_owned(), - 
DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - )], - } -} - -pub fn infer_show_variable(name: &str) -> Vec { - if name.eq_ignore_ascii_case("ALL") { - vec![ - PgFieldDescriptor::new( - "Name".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Setting".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Description".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ] - } else if name.eq_ignore_ascii_case("PARAMETERS") { - vec![ - PgFieldDescriptor::new( - "Name".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Value".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Description".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Mutable".to_string(), - DataType::Boolean.to_oid(), - DataType::Boolean.type_len(), - ), - ] - } else { - vec![PgFieldDescriptor::new( - name.to_ascii_lowercase(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - )] - } -} diff --git a/src/frontend/src/utils/mod.rs b/src/frontend/src/utils/mod.rs index bfe7cb093aad0..697b626fb3398 100644 --- a/src/frontend/src/utils/mod.rs +++ b/src/frontend/src/utils/mod.rs @@ -30,7 +30,6 @@ pub use rewrite_index::*; mod index_set; pub use index_set::*; pub(crate) mod group_by; -pub mod infer_stmt_row_desc; pub mod overwrite_options; pub use group_by::*; pub use overwrite_options::*; From 1dd61bc25657e44abc10fa1b54104a3359350f83 Mon Sep 17 00:00:00 2001 From: August Date: Fri, 23 Feb 2024 11:31:41 +0800 Subject: [PATCH 11/24] fix(sql-backend): fix error message for altering unrecognized system parameter (#15161) --- e2e_test/error_ui/simple/main.slt | 2 +- src/common/src/system_param/mod.rs | 2 +- src/meta/src/controller/system_param.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/e2e_test/error_ui/simple/main.slt b/e2e_test/error_ui/simple/main.slt index b4cebbdfeff70..3197544b45d75 100644 --- a/e2e_test/error_ui/simple/main.slt +++ b/e2e_test/error_ui/simple/main.slt @@ -27,7 +27,7 @@ db error: ERROR: Failed to run the query Caused by these errors (recent errors listed first): 1: gRPC request to meta service failed: Internal error - 2: SystemParams error: unrecognized system param "not_exist_key" + 2: SystemParams error: unrecognized system parameter "not_exist_key" query error diff --git a/src/common/src/system_param/mod.rs b/src/common/src/system_param/mod.rs index 278390887dd51..82677e57e9753 100644 --- a/src/common/src/system_param/mod.rs +++ b/src/common/src/system_param/mod.rs @@ -340,7 +340,7 @@ macro_rules! impl_set_system_param { )* _ => { Err(format!( - "unrecognized system param {:?}", + "unrecognized system parameter {:?}", key )) } diff --git a/src/meta/src/controller/system_param.rs b/src/meta/src/controller/system_param.rs index 4b2e598a2c221..855112acb7167 100644 --- a/src/meta/src/controller/system_param.rs +++ b/src/meta/src/controller/system_param.rs @@ -186,7 +186,7 @@ impl SystemParamsController { .await? 
else { return Err(MetaError::system_params(format!( - "unrecognized system parameter {}", + "unrecognized system parameter {:?}", name ))); }; From 07bd89042aa2261faa38e0a690f5fb49bc7ea0b8 Mon Sep 17 00:00:00 2001 From: August Date: Fri, 23 Feb 2024 11:31:57 +0800 Subject: [PATCH 12/24] feat: add some missing columns and views to support atlas (#15151) --- .../information_schema/columns.rs | 55 +++++++++++++++++-- .../catalog/system_catalog/pg_catalog/mod.rs | 1 + .../system_catalog/pg_catalog/pg_index.rs | 5 +- .../pg_catalog/pg_partitioned_table.rs | 30 ++++++++++ .../system_catalog/rw_catalog/rw_columns.rs | 21 +++++++ src/frontend/src/catalog/table_catalog.rs | 11 +++- 6 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 src/frontend/src/catalog/system_catalog/pg_catalog/pg_partitioned_table.rs diff --git a/src/frontend/src/catalog/system_catalog/information_schema/columns.rs b/src/frontend/src/catalog/system_catalog/information_schema/columns.rs index 074b772ca0bb8..a9a0d8fc4f1b5 100644 --- a/src/frontend/src/catalog/system_catalog/information_schema/columns.rs +++ b/src/frontend/src/catalog/system_catalog/information_schema/columns.rs @@ -34,13 +34,37 @@ use risingwave_frontend_macro::system_catalog; NULL::integer AS numeric_scale, c.position AS ordinal_position, 'YES' AS is_nullable, - NULL AS collation_name, - 'pg_catalog' AS udt_schema, CASE WHEN c.data_type = 'varchar' THEN 'character varying' ELSE c.data_type END AS data_type, - c.udt_type AS udt_name + CURRENT_DATABASE() AS udt_catalog, + 'pg_catalog' AS udt_schema, + c.udt_type AS udt_name, + NULL AS character_set_catalog, + NULL AS character_set_schema, + NULL AS character_set_name, + NULL AS collation_catalog, + NULL AS collation_schema, + NULL AS collation_name, + NULL AS domain_catalog, + NULL AS domain_schema, + NULL AS domain_name, + NULL AS scope_catalog, + NULL AS scope_schema, + NULL AS scope_name, + 'NO' AS is_identity, + NULL AS identity_generation, + NULL AS identity_start, + NULL AS identity_increment, + NULL AS identity_maximum, + NULL AS identity_minimum, + NULL AS identity_cycle, + CASE + WHEN c.is_generated THEN 'ALWAYS' + ELSE 'NEVER' + END AS is_generated, + c.generation_expression FROM rw_catalog.rw_columns c LEFT JOIN rw_catalog.rw_relations r ON c.relation_id = r.id JOIN rw_catalog.rw_schemas s ON s.id = r.schema_id @@ -58,8 +82,29 @@ struct Column { numeric_scale: i32, ordinal_position: i32, is_nullable: String, - collation_name: String, - udt_schema: String, data_type: String, + udt_catalog: String, + udt_schema: String, udt_name: String, + character_set_catalog: String, + character_set_schema: String, + character_set_name: String, + collation_catalog: String, + collation_schema: String, + collation_name: String, + domain_catalog: String, + domain_schema: String, + domain_name: String, + scope_catalog: String, + scope_schema: String, + scope_name: String, + is_identity: String, + identity_generation: String, + identity_start: String, + identity_increment: String, + identity_maximum: String, + identity_minimum: String, + identity_cycle: String, + is_generated: String, + generation_expression: String, } diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs index c1a935803f9f4..ce97aeaac552c 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs @@ -35,6 +35,7 @@ mod pg_matviews; mod pg_namespace; mod pg_opclass; mod pg_operator; +mod 
pg_partitioned_table; mod pg_proc; mod pg_roles; mod pg_settings; diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs index 196c36ec7f1af..2dfb15f9e527b 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs @@ -28,7 +28,8 @@ use risingwave_frontend_macro::system_catalog; ARRAY[]::smallint[] as indoption, NULL AS indexprs, NULL AS indpred, - FALSE AS indisprimary + FALSE AS indisprimary, + ARRAY[]::int[] AS indclass FROM rw_catalog.rw_indexes" )] #[derive(Fields)] @@ -46,4 +47,6 @@ struct PgIndex { indpred: Option, // TODO: we return false as the default value. indisprimary: bool, + // Empty array. We only have a dummy implementation of `pg_opclass` yet. + indclass: Vec, } diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_partitioned_table.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_partitioned_table.rs new file mode 100644 index 0000000000000..e11739e2609fd --- /dev/null +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_partitioned_table.rs @@ -0,0 +1,30 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use risingwave_common::types::Fields; +use risingwave_frontend_macro::system_catalog; + +/// The catalog `pg_partitioned_table` stores information about how tables are partitioned. 
Reference: [`https://www.postgresql.org/docs/current/catalog-pg-partitioned-table.html`] +#[system_catalog(view, "pg_catalog.pg_partitioned_table")] +#[derive(Fields)] +struct PgPartitionedTable { + partrelid: i32, + partstrat: String, + partnatts: i16, + partdefid: i32, + partattrs: Vec, + partclass: Vec, + partcollation: Vec, + partexprs: Option, +} diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs index 40760df81a492..8491da7062711 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs @@ -17,6 +17,7 @@ use risingwave_frontend_macro::system_catalog; use crate::catalog::system_catalog::SysCatalogReaderImpl; use crate::error::Result; +use crate::expr::{ExprDisplay, ExprImpl}; #[derive(Fields)] #[primary_key(relation_id, name)] @@ -27,6 +28,8 @@ struct RwColumn { is_hidden: bool, is_primary_key: bool, is_distribution_key: bool, + is_generated: bool, + generation_expression: Option, data_type: String, type_oid: i32, type_len: i16, @@ -51,6 +54,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { is_hidden: false, is_primary_key: false, is_distribution_key: false, + is_generated: false, + generation_expression: None, data_type: column.data_type().to_string(), type_oid: column.data_type().to_oid(), type_len: column.data_type().type_len(), @@ -71,6 +76,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { is_hidden: column.is_hidden, is_primary_key: sink.downstream_pk.contains(&index), is_distribution_key: sink.distribution_key.contains(&index), + is_generated: false, + generation_expression: None, data_type: column.data_type().to_string(), type_oid: column.data_type().to_oid(), type_len: column.data_type().type_len(), @@ -93,6 +100,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { is_hidden: column.is_hidden, is_primary_key: table.pk.contains(&index), is_distribution_key: false, + is_generated: false, + generation_expression: None, data_type: column.data_type().to_string(), type_oid: column.data_type().to_oid(), type_len: column.data_type().type_len(), @@ -104,6 +113,7 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { let table_rows = schema .iter_valid_table() .flat_map(|table| { + let schema = table.column_schema(); table .columns .iter() @@ -115,6 +125,15 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { is_hidden: column.is_hidden, is_primary_key: table.pk().iter().any(|idx| idx.column_index == index), is_distribution_key: table.distribution_key.contains(&index), + is_generated: column.is_generated(), + generation_expression: column.generated_expr().map(|expr_node| { + let expr = ExprImpl::from_expr_proto(expr_node).unwrap(); + let expr_display = ExprDisplay { + expr: &expr, + input_schema: &schema, + }; + expr_display.to_string() + }), data_type: column.data_type().to_string(), type_oid: column.data_type().to_oid(), type_len: column.data_type().type_len(), @@ -138,6 +157,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { is_hidden: column.is_hidden, is_primary_key: source.pk_col_ids.contains(&column.column_id()), is_distribution_key: false, + is_generated: false, + generation_expression: None, data_type: column.data_type().to_string(), type_oid: column.data_type().to_oid(), type_len: column.data_type().type_len(), diff --git a/src/frontend/src/catalog/table_catalog.rs 
b/src/frontend/src/catalog/table_catalog.rs index fbb77a0ca0bb5..2954cb37384dc 100644 --- a/src/frontend/src/catalog/table_catalog.rs +++ b/src/frontend/src/catalog/table_catalog.rs @@ -17,7 +17,7 @@ use std::collections::{HashMap, HashSet}; use fixedbitset::FixedBitSet; use itertools::Itertools; use risingwave_common::catalog::{ - ColumnCatalog, ConflictBehavior, TableDesc, TableId, TableVersionId, + ColumnCatalog, ConflictBehavior, Field, Schema, TableDesc, TableId, TableVersionId, }; use risingwave_common::util::epoch::Epoch; use risingwave_common::util::sort_util::ColumnOrder; @@ -492,6 +492,15 @@ impl TableCatalog { pub fn has_generated_column(&self) -> bool { self.columns.iter().any(|c| c.is_generated()) } + + pub fn column_schema(&self) -> Schema { + Schema::new( + self.columns + .iter() + .map(|c| Field::from(&c.column_desc)) + .collect(), + ) + } } impl From for TableCatalog { From 304709b2d92b504add49700b13b4da23f9d9ee58 Mon Sep 17 00:00:00 2001 From: Shanicky Chen Date: Fri, 23 Feb 2024 11:50:39 +0800 Subject: [PATCH 13/24] fix: refine cycle check for sink into table (#15170) --- e2e_test/sink/sink_into_table/basic.slt | 29 +++++++++++++++++++ src/frontend/src/catalog/table_catalog.rs | 8 +++++ src/frontend/src/handler/create_sink.rs | 10 ++++++- .../optimizer/plan_node/stream_materialize.rs | 1 + src/frontend/src/optimizer/plan_node/utils.rs | 1 + .../src/scheduler/distributed/query.rs | 1 + 6 files changed, 49 insertions(+), 1 deletion(-) diff --git a/e2e_test/sink/sink_into_table/basic.slt b/e2e_test/sink/sink_into_table/basic.slt index 1bc5a47907077..890087e207fd0 100644 --- a/e2e_test/sink/sink_into_table/basic.slt +++ b/e2e_test/sink/sink_into_table/basic.slt @@ -362,6 +362,35 @@ drop table t_b; statement ok drop table t_c; +# cycle check (with materialize view) + +statement ok +create table t_a(v int primary key); + +statement ok +create materialized view m_a as select v from t_a; + +statement ok +create table t_b(v int primary key); + +statement ok +create sink s_a into t_b as select v from m_a; + +statement error Creating such a sink will result in circular dependency +create sink s_b into t_a as select v from t_b; + +statement ok +drop sink s_a; + +statement ok +drop table t_b; + +statement ok +drop materialized view m_a; + +statement ok +drop table t_a; + # multi sinks statement ok diff --git a/src/frontend/src/catalog/table_catalog.rs b/src/frontend/src/catalog/table_catalog.rs index 2954cb37384dc..edb458997e33f 100644 --- a/src/frontend/src/catalog/table_catalog.rs +++ b/src/frontend/src/catalog/table_catalog.rs @@ -74,6 +74,8 @@ pub struct TableCatalog { pub name: String, + pub dependent_relations: Vec, + /// All columns in this table. 
pub columns: Vec, @@ -573,6 +575,11 @@ impl From for TableCatalog { created_at_cluster_version: tb.created_at_cluster_version.clone(), initialized_at_cluster_version: tb.initialized_at_cluster_version.clone(), retention_seconds: tb.retention_seconds, + dependent_relations: tb + .dependent_relations + .into_iter() + .map(TableId::from) + .collect_vec(), } } } @@ -724,6 +731,7 @@ mod tests { incoming_sinks: vec![], created_at_cluster_version: None, initialized_at_cluster_version: None, + dependent_relations: vec![], } ); assert_eq!(table, TableCatalog::from(table.to_prost(0, 0))); diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs index de8e93e04a784..830253675c1bd 100644 --- a/src/frontend/src/handler/create_sink.rs +++ b/src/frontend/src/handler/create_sink.rs @@ -504,7 +504,7 @@ fn check_cycle_for_sink( if let Ok(table) = reader.get_table_by_id(table_id) { visit_table(session, reader, sink_index, table.as_ref(), visited_tables)? } else { - bail!("table not found: {:?}", table_id); + bail!("streaming job not found: {:?}", table_id); } } @@ -533,6 +533,14 @@ fn check_cycle_for_sink( } } + for table_id in &table.dependent_relations { + if let Ok(table) = reader.get_table_by_id(table_id) { + visit_table(session, reader, sink_index, table.as_ref(), visited_tables)? + } else { + bail!("streaming job not found: {:?}", table_id); + } + } + Ok(()) } diff --git a/src/frontend/src/optimizer/plan_node/stream_materialize.rs b/src/frontend/src/optimizer/plan_node/stream_materialize.rs index 3abc7ace0e494..f2acbcf9d258c 100644 --- a/src/frontend/src/optimizer/plan_node/stream_materialize.rs +++ b/src/frontend/src/optimizer/plan_node/stream_materialize.rs @@ -226,6 +226,7 @@ impl StreamMaterialize { id: TableId::placeholder(), associated_source_id: None, name, + dependent_relations: vec![], columns, pk: table_pk, stream_key, diff --git a/src/frontend/src/optimizer/plan_node/utils.rs b/src/frontend/src/optimizer/plan_node/utils.rs index 39d9ff5e7018d..c8cd1bb05fa83 100644 --- a/src/frontend/src/optimizer/plan_node/utils.rs +++ b/src/frontend/src/optimizer/plan_node/utils.rs @@ -141,6 +141,7 @@ impl TableCatalogBuilder { id: TableId::placeholder(), associated_source_id: None, name: String::new(), + dependent_relations: vec![], columns: self.columns.clone(), pk: self.pk, stream_key: vec![], diff --git a/src/frontend/src/scheduler/distributed/query.rs b/src/frontend/src/scheduler/distributed/query.rs index 6295d8036b566..515a83d0923ef 100644 --- a/src/frontend/src/scheduler/distributed/query.rs +++ b/src/frontend/src/scheduler/distributed/query.rs @@ -543,6 +543,7 @@ pub(crate) mod tests { id: table_id, associated_source_id: None, name: "test".to_string(), + dependent_relations: vec![], columns: vec![ ColumnCatalog { column_desc: ColumnDesc::new_atomic(DataType::Int32, "a", 0), From 219b1b1479e5dfb9068ca23a1307e5290f05bb37 Mon Sep 17 00:00:00 2001 From: William Wen <44139337+wenym1@users.noreply.github.com> Date: Fri, 23 Feb 2024 12:23:14 +0800 Subject: [PATCH 14/24] refactor(meta): track finished create mv job in tracker (#15112) --- src/meta/src/barrier/command.rs | 9 --- src/meta/src/barrier/mod.rs | 101 +++++++------------------------ src/meta/src/barrier/progress.rs | 52 ++++++++++------ 3 files changed, 55 insertions(+), 107 deletions(-) diff --git a/src/meta/src/barrier/command.rs b/src/meta/src/barrier/command.rs index 07765fe840c38..71fc9b98b355b 100644 --- a/src/meta/src/barrier/command.rs +++ b/src/meta/src/barrier/command.rs @@ -709,15 +709,6 @@ impl 
CommandContext { } } - /// For `CancelStreamingJob`, returns the actors of the `StreamScan` nodes. For other commands, - /// returns an empty set. - pub fn actors_to_cancel(&self) -> HashSet { - match &self.command { - Command::CancelStreamingJob(table_fragments) => table_fragments.backfill_actor_ids(), - _ => Default::default(), - } - } - /// For `CancelStreamingJob`, returns the table id of the target table. pub fn table_to_cancel(&self) -> Option { match &self.command { diff --git a/src/meta/src/barrier/mod.rs b/src/meta/src/barrier/mod.rs index 47bef49c66574..bd2f24f1baf46 100644 --- a/src/meta/src/barrier/mod.rs +++ b/src/meta/src/barrier/mod.rs @@ -50,7 +50,7 @@ use self::notifier::Notifier; use self::progress::TrackingCommand; use crate::barrier::info::InflightActorInfo; use crate::barrier::notifier::BarrierInfo; -use crate::barrier::progress::{CreateMviewProgressTracker, TrackingJob}; +use crate::barrier::progress::CreateMviewProgressTracker; use crate::barrier::rpc::BarrierRpcManager; use crate::barrier::state::BarrierManagerState; use crate::barrier::BarrierEpochState::{Completed, InFlight}; @@ -152,7 +152,7 @@ pub struct GlobalBarrierManagerContext { sink_manager: SinkCoordinatorManager, - metrics: Arc, + pub(super) metrics: Arc, stream_rpc_manager: StreamRpcManager, @@ -196,76 +196,34 @@ struct CheckpointControl { /// Save the state and message of barrier in order. command_ctx_queue: VecDeque, - metrics: Arc, - - /// Get notified when we finished Create MV and collect a barrier(checkpoint = true) - finished_jobs: Vec, + context: GlobalBarrierManagerContext, } impl CheckpointControl { - fn new(metrics: Arc) -> Self { + fn new(context: GlobalBarrierManagerContext) -> Self { Self { command_ctx_queue: Default::default(), - metrics, - finished_jobs: Default::default(), - } - } - - /// Stash a command to finish later. - fn stash_command_to_finish(&mut self, finished_job: TrackingJob) { - self.finished_jobs.push(finished_job); - } - - /// Finish stashed jobs. - /// If checkpoint, means all jobs can be finished. - /// If not checkpoint, jobs which do not require checkpoint can be finished. - /// - /// Returns whether there are still remaining stashed jobs to finish. - async fn finish_jobs(&mut self, checkpoint: bool) -> MetaResult { - for job in self - .finished_jobs - .extract_if(|job| checkpoint || !job.is_checkpoint_required()) - { - // The command is ready to finish. We can now call `pre_finish`. - job.pre_finish().await?; - job.notify_finished(); - } - Ok(!self.finished_jobs.is_empty()) - } - - fn cancel_command(&mut self, cancelled_job: TrackingJob) { - if let TrackingJob::New(cancelled_command) = cancelled_job { - if let Some(index) = self.command_ctx_queue.iter().position(|x| { - x.command_ctx.prev_epoch.value() == cancelled_command.context.prev_epoch.value() - }) { - self.command_ctx_queue.remove(index); - } - } else { - // Recovered jobs do not need to be cancelled since only `RUNNING` actors will get recovered. + context, } } - fn cancel_stashed_command(&mut self, id: TableId) { - self.finished_jobs - .retain(|x| x.table_to_create() != Some(id)); - } - /// Update the metrics of barrier nums. 
fn update_barrier_nums_metrics(&self) { - self.metrics.in_flight_barrier_nums.set( + self.context.metrics.in_flight_barrier_nums.set( self.command_ctx_queue .iter() .filter(|x| matches!(x.state, InFlight)) .count() as i64, ); - self.metrics + self.context + .metrics .all_barrier_nums .set(self.command_ctx_queue.len() as i64); } /// Enqueue a barrier command, and init its state to `InFlight`. fn enqueue_command(&mut self, command_ctx: Arc, notifiers: Vec) { - let timer = self.metrics.barrier_latency.start_timer(); + let timer = self.context.metrics.barrier_latency.start_timer(); self.command_ctx_queue.push_back(EpochNode { timer: Some(timer), @@ -285,7 +243,11 @@ impl CheckpointControl { result: Vec, ) -> Vec { // change state to complete, and wait for nodes with the smaller epoch to commit - let wait_commit_timer = self.metrics.barrier_wait_commit_latency.start_timer(); + let wait_commit_timer = self + .context + .metrics + .barrier_wait_commit_latency + .start_timer(); if let Some(node) = self .command_ctx_queue .iter_mut() @@ -341,11 +303,6 @@ impl CheckpointControl { .iter() .any(|x| x.command_ctx.prev_epoch.value().0 == epoch) } - - /// We need to make sure there are no changes when doing recovery - pub fn clear_changes(&mut self) { - self.finished_jobs.clear(); - } } /// The state and message of this barrier, a node for concurrent checkpoint. @@ -401,7 +358,6 @@ impl GlobalBarrierManager { InflightActorInfo::default(), None, ); - let checkpoint_control = CheckpointControl::new(metrics.clone()); let active_streaming_nodes = ActiveStreamingWorkerNodes::uninitialized(); @@ -420,6 +376,8 @@ impl GlobalBarrierManager { env: env.clone(), }; + let checkpoint_control = CheckpointControl::new(context.clone()); + let rpc_manager = BarrierRpcManager::new(context.clone()); Self { @@ -738,7 +696,6 @@ impl GlobalBarrierManager { err: MetaError, fail_nodes: impl IntoIterator, ) { - self.checkpoint_control.clear_changes(); self.rpc_manager.clear(); for node in fail_nodes { @@ -833,20 +790,13 @@ impl GlobalBarrierManager { notifier.notify_collected(); }); - // Save `cancelled_command` for Create MVs. - let actors_to_cancel = node.command_ctx.actors_to_cancel(); - let cancelled_command = if !actors_to_cancel.is_empty() { - let mut tracker = self.context.tracker.lock().await; - tracker.find_cancelled_command(actors_to_cancel) - } else { - None - }; + // Notify about collected. + let version_stats = self.context.hummock_manager.get_version_stats().await; + let mut tracker = self.context.tracker.lock().await; // Save `finished_commands` for Create MVs. let finished_commands = { let mut commands = vec![]; - let version_stats = self.context.hummock_manager.get_version_stats().await; - let mut tracker = self.context.tracker.lock().await; // Add the command to tracker. if let Some(command) = tracker.add( TrackingCommand { @@ -872,21 +822,16 @@ impl GlobalBarrierManager { }; for command in finished_commands { - self.checkpoint_control.stash_command_to_finish(command); + tracker.stash_command_to_finish(command); } - if let Some(command) = cancelled_command { - self.checkpoint_control.cancel_command(command); - } else if let Some(table_id) = node.command_ctx.table_to_cancel() { + if let Some(table_id) = node.command_ctx.table_to_cancel() { // the cancelled command is possibly stashed in `finished_commands` and waiting // for checkpoint, we should also clear it. 
- self.checkpoint_control.cancel_stashed_command(table_id); + tracker.cancel_command(table_id); } - let remaining = self - .checkpoint_control - .finish_jobs(kind.is_checkpoint()) - .await?; + let remaining = tracker.finish_jobs(kind.is_checkpoint()).await?; // If there are remaining commands (that requires checkpoint to finish), we force // the next barrier to be a checkpoint. if remaining { diff --git a/src/meta/src/barrier/progress.rs b/src/meta/src/barrier/progress.rs index f22c5a2bbb216..5c1e701e6fc81 100644 --- a/src/meta/src/barrier/progress.rs +++ b/src/meta/src/barrier/progress.rs @@ -16,7 +16,6 @@ use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::sync::Arc; -use itertools::Itertools; use risingwave_common::catalog::TableId; use risingwave_common::util::epoch::Epoch; use risingwave_pb::ddl_service::DdlProgress; @@ -44,7 +43,7 @@ enum BackfillState { /// Progress of all actors containing backfill executors while creating mview. #[derive(Debug)] -struct Progress { +pub(super) struct Progress { states: HashMap, done_count: usize, @@ -254,6 +253,9 @@ pub(super) struct CreateMviewProgressTracker { /// Find the epoch of the create-mview DDL by the actor containing the backfill executors. actor_map: HashMap, + + /// Get notified when we finished Create MV and collect a barrier(checkpoint = true) + finished_jobs: Vec, } impl CreateMviewProgressTracker { @@ -313,6 +315,7 @@ impl CreateMviewProgressTracker { Self { progress_map, actor_map, + finished_jobs: Vec::new(), } } @@ -320,6 +323,7 @@ impl CreateMviewProgressTracker { Self { progress_map: Default::default(), actor_map: Default::default(), + finished_jobs: Vec::new(), } } @@ -338,25 +342,33 @@ impl CreateMviewProgressTracker { .collect() } - /// Try to find the target create-streaming-job command from track. + /// Stash a command to finish later. + pub(super) fn stash_command_to_finish(&mut self, finished_job: TrackingJob) { + self.finished_jobs.push(finished_job); + } + + /// Finish stashed jobs. + /// If checkpoint, means all jobs can be finished. + /// If not checkpoint, jobs which do not require checkpoint can be finished. /// - /// Return the target command as it should be cancelled based on the input actors. - pub fn find_cancelled_command( - &mut self, - actors_to_cancel: HashSet, - ) -> Option { - let epochs = actors_to_cancel - .into_iter() - .map(|actor_id| self.actor_map.get(&actor_id)) - .collect_vec(); - assert!(epochs.iter().all_equal()); - // If the target command found in progress map, return and remove it. Note that the command - // should have finished if not found. - if let Some(Some(epoch)) = epochs.first() { - Some(self.progress_map.remove(epoch).unwrap().1) - } else { - None + /// Returns whether there are still remaining stashed jobs to finish. + pub(super) async fn finish_jobs(&mut self, checkpoint: bool) -> MetaResult { + for job in self + .finished_jobs + .extract_if(|job| checkpoint || !job.is_checkpoint_required()) + { + // The command is ready to finish. We can now call `pre_finish`. + job.pre_finish().await?; + job.notify_finished(); } + Ok(!self.finished_jobs.is_empty()) + } + + pub(super) fn cancel_command(&mut self, id: TableId) { + let _ = self.progress_map.remove(&id); + self.finished_jobs + .retain(|x| x.table_to_create() != Some(id)); + self.actor_map.retain(|_, table_id| *table_id != id); } /// Add a new create-mview DDL command to track. @@ -496,7 +508,7 @@ impl CreateMviewProgressTracker { table_id ); - // Clean-up the mapping from actors to DDL epoch. 
+ // Clean-up the mapping from actors to DDL table_id. for actor in o.get().0.actors() { self.actor_map.remove(&actor); } From c5f90144469f9781e1a76ada368b1221eca935cb Mon Sep 17 00:00:00 2001 From: Eric Fu Date: Fri, 23 Feb 2024 12:35:20 +0800 Subject: [PATCH 15/24] chore: fix `risedev kill` (#15191) --- Makefile.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.toml b/Makefile.toml index 983b304d74e51..8820acf67c7bd 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -757,10 +757,10 @@ tmux list-windows -t risedev -F "#{window_name} #{pane_id}" \ if [[ -n $(tmux list-windows -t risedev | grep kafka) ]]; then echo "kill kafka" - kill_kafka + kill_kafka || true echo "kill zookeeper" - kill_zookeeper + kill_zookeeper || true # Kill their tmux sessions tmux list-windows -t risedev -F "#{pane_id}" | xargs -I {} tmux send-keys -t {} C-c C-d From 86df42b0252a7c6d3adb5c17e4aab60fa884df4a Mon Sep 17 00:00:00 2001 From: Eric Fu Date: Fri, 23 Feb 2024 12:40:58 +0800 Subject: [PATCH 16/24] chore: bump version of spotless plugin (#15203) --- .../main/java/com/risingwave/connector/CassandraConfig.java | 1 + java/pom.xml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java b/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java index 9ac3d257b2bad..7c883335cfc23 100644 --- a/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java +++ b/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java @@ -23,6 +23,7 @@ public class CassandraConfig extends CommonSinkConfig { /** Required */ private String type; + /** Required */ private String url; diff --git a/java/pom.xml b/java/pom.xml index 5f168c48bd9ef..c6e39b34cfc0b 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -69,7 +69,7 @@ 1.53.0 2.10 0.1.0-SNAPSHOT - 2.27.1 + 2.43.0 2.20.0 2.0.9 1.5.0 @@ -391,7 +391,7 @@ - 1.7 + 1.20.0 From 0c329e9aec39d73635f4af7d87ff2763a6fb9ae5 Mon Sep 17 00:00:00 2001 From: Dylan Date: Fri, 23 Feb 2024 12:53:25 +0800 Subject: [PATCH 17/24] feat(frontend): support create iceberg source (#14971) --- proto/plan_common.proto | 2 + src/connector/src/macros.rs | 3 +- src/connector/src/sink/catalog/mod.rs | 9 +- src/connector/src/sink/iceberg/mod.rs | 2 +- src/connector/src/source/base.rs | 2 +- src/connector/src/source/iceberg/mod.rs | 128 ++++++++++++++++++ src/connector/src/source/mod.rs | 1 + src/connector/with_options_source.yaml | 29 ++++ .../src/handler/alter_source_with_sr.rs | 2 + src/frontend/src/handler/create_sink.rs | 4 +- src/frontend/src/handler/create_source.rs | 112 +++++++++++++-- src/frontend/src/handler/create_table.rs | 23 +++- src/frontend/src/handler/util.rs | 14 ++ .../src/optimizer/plan_node/logical_source.rs | 15 +- src/sqlparser/src/ast/statement.rs | 26 ++++ 15 files changed, 343 insertions(+), 29 deletions(-) create mode 100644 src/connector/src/source/iceberg/mod.rs diff --git a/proto/plan_common.proto b/proto/plan_common.proto index 82f9fbc63a0f8..1dd45ad08a6ef 100644 --- a/proto/plan_common.proto +++ b/proto/plan_common.proto @@ -136,6 +136,7 @@ enum FormatType { FORMAT_TYPE_CANAL = 5; FORMAT_TYPE_UPSERT = 6; FORMAT_TYPE_PLAIN = 7; + FORMAT_TYPE_NONE = 8; } enum EncodeType { @@ -147,6 +148,7 @@ enum EncodeType { ENCODE_TYPE_JSON = 5; ENCODE_TYPE_BYTES = 6; ENCODE_TYPE_TEMPLATE = 7; + ENCODE_TYPE_NONE = 
8; } enum RowFormatType { diff --git a/src/connector/src/macros.rs b/src/connector/src/macros.rs index 9a2383dbb4a96..e34171717ae6c 100644 --- a/src/connector/src/macros.rs +++ b/src/connector/src/macros.rs @@ -36,7 +36,8 @@ macro_rules! for_all_classified_sources { { Gcs, $crate::source::filesystem::opendal_source::GcsProperties , $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalGcs> }, { OpendalS3, $crate::source::filesystem::opendal_source::OpendalS3Properties, $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalS3> }, { PosixFs, $crate::source::filesystem::opendal_source::PosixFsProperties, $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalPosixFs> }, - { Test, $crate::source::test_source::TestSourceProperties, $crate::source::test_source::TestSourceSplit} + { Test, $crate::source::test_source::TestSourceProperties, $crate::source::test_source::TestSourceSplit}, + { Iceberg, $crate::source::iceberg::IcebergProperties, $crate::source::iceberg::IcebergSplit} } $( ,$extra_args diff --git a/src/connector/src/sink/catalog/mod.rs b/src/connector/src/sink/catalog/mod.rs index d4e38cac4d1c9..e6a654f75a5fd 100644 --- a/src/connector/src/sink/catalog/mod.rs +++ b/src/connector/src/sink/catalog/mod.rs @@ -205,7 +205,12 @@ impl TryFrom for SinkFormatDesc { F::Plain => SinkFormat::AppendOnly, F::Upsert => SinkFormat::Upsert, F::Debezium => SinkFormat::Debezium, - f @ (F::Unspecified | F::Native | F::DebeziumMongo | F::Maxwell | F::Canal) => { + f @ (F::Unspecified + | F::Native + | F::DebeziumMongo + | F::Maxwell + | F::Canal + | F::None) => { return Err(SinkError::Config(anyhow!( "sink format unsupported: {}", f.as_str_name() @@ -217,7 +222,7 @@ impl TryFrom for SinkFormatDesc { E::Protobuf => SinkEncode::Protobuf, E::Template => SinkEncode::Template, E::Avro => SinkEncode::Avro, - e @ (E::Unspecified | E::Native | E::Csv | E::Bytes) => { + e @ (E::Unspecified | E::Native | E::Csv | E::Bytes | E::None) => { return Err(SinkError::Config(anyhow!( "sink encode unsupported: {}", e.as_str_name() diff --git a/src/connector/src/sink/iceberg/mod.rs b/src/connector/src/sink/iceberg/mod.rs index 68c5654533a64..326f8586d76eb 100644 --- a/src/connector/src/sink/iceberg/mod.rs +++ b/src/connector/src/sink/iceberg/mod.rs @@ -927,7 +927,7 @@ impl SinkCommitCoordinator for IcebergSinkCommitter { } /// Try to match our schema with iceberg schema. 
-fn try_matches_arrow_schema(rw_schema: &Schema, arrow_schema: &ArrowSchema) -> Result<()> { +pub fn try_matches_arrow_schema(rw_schema: &Schema, arrow_schema: &ArrowSchema) -> Result<()> { if rw_schema.fields.len() != arrow_schema.fields().len() { return Err(SinkError::Iceberg(anyhow!( "Schema length not match, ours is {}, and iceberg is {}", diff --git a/src/connector/src/source/base.rs b/src/connector/src/source/base.rs index 5b909a2738f3c..fed8e0263aac4 100644 --- a/src/connector/src/source/base.rs +++ b/src/connector/src/source/base.rs @@ -150,7 +150,7 @@ pub struct SourceEnumeratorContext { pub connector_client: Option, } -#[derive(Clone, Copy, Debug, Default)] +#[derive(Clone, Debug, Default)] pub struct SourceEnumeratorInfo { pub source_id: u32, } diff --git a/src/connector/src/source/iceberg/mod.rs b/src/connector/src/source/iceberg/mod.rs new file mode 100644 index 0000000000000..e274f639f15b2 --- /dev/null +++ b/src/connector/src/source/iceberg/mod.rs @@ -0,0 +1,128 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; + +use async_trait::async_trait; +use risingwave_common::types::JsonbVal; +use serde::{Deserialize, Serialize}; + +use crate::parser::ParserConfig; +use crate::source::{ + BoxChunkSourceStream, Column, SourceContextRef, SourceEnumeratorContextRef, SourceProperties, + SplitEnumerator, SplitId, SplitMetaData, SplitReader, UnknownFields, +}; + +pub const ICEBERG_CONNECTOR: &str = "iceberg"; + +#[derive(Clone, Debug, Deserialize, PartialEq, with_options::WithOptions)] +pub struct IcebergProperties { + #[serde(rename = "catalog.type")] + pub catalog_type: String, + #[serde(rename = "s3.region")] + pub region_name: String, + #[serde(rename = "s3.endpoint", default)] + pub endpoint: String, + #[serde(rename = "s3.access.key", default)] + pub s3_access: String, + #[serde(rename = "s3.secret.key", default)] + pub s3_secret: String, + #[serde(rename = "warehouse.path")] + pub warehouse_path: String, + #[serde(rename = "database.name")] + pub database_name: String, + #[serde(rename = "table.name")] + pub table_name: String, + + #[serde(flatten)] + pub unknown_fields: HashMap, +} + +impl SourceProperties for IcebergProperties { + type Split = IcebergSplit; + type SplitEnumerator = IcebergSplitEnumerator; + type SplitReader = IcebergFileReader; + + const SOURCE_NAME: &'static str = ICEBERG_CONNECTOR; +} + +impl UnknownFields for IcebergProperties { + fn unknown_fields(&self) -> HashMap { + self.unknown_fields.clone() + } +} + +#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] +pub struct IcebergSplit {} + +impl SplitMetaData for IcebergSplit { + fn id(&self) -> SplitId { + unimplemented!() + } + + fn restore_from_json(_value: JsonbVal) -> anyhow::Result { + unimplemented!() + } + + fn encode_to_json(&self) -> JsonbVal { + unimplemented!() + } + + fn update_with_offset(&mut self, _start_offset: String) -> anyhow::Result<()> { + unimplemented!() + } +} + +#[derive(Debug, Clone)] +pub 
struct IcebergSplitEnumerator {} + +#[async_trait] +impl SplitEnumerator for IcebergSplitEnumerator { + type Properties = IcebergProperties; + type Split = IcebergSplit; + + async fn new( + _properties: Self::Properties, + _context: SourceEnumeratorContextRef, + ) -> anyhow::Result { + Ok(Self {}) + } + + async fn list_splits(&mut self) -> anyhow::Result> { + Ok(vec![]) + } +} + +#[derive(Debug)] +pub struct IcebergFileReader {} + +#[async_trait] +impl SplitReader for IcebergFileReader { + type Properties = IcebergProperties; + type Split = IcebergSplit; + + async fn new( + _props: IcebergProperties, + _splits: Vec, + _parser_config: ParserConfig, + _source_ctx: SourceContextRef, + _columns: Option>, + ) -> anyhow::Result { + unimplemented!() + } + + fn into_stream(self) -> BoxChunkSourceStream { + unimplemented!() + } +} diff --git a/src/connector/src/source/mod.rs b/src/connector/src/source/mod.rs index cba63b3005c1a..3656820ed95b0 100644 --- a/src/connector/src/source/mod.rs +++ b/src/connector/src/source/mod.rs @@ -31,6 +31,7 @@ pub use kafka::KAFKA_CONNECTOR; pub use kinesis::KINESIS_CONNECTOR; pub use nats::NATS_CONNECTOR; mod common; +pub mod iceberg; mod manager; pub mod reader; pub mod test_source; diff --git a/src/connector/with_options_source.yaml b/src/connector/with_options_source.yaml index 2d811ce639c96..dec3cf6a8941a 100644 --- a/src/connector/with_options_source.yaml +++ b/src/connector/with_options_source.yaml @@ -33,6 +33,35 @@ GcsProperties: field_type: String required: false default: Default::default +IcebergProperties: + fields: + - name: catalog.type + field_type: String + required: true + - name: s3.region + field_type: String + required: true + - name: s3.endpoint + field_type: String + required: false + default: Default::default + - name: s3.access.key + field_type: String + required: false + default: Default::default + - name: s3.secret.key + field_type: String + required: false + default: Default::default + - name: warehouse.path + field_type: String + required: true + - name: database.name + field_type: String + required: true + - name: table.name + field_type: String + required: true KafkaProperties: fields: - name: bytes.per.second diff --git a/src/frontend/src/handler/alter_source_with_sr.rs b/src/frontend/src/handler/alter_source_with_sr.rs index a8e6892e5a908..06bb2d0387479 100644 --- a/src/frontend/src/handler/alter_source_with_sr.rs +++ b/src/frontend/src/handler/alter_source_with_sr.rs @@ -42,6 +42,7 @@ fn format_type_to_format(from: FormatType) -> Option { FormatType::Canal => Format::Canal, FormatType::Upsert => Format::Upsert, FormatType::Plain => Format::Plain, + FormatType::None => Format::None, }) } @@ -55,6 +56,7 @@ fn encode_type_to_encode(from: EncodeType) -> Option { EncodeType::Json => Encode::Json, EncodeType::Bytes => Encode::Bytes, EncodeType::Template => Encode::Template, + EncodeType::None => Encode::None, }) } diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs index 830253675c1bd..245976bd913b9 100644 --- a/src/frontend/src/handler/create_sink.rs +++ b/src/frontend/src/handler/create_sink.rs @@ -701,7 +701,7 @@ fn bind_sink_format_desc(value: ConnectorSchema) -> Result { F::Plain => SinkFormat::AppendOnly, F::Upsert => SinkFormat::Upsert, F::Debezium => SinkFormat::Debezium, - f @ (F::Native | F::DebeziumMongo | F::Maxwell | F::Canal) => { + f @ (F::Native | F::DebeziumMongo | F::Maxwell | F::Canal | F::None) => { return Err(ErrorCode::BindError(format!("sink format unsupported: 
{f}")).into()); } }; @@ -710,7 +710,7 @@ fn bind_sink_format_desc(value: ConnectorSchema) -> Result { E::Protobuf => SinkEncode::Protobuf, E::Avro => SinkEncode::Avro, E::Template => SinkEncode::Template, - e @ (E::Native | E::Csv | E::Bytes) => { + e @ (E::Native | E::Csv | E::Bytes | E::None) => { return Err(ErrorCode::BindError(format!("sink encode unsupported: {e}")).into()); } }; diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs index 0fb4d1cd022f4..bbb2d93b21790 100644 --- a/src/frontend/src/handler/create_source.rs +++ b/src/frontend/src/handler/create_source.rs @@ -16,13 +16,13 @@ use std::collections::{BTreeMap, HashMap}; use std::rc::Rc; use std::sync::LazyLock; -use anyhow::Context; +use anyhow::{anyhow, Context}; use either::Either; use itertools::Itertools; use maplit::{convert_args, hashmap}; use pgwire::pg_response::{PgResponse, StatementType}; use risingwave_common::catalog::{ - is_column_ids_dedup, ColumnCatalog, ColumnDesc, TableId, INITIAL_SOURCE_VERSION_ID, + is_column_ids_dedup, ColumnCatalog, ColumnDesc, Schema, TableId, INITIAL_SOURCE_VERSION_ID, KAFKA_TIMESTAMP_COLUMN_NAME, }; use risingwave_common::types::DataType; @@ -36,17 +36,20 @@ use risingwave_connector::parser::{ use risingwave_connector::schema::schema_registry::{ name_strategy_from_str, SchemaRegistryAuth, SCHEMA_REGISTRY_PASSWORD, SCHEMA_REGISTRY_USERNAME, }; +use risingwave_connector::sink::iceberg::IcebergConfig; use risingwave_connector::source::cdc::external::CdcTableType; use risingwave_connector::source::cdc::{ CDC_SHARING_MODE_KEY, CDC_SNAPSHOT_BACKFILL, CDC_SNAPSHOT_MODE_KEY, CDC_TRANSACTIONAL_KEY, CITUS_CDC_CONNECTOR, MYSQL_CDC_CONNECTOR, POSTGRES_CDC_CONNECTOR, }; use risingwave_connector::source::datagen::DATAGEN_CONNECTOR; +use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR; use risingwave_connector::source::nexmark::source::{get_event_data_types_with_names, EventType}; use risingwave_connector::source::test_source::TEST_CONNECTOR; use risingwave_connector::source::{ - GCS_CONNECTOR, GOOGLE_PUBSUB_CONNECTOR, KAFKA_CONNECTOR, KINESIS_CONNECTOR, NATS_CONNECTOR, - NEXMARK_CONNECTOR, OPENDAL_S3_CONNECTOR, POSIX_FS_CONNECTOR, PULSAR_CONNECTOR, S3_CONNECTOR, + ConnectorProperties, GCS_CONNECTOR, GOOGLE_PUBSUB_CONNECTOR, KAFKA_CONNECTOR, + KINESIS_CONNECTOR, NATS_CONNECTOR, NEXMARK_CONNECTOR, OPENDAL_S3_CONNECTOR, POSIX_FS_CONNECTOR, + PULSAR_CONNECTOR, S3_CONNECTOR, }; use risingwave_pb::catalog::{ PbSchemaRegistryNameStrategy, PbSource, StreamSourceInfo, WatermarkDesc, @@ -72,7 +75,7 @@ use crate::handler::create_table::{ ensure_table_constraints_supported, ColumnIdGenerator, }; use crate::handler::util::{ - get_connector, is_cdc_connector, is_kafka_connector, SourceSchemaCompatExt, + connector_need_pk, get_connector, is_cdc_connector, is_kafka_connector, SourceSchemaCompatExt, }; use crate::handler::HandlerArgs; use crate::optimizer::plan_node::generic::SourceNodeKind; @@ -316,6 +319,7 @@ pub(crate) async fn bind_columns_from_source( let columns = match (&source_schema.format, &source_schema.row_encode) { (Format::Native, Encode::Native) + | (Format::None, Encode::None) | (Format::Plain, Encode::Bytes) | (Format::DebeziumMongo, Encode::Json) => None, (Format::Plain, Encode::Protobuf) => { @@ -706,7 +710,9 @@ pub(crate) async fn bind_source_pk( .collect_vec(); let res = match (&source_schema.format, &source_schema.row_encode) { - (Format::Native, Encode::Native) | (Format::Plain, _) => sql_defined_pk_names, + (Format::Native, 
Encode::Native) | (Format::None, Encode::None) | (Format::Plain, _) => { + sql_defined_pk_names + } // For all Upsert formats, we only accept one and only key column as primary key. // Additional KEY columns must be set in this case and must be primary key. @@ -977,6 +983,9 @@ static CONNECTORS_COMPATIBLE_FORMATS: LazyLock hashmap!( Format::Plain => vec![Encode::Json], + ), + ICEBERG_CONNECTOR => hashmap!( + Format::None => vec![Encode::None], ) )) }); @@ -1054,12 +1063,11 @@ pub fn validate_compatibility( } /// Performs early stage checking in frontend to see if the schema of the given `columns` is -/// compatible with the connector extracted from the properties. Currently this only works for -/// `nexmark` connector since it's in chunk format. +/// compatible with the connector extracted from the properties. /// /// One should only call this function after all properties of all columns are resolved, like /// generated column descriptors. -pub(super) fn check_source_schema( +pub(super) async fn check_source_schema( props: &HashMap, row_id_index: Option, columns: &[ColumnCatalog], @@ -1068,10 +1076,22 @@ pub(super) fn check_source_schema( return Ok(()); }; - if connector != NEXMARK_CONNECTOR { - return Ok(()); + if connector == NEXMARK_CONNECTOR { + check_nexmark_schema(props, row_id_index, columns) + } else if connector == ICEBERG_CONNECTOR { + Ok(check_iceberg_source(props, columns) + .await + .map_err(|err| ProtocolError(err.to_string()))?) + } else { + Ok(()) } +} +pub(super) fn check_nexmark_schema( + props: &HashMap, + row_id_index: Option, + columns: &[ColumnCatalog], +) -> Result<()> { let table_type = props .get("nexmark.table.type") .map(|t| t.to_ascii_lowercase()); @@ -1121,6 +1141,68 @@ pub(super) fn check_source_schema( Ok(()) } +pub async fn check_iceberg_source( + props: &HashMap, + columns: &[ColumnCatalog], +) -> anyhow::Result<()> { + let props = ConnectorProperties::extract(props.clone(), true)?; + let ConnectorProperties::Iceberg(properties) = props else { + return Err(anyhow!(format!( + "Invalid properties for iceberg source: {:?}", + props + ))); + }; + + let iceberg_config = IcebergConfig { + database_name: properties.database_name, + table_name: properties.table_name, + catalog_type: Some(properties.catalog_type), + path: properties.warehouse_path, + endpoint: Some(properties.endpoint), + access_key: properties.s3_access, + secret_key: properties.s3_secret, + region: Some(properties.region_name), + ..Default::default() + }; + + let schema = Schema { + fields: columns + .iter() + .cloned() + .map(|c| c.column_desc.into()) + .collect(), + }; + + let table = iceberg_config.load_table().await?; + + let iceberg_schema: arrow_schema::Schema = table + .current_table_metadata() + .current_schema()? 
+ .clone() + .try_into()?; + + for f1 in schema.fields() { + if !iceberg_schema.fields.iter().any(|f2| f2.name() == &f1.name) { + return Err(anyhow::anyhow!(format!( + "Column {} not found in iceberg table", + f1.name + ))); + } + } + + let new_iceberg_field = iceberg_schema + .fields + .iter() + .filter(|f1| schema.fields.iter().any(|f2| f1.name() == &f2.name)) + .cloned() + .collect::>(); + let new_iceberg_schema = arrow_schema::Schema::new(new_iceberg_field); + + risingwave_connector::sink::iceberg::try_matches_arrow_schema(&schema, &new_iceberg_schema)?; + + Ok(()) +} + pub async fn handle_create_source( handler_args: HandlerArgs, stmt: CreateSourceStatement, @@ -1215,8 +1297,8 @@ pub async fn handle_create_source( ) .into()); } - - let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names)?; + let (mut columns, pk_column_ids, row_id_index) = + bind_pk_on_relation(columns, pk_names, connector_need_pk(&with_properties))?; debug_assert!(is_column_ids_dedup(&columns)); @@ -1233,7 +1315,7 @@ pub async fn handle_create_source( &pk_column_ids, )?; - check_source_schema(&with_properties, row_id_index, &columns)?; + check_source_schema(&with_properties, row_id_index, &columns).await?; let pk_column_ids = pk_column_ids.into_iter().map(Into::into).collect(); @@ -1310,6 +1392,7 @@ fn format_to_prost(format: &Format) -> FormatType { Format::DebeziumMongo => FormatType::DebeziumMongo, Format::Maxwell => FormatType::Maxwell, Format::Canal => FormatType::Canal, + Format::None => FormatType::None, } } fn row_encode_to_prost(row_encode: &Encode) -> EncodeType { @@ -1321,6 +1404,7 @@ fn row_encode_to_prost(row_encode: &Encode) -> EncodeType { Encode::Csv => EncodeType::Csv, Encode::Bytes => EncodeType::Bytes, Encode::Template => EncodeType::Template, + Encode::None => EncodeType::None, } } diff --git a/src/frontend/src/handler/create_table.rs b/src/frontend/src/handler/create_table.rs index 8fc30c2c30e19..7fc757b71b6b7 100644 --- a/src/frontend/src/handler/create_table.rs +++ b/src/frontend/src/handler/create_table.rs @@ -61,6 +61,7 @@ use crate::handler::create_source::{ bind_all_columns, bind_columns_from_source, bind_source_pk, bind_source_watermark, check_source_schema, handle_addition_columns, validate_compatibility, UPSTREAM_SOURCE_KEY, }; +use crate::handler::util::is_iceberg_connector; use crate::handler::HandlerArgs; use crate::optimizer::plan_node::generic::SourceNodeKind; use crate::optimizer::plan_node::{LogicalCdcScan, LogicalSource}; @@ -411,6 +412,7 @@ fn multiple_pk_definition_err() -> RwError { pub fn bind_pk_on_relation( mut columns: Vec, pk_names: Vec, + must_need_pk: bool, ) -> Result<(Vec, Vec, Option)> { for c in &columns { assert!(c.column_id() != ColumnId::placeholder()); @@ -431,8 +433,10 @@ pub fn bind_pk_on_relation( }) .try_collect()?; - // Add `_row_id` column if `pk_column_ids` is empty. 
- let row_id_index = pk_column_ids.is_empty().then(|| { + // Add `_row_id` column if `pk_column_ids` is empty and must_need_pk + let need_row_id = pk_column_ids.is_empty() && must_need_pk; + + let row_id_index = need_row_id.then(|| { let column = ColumnCatalog::row_id_column(); let index = columns.len(); pk_column_ids = vec![column.column_id()]; @@ -510,7 +514,12 @@ pub(crate) async fn gen_create_table_plan_with_source( c.column_desc.column_id = col_id_gen.generate(c.name()) } - let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names)?; + if is_iceberg_connector(&with_properties) { + return Err( + ErrorCode::BindError("can't create table with iceberg connector".to_string()).into(), + ); + } + let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names, true)?; let watermark_descs = bind_source_watermark( session, @@ -531,7 +540,7 @@ pub(crate) async fn gen_create_table_plan_with_source( &pk_column_ids, )?; - check_source_schema(&with_properties, row_id_index, &columns)?; + check_source_schema(&with_properties, row_id_index, &columns).await?; gen_table_plan_inner( context.into(), @@ -594,7 +603,7 @@ pub(crate) fn gen_create_table_plan_without_bind( ) -> Result<(PlanRef, Option, PbTable)> { ensure_table_constraints_supported(&constraints)?; let pk_names = bind_sql_pk_names(&column_defs, &constraints)?; - let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names)?; + let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names, true)?; let watermark_descs = bind_source_watermark( context.session_ctx(), @@ -774,7 +783,7 @@ pub(crate) fn gen_create_table_plan_for_cdc_source( } let pk_names = bind_sql_pk_names(&column_defs, &constraints)?; - let (columns, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names)?; + let (columns, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names, true)?; let definition = context.normalized_sql().to_owned(); @@ -1275,7 +1284,7 @@ mod tests { } ensure_table_constraints_supported(&constraints)?; let pk_names = bind_sql_pk_names(&column_defs, &constraints)?; - let (_, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names)?; + let (_, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names, true)?; Ok(pk_column_ids) })(); match (expected, actual) { diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs index 1e49ee8baf540..ab9d4fe415b33 100644 --- a/src/frontend/src/handler/util.rs +++ b/src/frontend/src/handler/util.rs @@ -31,6 +31,7 @@ use risingwave_common::catalog::Field; use risingwave_common::row::Row as _; use risingwave_common::types::{DataType, ScalarRefImpl, Timestamptz}; use risingwave_common::util::iter_util::ZipEqFast; +use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR; use risingwave_connector::source::KAFKA_CONNECTOR; use risingwave_sqlparser::ast::{CompatibleSourceSchema, ConnectorSchema}; @@ -180,6 +181,11 @@ pub fn to_pg_field(f: &Field) -> PgFieldDescriptor { ) } +pub fn connector_need_pk(with_properties: &HashMap) -> bool { + // Currently only iceberg connector doesn't need primary key + !is_iceberg_connector(with_properties) +} + #[inline(always)] pub fn get_connector(with_properties: &HashMap) -> Option { with_properties @@ -204,6 +210,14 @@ pub fn is_cdc_connector(with_properties: &HashMap) -> bool { connector.contains("-cdc") } +#[inline(always)] +pub fn is_iceberg_connector(with_properties: &HashMap) -> bool { + let Some(connector) = get_connector(with_properties) else { + return 
false; + }; + connector == ICEBERG_CONNECTOR +} + #[easy_ext::ext(SourceSchemaCompatExt)] impl CompatibleSourceSchema { /// Convert `self` to [`ConnectorSchema`] and warn the user if the syntax is deprecated. diff --git a/src/frontend/src/optimizer/plan_node/logical_source.rs b/src/frontend/src/optimizer/plan_node/logical_source.rs index fa7ad908d01d4..43ec6d2a89de8 100644 --- a/src/frontend/src/optimizer/plan_node/logical_source.rs +++ b/src/frontend/src/optimizer/plan_node/logical_source.rs @@ -23,7 +23,8 @@ use risingwave_common::bail_not_implemented; use risingwave_common::catalog::{ ColumnCatalog, ColumnDesc, Field, Schema, KAFKA_TIMESTAMP_COLUMN_NAME, }; -use risingwave_connector::source::DataType; +use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR; +use risingwave_connector::source::{DataType, UPSTREAM_SOURCE_KEY}; use risingwave_pb::plan_common::column_desc::GeneratedOrDefaultColumn; use risingwave_pb::plan_common::GeneratedColumnDesc; @@ -546,6 +547,18 @@ impl ToStream for LogicalSource { } } } + if let Some(source) = &self.core.catalog { + let connector = &source + .with_properties + .get(UPSTREAM_SOURCE_KEY) + .map(|s| s.to_lowercase()) + .unwrap(); + if ICEBERG_CONNECTOR == connector { + return Err( + anyhow::anyhow!("Iceberg source is not supported in stream queries").into(), + ); + } + } Ok(plan) } diff --git a/src/sqlparser/src/ast/statement.rs b/src/sqlparser/src/ast/statement.rs index 3dd923b610542..e876a197c265d 100644 --- a/src/sqlparser/src/ast/statement.rs +++ b/src/sqlparser/src/ast/statement.rs @@ -94,6 +94,7 @@ pub struct CreateSourceStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum Format { Native, + None, // Keyword::NONE Debezium, // Keyword::DEBEZIUM DebeziumMongo, // Keyword::DEBEZIUM_MONGO Maxwell, // Keyword::MAXWELL @@ -116,6 +117,7 @@ impl fmt::Display for Format { Format::Canal => "CANAL", Format::Upsert => "UPSERT", Format::Plain => "PLAIN", + Format::None => "NONE", } ) } @@ -149,6 +151,7 @@ pub enum Encode { Protobuf, // Keyword::PROTOBUF Json, // Keyword::JSON Bytes, // Keyword::BYTES + None, // Keyword::None Native, Template, } @@ -167,6 +170,7 @@ impl fmt::Display for Encode { Encode::Bytes => "BYTES", Encode::Native => "NATIVE", Encode::Template => "TEMPLATE", + Encode::None => "NONE", } ) } @@ -249,6 +253,18 @@ impl Parser { } else { ConnectorSchema::native().into() }) + } else if connector.contains("iceberg") { + let expected = ConnectorSchema::none(); + if self.peek_source_schema_format() { + let schema = parse_source_schema(self)?.into_v2(); + if schema != expected { + return Err(ParserError::ParserError(format!( + "Row format for iceberg connectors should be \ + either omitted or set to `{expected}`", + ))); + } + } + Ok(expected.into()) } else { Ok(parse_source_schema(self)?) } @@ -304,6 +320,16 @@ impl ConnectorSchema { } } + /// Create a new source schema with `None` format and encoding. + /// Used for self-explanatory source like iceberg. 
+ pub const fn none() -> Self { + ConnectorSchema { + format: Format::None, + row_encode: Encode::None, + row_options: Vec::new(), + } + } + pub fn row_options(&self) -> &[SqlOption] { self.row_options.as_ref() } From c6ed6d14aff2644c341868757d6fcf8abb41b64e Mon Sep 17 00:00:00 2001 From: Shanicky Chen Date: Fri, 23 Feb 2024 12:58:20 +0800 Subject: [PATCH 18/24] feat: try to reduce memory usage in scaling (#15193) Signed-off-by: Shanicky Chen --- src/meta/src/stream/scale.rs | 220 +++++++++++++++++++++++++----- src/meta/src/stream/test_scale.rs | 8 +- 2 files changed, 188 insertions(+), 40 deletions(-) diff --git a/src/meta/src/stream/scale.rs b/src/meta/src/stream/scale.rs index 7f40f8e3da033..0e571a0afebf7 100644 --- a/src/meta/src/stream/scale.rs +++ b/src/meta/src/stream/scale.rs @@ -31,15 +31,19 @@ use risingwave_common::catalog::TableId; use risingwave_common::hash::{ActorMapping, ParallelUnitId, VirtualNode}; use risingwave_common::util::iter_util::ZipEqDebug; use risingwave_meta_model_v2::StreamingParallelism; -use risingwave_pb::common::{ActorInfo, ParallelUnit, WorkerNode}; +use risingwave_pb::common::{ActorInfo, Buffer, ParallelUnit, ParallelUnitMapping, WorkerNode}; use risingwave_pb::meta::get_reschedule_plan_request::{Policy, StableResizePolicy}; use risingwave_pb::meta::subscribe_response::{Info, Operation}; use risingwave_pb::meta::table_fragments::actor_status::ActorState; -use risingwave_pb::meta::table_fragments::fragment::FragmentDistributionType; -use risingwave_pb::meta::table_fragments::{self, ActorStatus, Fragment, State}; +use risingwave_pb::meta::table_fragments::fragment::{ + FragmentDistributionType, PbFragmentDistributionType, +}; +use risingwave_pb::meta::table_fragments::{self, ActorStatus, PbFragment, State}; use risingwave_pb::meta::FragmentParallelUnitMappings; use risingwave_pb::stream_plan::stream_node::NodeBody; -use risingwave_pb::stream_plan::{DispatcherType, FragmentTypeFlag, StreamActor, StreamNode}; +use risingwave_pb::stream_plan::{ + Dispatcher, DispatcherType, FragmentTypeFlag, PbStreamActor, StreamNode, +}; use thiserror_ext::AsReport; use tokio::sync::oneshot::Receiver; use tokio::sync::{oneshot, RwLock, RwLockReadGuard, RwLockWriteGuard}; @@ -105,15 +109,85 @@ pub struct ParallelUnitReschedule { pub removed_parallel_units: BTreeSet, } +pub struct CustomFragmentInfo { + pub fragment_id: u32, + pub fragment_type_mask: u32, + pub distribution_type: PbFragmentDistributionType, + pub vnode_mapping: Option, + pub state_table_ids: Vec, + pub upstream_fragment_ids: Vec, + pub actor_template: PbStreamActor, + pub actors: Vec, +} + +#[derive(Default)] +pub struct CustomActorInfo { + pub actor_id: u32, + pub fragment_id: u32, + pub dispatcher: Vec, + pub upstream_actor_id: Vec, + pub vnode_bitmap: Option, +} + +impl From<&PbStreamActor> for CustomActorInfo { + fn from( + PbStreamActor { + actor_id, + fragment_id, + dispatcher, + upstream_actor_id, + vnode_bitmap, + .. 
+ }: &PbStreamActor, + ) -> Self { + CustomActorInfo { + actor_id: *actor_id, + fragment_id: *fragment_id, + dispatcher: dispatcher.clone(), + upstream_actor_id: upstream_actor_id.clone(), + vnode_bitmap: vnode_bitmap.clone(), + } + } +} + +impl From<&PbFragment> for CustomFragmentInfo { + fn from(fragment: &PbFragment) -> Self { + CustomFragmentInfo { + fragment_id: fragment.fragment_id, + fragment_type_mask: fragment.fragment_type_mask, + distribution_type: fragment.distribution_type(), + vnode_mapping: fragment.vnode_mapping.clone(), + state_table_ids: fragment.state_table_ids.clone(), + upstream_fragment_ids: fragment.upstream_fragment_ids.clone(), + actor_template: fragment + .actors + .first() + .cloned() + .expect("no actor in fragment"), + actors: fragment.actors.iter().map(CustomActorInfo::from).collect(), + } + } +} + +impl CustomFragmentInfo { + pub fn get_fragment_type_mask(&self) -> u32 { + self.fragment_type_mask + } + + pub fn distribution_type(&self) -> FragmentDistributionType { + self.distribution_type + } +} + pub struct RescheduleContext { /// Index used to map `ParallelUnitId` to `WorkerId` parallel_unit_id_to_worker_id: BTreeMap, /// Meta information for all Actors - actor_map: HashMap, + actor_map: HashMap, /// Status of all Actors, used to find the location of the `Actor` actor_status: BTreeMap, /// Meta information of all `Fragment`, used to find the `Fragment`'s `Actor` - fragment_map: HashMap, + fragment_map: HashMap, /// Indexes for all `Worker`s worker_nodes: HashMap, /// Index of all `Actor` upstreams, specific to `Dispatcher` @@ -180,7 +254,7 @@ impl RescheduleContext { /// /// The return value is the bitmap distribution after scaling, which covers all virtual node indexes pub fn rebalance_actor_vnode( - actors: &[StreamActor], + actors: &[CustomActorInfo], actors_to_remove: &BTreeSet, actors_to_create: &BTreeSet, ) -> HashMap { @@ -464,16 +538,29 @@ impl ScaleController { let mut fragment_state = HashMap::new(); let mut fragment_to_table = HashMap::new(); - let all_table_fragments = self.list_all_table_fragments().await?; - - for table_fragments in all_table_fragments { + // We are reusing code for the metadata manager of both V1 and V2, which will be deprecated in the future. 
+ fn fulfill_index_by_table_fragments_ref( + actor_map: &mut HashMap, + fragment_map: &mut HashMap, + actor_status: &mut BTreeMap, + fragment_state: &mut HashMap, + fragment_to_table: &mut HashMap, + table_fragments: &TableFragments, + ) { fragment_state.extend( table_fragments .fragment_ids() .map(|f| (f, table_fragments.state())), ); - fragment_map.extend(table_fragments.fragments.clone()); - actor_map.extend(table_fragments.actor_map()); + + for (fragment_id, fragment) in &table_fragments.fragments { + for actor in &fragment.actors { + actor_map.insert(actor.actor_id, CustomActorInfo::from(actor)); + } + + fragment_map.insert(*fragment_id, CustomFragmentInfo::from(fragment)); + } + actor_status.extend(table_fragments.actor_status.clone()); fragment_to_table.extend( @@ -483,6 +570,37 @@ impl ScaleController { ); } + match &self.metadata_manager { + MetadataManager::V1(mgr) => { + let guard = mgr.fragment_manager.get_fragment_read_guard().await; + + for table_fragments in guard.table_fragments().values() { + fulfill_index_by_table_fragments_ref( + &mut actor_map, + &mut fragment_map, + &mut actor_status, + &mut fragment_state, + &mut fragment_to_table, + table_fragments, + ); + } + } + MetadataManager::V2(_) => { + let all_table_fragments = self.list_all_table_fragments().await?; + + for table_fragments in &all_table_fragments { + fulfill_index_by_table_fragments_ref( + &mut actor_map, + &mut fragment_map, + &mut actor_status, + &mut fragment_state, + &mut fragment_to_table, + table_fragments, + ); + } + } + }; + // NoShuffle relation index let mut no_shuffle_source_fragment_ids = HashSet::new(); let mut no_shuffle_target_fragment_ids = HashSet::new(); @@ -608,7 +726,7 @@ impl ScaleController { } if (fragment.get_fragment_type_mask() & FragmentTypeFlag::Source as u32) != 0 { - let stream_node = fragment.actors.first().unwrap().get_nodes().unwrap(); + let stream_node = fragment.actor_template.nodes.as_ref().unwrap(); if TableFragments::find_stream_source(stream_node).is_some() { stream_source_fragment_ids.insert(*fragment_id); } @@ -698,7 +816,7 @@ impl ScaleController { &self, worker_nodes: &HashMap, actor_infos_to_broadcast: BTreeMap, - node_actors_to_create: HashMap>, + node_actors_to_create: HashMap>, broadcast_worker_ids: HashSet, ) -> MetaResult<()> { self.stream_rpc_manager @@ -963,7 +1081,7 @@ impl ScaleController { for (actor_to_create, sample_actor) in actors_to_create .iter() - .zip_eq_debug(repeat(fragment.actors.first().unwrap()).take(actors_to_create.len())) + .zip_eq_debug(repeat(&fragment.actor_template).take(actors_to_create.len())) { let new_actor_id = actor_to_create.0; let mut new_actor = sample_actor.clone(); @@ -1407,7 +1525,7 @@ impl ScaleController { fragment_actor_bitmap: &HashMap>, no_shuffle_upstream_actor_map: &HashMap>, no_shuffle_downstream_actors_map: &HashMap>, - new_actor: &mut StreamActor, + new_actor: &mut PbStreamActor, ) -> MetaResult<()> { let fragment = &ctx.fragment_map.get(&new_actor.fragment_id).unwrap(); let mut applied_upstream_fragment_actor_ids = HashMap::new(); @@ -1953,8 +2071,6 @@ impl ScaleController { }) .collect::>(); - let all_table_fragments = self.list_all_table_fragments().await?; - // FIXME: only need actor id and dispatcher info, avoid clone it. 
let mut actor_map = HashMap::new(); let mut actor_status = HashMap::new(); @@ -1962,24 +2078,56 @@ impl ScaleController { let mut fragment_map = HashMap::new(); let mut fragment_parallelism = HashMap::new(); - for table_fragments in all_table_fragments { - for (fragment_id, fragment) in table_fragments.fragments { - fragment - .actors - .iter() - .map(|actor| (actor.actor_id, actor)) - .for_each(|(id, actor)| { - actor_map.insert(id as ActorId, actor.clone()); - }); + // We are reusing code for the metadata manager of both V1 and V2, which will be deprecated in the future. + fn fulfill_index_by_table_fragments_ref( + actor_map: &mut HashMap, + actor_status: &mut HashMap, + fragment_map: &mut HashMap, + fragment_parallelism: &mut HashMap, + table_fragments: &TableFragments, + ) { + for (fragment_id, fragment) in &table_fragments.fragments { + for actor in &fragment.actors { + actor_map.insert(actor.actor_id, CustomActorInfo::from(actor)); + } - fragment_map.insert(fragment_id, fragment); + fragment_map.insert(*fragment_id, CustomFragmentInfo::from(fragment)); - fragment_parallelism.insert(fragment_id, table_fragments.assigned_parallelism); + fragment_parallelism.insert(*fragment_id, table_fragments.assigned_parallelism); } - actor_status.extend(table_fragments.actor_status); + actor_status.extend(table_fragments.actor_status.clone()); } + match &self.metadata_manager { + MetadataManager::V1(mgr) => { + let guard = mgr.fragment_manager.get_fragment_read_guard().await; + + for table_fragments in guard.table_fragments().values() { + fulfill_index_by_table_fragments_ref( + &mut actor_map, + &mut actor_status, + &mut fragment_map, + &mut fragment_parallelism, + table_fragments, + ); + } + } + MetadataManager::V2(_) => { + let all_table_fragments = self.list_all_table_fragments().await?; + + for table_fragments in &all_table_fragments { + fulfill_index_by_table_fragments_ref( + &mut actor_map, + &mut actor_status, + &mut fragment_map, + &mut fragment_parallelism, + table_fragments, + ); + } + } + }; + let mut no_shuffle_source_fragment_ids = HashSet::new(); let mut no_shuffle_target_fragment_ids = HashSet::new(); @@ -2034,7 +2182,7 @@ impl ScaleController { }, ) in fragment_worker_changes { - let fragment = match fragment_map.get(&fragment_id).cloned() { + let fragment = match fragment_map.get(&fragment_id) { None => bail!("Fragment id {} not found", fragment_id), Some(fragment) => fragment, }; @@ -2122,7 +2270,7 @@ impl ScaleController { // then we re-add the limited parallel units from the limited workers target_parallel_unit_ids.extend(limited_worker_parallel_unit_ids.into_iter()); } - match fragment.get_distribution_type().unwrap() { + match fragment.distribution_type() { FragmentDistributionType::Unspecified => unreachable!(), FragmentDistributionType::Single => { let single_parallel_unit_id = @@ -2274,7 +2422,7 @@ impl ScaleController { } pub fn build_no_shuffle_relation_index( - actor_map: &HashMap, + actor_map: &HashMap, no_shuffle_source_fragment_ids: &mut HashSet, no_shuffle_target_fragment_ids: &mut HashSet, ) { @@ -2302,7 +2450,7 @@ impl ScaleController { } pub fn build_fragment_dispatcher_index( - actor_map: &HashMap, + actor_map: &HashMap, fragment_dispatcher_map: &mut HashMap>, ) { for actor in actor_map.values() { @@ -2324,7 +2472,7 @@ impl ScaleController { pub fn resolve_no_shuffle_upstream_tables( fragment_ids: HashSet, - fragment_map: &HashMap, + fragment_map: &HashMap, no_shuffle_source_fragment_ids: &HashSet, no_shuffle_target_fragment_ids: &HashSet, fragment_to_table: 
&HashMap, @@ -2394,7 +2542,7 @@ impl ScaleController { pub fn resolve_no_shuffle_upstream_fragments( reschedule: &mut HashMap, - fragment_map: &HashMap, + fragment_map: &HashMap, no_shuffle_source_fragment_ids: &HashSet, no_shuffle_target_fragment_ids: &HashSet, ) -> MetaResult<()> diff --git a/src/meta/src/stream/test_scale.rs b/src/meta/src/stream/test_scale.rs index 2db55dbddbd4d..73d59ff52f2f4 100644 --- a/src/meta/src/stream/test_scale.rs +++ b/src/meta/src/stream/test_scale.rs @@ -21,10 +21,10 @@ mod tests { use risingwave_common::buffer::Bitmap; use risingwave_common::hash::{ActorMapping, ParallelUnitId, ParallelUnitMapping, VirtualNode}; use risingwave_pb::common::ParallelUnit; - use risingwave_pb::stream_plan::StreamActor; use crate::model::ActorId; use crate::stream::scale::rebalance_actor_vnode; + use crate::stream::CustomActorInfo; fn simulated_parallel_unit_nums(min: Option, max: Option) -> Vec { let mut raw = vec![1, 3, 12, 42, VirtualNode::COUNT]; @@ -39,13 +39,13 @@ mod tests { raw } - fn build_fake_actors(info: &[(ActorId, ParallelUnitId)]) -> Vec { + fn build_fake_actors(info: &[(ActorId, ParallelUnitId)]) -> Vec { let parallel_units = generate_parallel_units(info); let vnode_bitmaps = ParallelUnitMapping::build(¶llel_units).to_bitmaps(); info.iter() - .map(|(actor_id, parallel_unit_id)| StreamActor { + .map(|(actor_id, parallel_unit_id)| CustomActorInfo { actor_id: *actor_id, vnode_bitmap: vnode_bitmaps .get(parallel_unit_id) @@ -64,7 +64,7 @@ mod tests { .collect_vec() } - fn check_affinity_for_scale_in(bitmap: &Bitmap, actor: &StreamActor) { + fn check_affinity_for_scale_in(bitmap: &Bitmap, actor: &CustomActorInfo) { let prev_bitmap = Bitmap::from(actor.vnode_bitmap.as_ref().unwrap()); for idx in 0..VirtualNode::COUNT { From ea0b01220efe97fa003bcfe05cb63a645aaf5e39 Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Fri, 23 Feb 2024 13:30:32 +0800 Subject: [PATCH 19/24] chore: set `buf breaking` rule back to `WIRE_JSON` (#15147) --- proto/buf.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/proto/buf.yaml b/proto/buf.yaml index 1aa31816ce0af..abad30f04506c 100644 --- a/proto/buf.yaml +++ b/proto/buf.yaml @@ -1,7 +1,8 @@ version: v1 breaking: use: - - WIRE # https://docs.buf.build/breaking/rules + - WIRE_JSON # https://docs.buf.build/breaking/rules + # https://github.com/risingwavelabs/risingwave/issues/15030 lint: use: - DEFAULT From b95d9a9098d086eaea781c7d7926aa1bdaa91bee Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Fri, 23 Feb 2024 13:30:45 +0800 Subject: [PATCH 20/24] fix(frontend): require primary key for system table (#15126) Signed-off-by: Bugen Zhao --- e2e_test/batch/catalog/pg_settings.slt.part | 8 +++ .../fields-derive/src/gen/test_empty_pk.rs | 29 +++++++++ .../fields-derive/src/gen/test_no_pk.rs | 29 +++++++++ .../fields-derive/src/gen/test_output.rs | 4 +- src/common/fields-derive/src/lib.rs | 61 ++++++++++++++----- src/common/src/types/fields.rs | 11 ++-- src/frontend/macro/src/lib.rs | 6 +- .../system_catalog/pg_catalog/pg_cast.rs | 1 + .../system_catalog/pg_catalog/pg_settings.rs | 1 + .../rw_catalog/rw_hummock_branched_objects.rs | 1 + .../rw_catalog/rw_hummock_pinned_snapshots.rs | 1 + .../rw_catalog/rw_hummock_pinned_versions.rs | 1 + .../rw_catalog/rw_hummock_version.rs | 1 + .../rw_catalog/rw_meta_snapshot.rs | 1 + 14 files changed, 132 insertions(+), 23 deletions(-) create mode 100644 src/common/fields-derive/src/gen/test_empty_pk.rs create mode 100644 src/common/fields-derive/src/gen/test_no_pk.rs diff --git 
a/e2e_test/batch/catalog/pg_settings.slt.part b/e2e_test/batch/catalog/pg_settings.slt.part index 5f37db11fcb91..c8e927ba72b9f 100644 --- a/e2e_test/batch/catalog/pg_settings.slt.part +++ b/e2e_test/batch/catalog/pg_settings.slt.part @@ -63,6 +63,14 @@ query TT SELECT * FROM pg_catalog.pg_settings where name='dummy'; ---- +# https://github.com/risingwavelabs/risingwave/issues/15125 +query TT +SELECT min(name) name, context FROM pg_catalog.pg_settings GROUP BY context; +---- +application_name user +backup_storage_directory postmaster +block_size_kb internal + # Tab-completion of `SET` command query T SELECT name diff --git a/src/common/fields-derive/src/gen/test_empty_pk.rs b/src/common/fields-derive/src/gen/test_empty_pk.rs new file mode 100644 index 0000000000000..ffb5ff268bed1 --- /dev/null +++ b/src/common/fields-derive/src/gen/test_empty_pk.rs @@ -0,0 +1,29 @@ +impl ::risingwave_common::types::Fields for Data { + const PRIMARY_KEY: Option<&'static [usize]> = Some(&[]); + fn fields() -> Vec<(&'static str, ::risingwave_common::types::DataType)> { + vec![ + ("v1", < i16 as ::risingwave_common::types::WithDataType > + ::default_data_type()), ("v2", < String as + ::risingwave_common::types::WithDataType > ::default_data_type()) + ] + } + fn into_owned_row(self) -> ::risingwave_common::row::OwnedRow { + ::risingwave_common::row::OwnedRow::new( + vec![ + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v1), + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v2) + ], + ) + } +} +impl From for ::risingwave_common::types::ScalarImpl { + fn from(v: Data) -> Self { + ::risingwave_common::types::StructValue::new( + vec![ + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v1), + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v2) + ], + ) + .into() + } +} diff --git a/src/common/fields-derive/src/gen/test_no_pk.rs b/src/common/fields-derive/src/gen/test_no_pk.rs new file mode 100644 index 0000000000000..9e1b3e7892969 --- /dev/null +++ b/src/common/fields-derive/src/gen/test_no_pk.rs @@ -0,0 +1,29 @@ +impl ::risingwave_common::types::Fields for Data { + const PRIMARY_KEY: Option<&'static [usize]> = None; + fn fields() -> Vec<(&'static str, ::risingwave_common::types::DataType)> { + vec![ + ("v1", < i16 as ::risingwave_common::types::WithDataType > + ::default_data_type()), ("v2", < String as + ::risingwave_common::types::WithDataType > ::default_data_type()) + ] + } + fn into_owned_row(self) -> ::risingwave_common::row::OwnedRow { + ::risingwave_common::row::OwnedRow::new( + vec![ + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v1), + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v2) + ], + ) + } +} +impl From for ::risingwave_common::types::ScalarImpl { + fn from(v: Data) -> Self { + ::risingwave_common::types::StructValue::new( + vec![ + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v1), + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v2) + ], + ) + .into() + } +} diff --git a/src/common/fields-derive/src/gen/test_output.rs b/src/common/fields-derive/src/gen/test_output.rs index 517dcdefc7a8c..a804a379bfd4a 100644 --- a/src/common/fields-derive/src/gen/test_output.rs +++ b/src/common/fields-derive/src/gen/test_output.rs @@ -1,4 +1,5 @@ impl ::risingwave_common::types::Fields for Data { + const PRIMARY_KEY: Option<&'static [usize]> = Some(&[1usize, 0usize]); fn fields() -> Vec<(&'static str, ::risingwave_common::types::DataType)> { vec![ ("v1", < i16 as ::risingwave_common::types::WithDataType > 
@@ -21,9 +22,6 @@ impl ::risingwave_common::types::Fields for Data { ], ) } - fn primary_key() -> &'static [usize] { - &[1usize, 0usize] - } } impl From for ::risingwave_common::types::ScalarImpl { fn from(v: Data) -> Self { diff --git a/src/common/fields-derive/src/lib.rs b/src/common/fields-derive/src/lib.rs index b38f579751683..dae648d1dc343 100644 --- a/src/common/fields-derive/src/lib.rs +++ b/src/common/fields-derive/src/lib.rs @@ -82,16 +82,17 @@ fn gen(tokens: TokenStream) -> Result { .iter() .map(|field| field.ident.as_ref().expect("field no name")) .collect::>(); - let primary_key = get_primary_key(&input).map(|indices| { - quote! { - fn primary_key() -> &'static [usize] { - &[#(#indices),*] - } - } - }); + let primary_key = get_primary_key(&input).map_or_else( + || quote! { None }, + |indices| { + quote! { Some(&[#(#indices),*]) } + }, + ); Ok(quote! { impl ::risingwave_common::types::Fields for #ident { + const PRIMARY_KEY: Option<&'static [usize]> = #primary_key; + fn fields() -> Vec<(&'static str, ::risingwave_common::types::DataType)> { vec![#(#fields_rw),*] } @@ -100,7 +101,6 @@ fn gen(tokens: TokenStream) -> Result { ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.#names) ),*]) } - #primary_key } impl From<#ident> for ::risingwave_common::types::ScalarImpl { fn from(v: #ident) -> Self { @@ -133,7 +133,9 @@ fn get_primary_key(input: &syn::DeriveInput) -> Option> { return Some( keys.to_string() .split(',') - .map(|s| index(s.trim())) + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(index) .collect(), ); } @@ -199,6 +201,18 @@ mod tests { prettyplease::unparse(&output) } + fn do_test(code: &str, expected_path: &str) { + let input: TokenStream = str::parse(code).unwrap(); + + let output = super::gen(input).unwrap(); + + let output = pretty_print(output); + + let expected = expect_test::expect_file![expected_path]; + + expected.assert_eq(&output); + } + #[test] fn test_gen() { let code = indoc! {r#" @@ -213,14 +227,33 @@ mod tests { } "#}; - let input: TokenStream = str::parse(code).unwrap(); + do_test(code, "gen/test_output.rs"); + } - let output = super::gen(input).unwrap(); + #[test] + fn test_no_pk() { + let code = indoc! {r#" + #[derive(Fields)] + struct Data { + v1: i16, + v2: String, + } + "#}; - let output = pretty_print(output); + do_test(code, "gen/test_no_pk.rs"); + } - let expected = expect_test::expect_file!["gen/test_output.rs"]; + #[test] + fn test_empty_pk() { + let code = indoc! {r#" + #[derive(Fields)] + #[primary_key()] + struct Data { + v1: i16, + v2: String, + } + "#}; - expected.assert_eq(&output); + do_test(code, "gen/test_empty_pk.rs"); } } diff --git a/src/common/src/types/fields.rs b/src/common/src/types/fields.rs index f52717297792e..df1795804af00 100644 --- a/src/common/src/types/fields.rs +++ b/src/common/src/types/fields.rs @@ -58,17 +58,18 @@ use crate::util::chunk_coalesce::DataChunkBuilder; /// } /// ``` pub trait Fields { + /// The primary key of the table. + /// + /// - `None` if the primary key is not applicable. + /// - `Some(&[])` if the primary key is empty, i.e., there'll be at most one row in the table. + const PRIMARY_KEY: Option<&'static [usize]>; + /// Return the schema of the struct. fn fields() -> Vec<(&'static str, DataType)>; /// Convert the struct to an `OwnedRow`. fn into_owned_row(self) -> OwnedRow; - /// The primary key of the table. - fn primary_key() -> &'static [usize] { - &[] - } - /// Create a [`DataChunkBuilder`](crate::util::chunk_coalesce::DataChunkBuilder) with the schema of the struct. 
fn data_chunk_builder(capacity: usize) -> DataChunkBuilder { DataChunkBuilder::new( diff --git a/src/frontend/macro/src/lib.rs b/src/frontend/macro/src/lib.rs index 8ba10a9f4454a..36b7f33eb99c0 100644 --- a/src/frontend/macro/src/lib.rs +++ b/src/frontend/macro/src/lib.rs @@ -117,11 +117,15 @@ fn gen_sys_table(attr: Attr, item_fn: ItemFn) -> Result { #[linkme::distributed_slice(crate::catalog::system_catalog::SYS_CATALOGS_SLICE)] #[no_mangle] // to prevent duplicate schema.table name fn #gen_fn_name() -> crate::catalog::system_catalog::BuiltinCatalog { + const _: () = { + assert!(#struct_type::PRIMARY_KEY.is_some(), "primary key is required for system table"); + }; + crate::catalog::system_catalog::BuiltinCatalog::Table(crate::catalog::system_catalog::BuiltinTable { name: #table_name, schema: #schema_name, columns: #struct_type::fields(), - pk: #struct_type::primary_key(), + pk: #struct_type::PRIMARY_KEY.unwrap(), function: |reader| std::boxed::Box::pin(async { let rows = #user_fn_name(reader) #_await #handle_error; let mut builder = #struct_type::data_chunk_builder(rows.len() + 1); diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs index c13e87f162afe..11bcabcde0f69 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs @@ -22,6 +22,7 @@ use crate::expr::cast_map_array; /// Ref: [`https://www.postgresql.org/docs/current/catalog-pg-cast.html`] #[derive(Fields)] struct PgCast { + #[primary_key] oid: i32, castsource: i32, casttarget: i32, diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs index 0f079ca3f6452..58d44b1aef92b 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs @@ -21,6 +21,7 @@ use crate::catalog::system_catalog::SysCatalogReaderImpl; /// The catalog `pg_settings` stores settings. /// Ref: [`https://www.postgresql.org/docs/current/view-pg-settings.html`] #[derive(Fields)] +#[primary_key(name, context)] struct PgSetting { name: String, setting: String, diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs index 2699503a2fdd5..443fa255f4398 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs @@ -19,6 +19,7 @@ use crate::catalog::system_catalog::SysCatalogReaderImpl; use crate::error::Result; #[derive(Fields)] +#[primary_key(object_id, sst_id)] // TODO: is this correct? 
struct RwHummockBranchedObject { object_id: i64, sst_id: i64, diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs index ac2b96bdc0023..e4f18c8fecaf3 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs @@ -20,6 +20,7 @@ use crate::error::Result; #[derive(Fields)] struct RwHummockPinnedSnapshot { + #[primary_key] worker_node_id: i32, min_pinned_snapshot_id: i64, } diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs index 45a8e23f0ecc5..c0a9dd9e7fc45 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs @@ -20,6 +20,7 @@ use crate::error::Result; #[derive(Fields)] struct RwHummockPinnedVersion { + #[primary_key] worker_node_id: i32, min_pinned_version_id: i64, } diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs index 5551170e57a6f..37d1ceb6486ea 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs @@ -22,6 +22,7 @@ use crate::error::Result; #[derive(Fields)] struct RwHummockVersion { + #[primary_key] version_id: i64, max_committed_epoch: i64, safe_epoch: i64, diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs index ebb969cac462f..f31b1f7c67c5c 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs @@ -21,6 +21,7 @@ use crate::error::Result; #[derive(Fields)] struct RwMetaSnapshot { + #[primary_key] meta_snapshot_id: i64, hummock_version_id: i64, // the smallest epoch this meta snapshot includes From bd2914808dd310dc3c3c401525ab3baf3c3e67cb Mon Sep 17 00:00:00 2001 From: Shanicky Chen Date: Fri, 23 Feb 2024 14:02:12 +0800 Subject: [PATCH 21/24] feat: refine sink into table functionalities (#15160) Co-authored-by: August --- src/meta/src/controller/catalog.rs | 276 +++++++++++++++++++++-- src/meta/src/controller/rename.rs | 19 ++ src/meta/src/controller/streaming_job.rs | 20 +- src/meta/src/controller/utils.rs | 131 +++++++++++ 4 files changed, 423 insertions(+), 23 deletions(-) diff --git a/src/meta/src/controller/catalog.rs b/src/meta/src/controller/catalog.rs index 6077efa7f88c1..e26e1af0f0cff 100644 --- a/src/meta/src/controller/catalog.rs +++ b/src/meta/src/controller/catalog.rs @@ -19,16 +19,18 @@ use std::sync::Arc; use anyhow::anyhow; use itertools::Itertools; use risingwave_common::catalog::{TableOption, DEFAULT_SCHEMA_NAME, SYSTEM_SCHEMAS}; +use risingwave_common::util::stream_graph_visitor::visit_stream_node_cont; use risingwave_common::{bail, current_cluster_version}; +use risingwave_meta_model_v2::fragment::StreamNode; use risingwave_meta_model_v2::object::ObjectType; use risingwave_meta_model_v2::prelude::*; use risingwave_meta_model_v2::table::TableType; use risingwave_meta_model_v2::{ - connection, database, function, index, object, object_dependency, 
schema, sink, source, - streaming_job, table, user_privilege, view, ActorId, ColumnCatalogArray, ConnectionId, - CreateType, DatabaseId, FragmentId, FunctionId, IndexId, JobStatus, ObjectId, - PrivateLinkService, SchemaId, SourceId, StreamSourceInfo, StreamingParallelism, TableId, - UserId, + actor, connection, database, fragment, function, index, object, object_dependency, schema, + sink, source, streaming_job, table, user_privilege, view, ActorId, ActorUpstreamActors, + ColumnCatalogArray, ConnectionId, CreateType, DatabaseId, FragmentId, FunctionId, I32Array, + IndexId, JobStatus, ObjectId, PrivateLinkService, SchemaId, SourceId, StreamSourceInfo, + StreamingParallelism, TableId, UserId, }; use risingwave_pb::catalog::table::PbTableType; use risingwave_pb::catalog::{ @@ -41,6 +43,8 @@ use risingwave_pb::meta::subscribe_response::{ Info as NotificationInfo, Info, Operation as NotificationOperation, Operation, }; use risingwave_pb::meta::{PbRelation, PbRelationGroup}; +use risingwave_pb::stream_plan::stream_node::NodeBody; +use risingwave_pb::stream_plan::FragmentTypeFlag; use risingwave_pb::user::PbUserInfo; use sea_orm::sea_query::{Expr, SimpleExpr}; use sea_orm::ActiveValue::Set; @@ -423,6 +427,7 @@ impl CatalogController { pub async fn clean_dirty_creating_jobs(&self) -> MetaResult { let inner = self.inner.write().await; let txn = inner.db.begin().await?; + let creating_job_ids: Vec = streaming_job::Entity::find() .select_only() .column(streaming_job::Column::JobId) @@ -436,7 +441,14 @@ impl CatalogController { .into_tuple() .all(&txn) .await?; + + let changed = Self::clean_dirty_sink_downstreams(&txn).await?; + if creating_job_ids.is_empty() { + if changed { + txn.commit().await?; + } + return Ok(ReleaseContext::default()); } @@ -476,6 +488,7 @@ impl CatalogController { .exec(&txn) .await?; assert!(res.rows_affected > 0); + txn.commit().await?; Ok(ReleaseContext { @@ -485,6 +498,175 @@ impl CatalogController { }) } + async fn clean_dirty_sink_downstreams(txn: &DatabaseTransaction) -> MetaResult { + // clean incoming sink from (table) + // clean upstream fragment ids from (fragment) + // clean stream node from (fragment) + // clean upstream actor ids from (actor) + let all_fragment_ids: Vec = Fragment::find() + .select_only() + .columns(vec![fragment::Column::FragmentId]) + .into_tuple() + .all(txn) + .await?; + + let all_fragment_ids: HashSet<_> = all_fragment_ids.into_iter().collect(); + + let table_sink_ids: Vec = Sink::find() + .select_only() + .column(sink::Column::SinkId) + .filter(sink::Column::TargetTable.is_not_null()) + .into_tuple() + .all(txn) + .await?; + + let all_table_with_incoming_sinks: Vec<(ObjectId, I32Array)> = Table::find() + .select_only() + .columns(vec![table::Column::TableId, table::Column::IncomingSinks]) + .into_tuple() + .all(txn) + .await?; + + let table_incoming_sinks_to_update = all_table_with_incoming_sinks + .into_iter() + .filter(|(_, incoming_sinks)| { + let inner_ref = incoming_sinks.inner_ref(); + !inner_ref.is_empty() + && inner_ref + .iter() + .any(|sink_id| !table_sink_ids.contains(sink_id)) + }) + .collect_vec(); + + let new_table_incoming_sinks = table_incoming_sinks_to_update + .into_iter() + .map(|(table_id, incoming_sinks)| { + let new_incoming_sinks = incoming_sinks + .into_inner() + .extract_if(|id| table_sink_ids.contains(id)) + .collect_vec(); + (table_id, I32Array::from(new_incoming_sinks)) + }) + .collect_vec(); + + // no need to update, returning + if new_table_incoming_sinks.is_empty() { + return Ok(false); + } + + for (table_id, 
new_incoming_sinks) in new_table_incoming_sinks { + tracing::info!("cleaning dirty table sink downstream table {}", table_id); + Table::update_many() + .col_expr(table::Column::IncomingSinks, new_incoming_sinks.into()) + .filter(table::Column::TableId.eq(table_id)) + .exec(txn) + .await?; + + let fragments: Vec<(FragmentId, I32Array, StreamNode, i32)> = Fragment::find() + .select_only() + .columns(vec![ + fragment::Column::FragmentId, + fragment::Column::UpstreamFragmentId, + fragment::Column::StreamNode, + fragment::Column::FragmentTypeMask, + ]) + .filter(fragment::Column::JobId.eq(table_id)) + .into_tuple() + .all(txn) + .await?; + + for (fragment_id, upstream_fragment_ids, stream_node, fragment_mask) in fragments { + let mut upstream_fragment_ids = upstream_fragment_ids.into_inner(); + + let dirty_upstream_fragment_ids = upstream_fragment_ids + .extract_if(|id| !all_fragment_ids.contains(id)) + .collect_vec(); + + if !dirty_upstream_fragment_ids.is_empty() { + // dirty downstream should be materialize fragment of table + assert!(fragment_mask & FragmentTypeFlag::Mview as i32 > 0); + + tracing::info!( + "cleaning dirty table sink fragment {:?} from downstream fragment {}", + dirty_upstream_fragment_ids, + fragment_id + ); + + let mut pb_stream_node = stream_node.to_protobuf(); + + visit_stream_node_cont(&mut pb_stream_node, |node| { + if let Some(NodeBody::Union(_)) = node.node_body { + node.input.retain_mut(|input| { + if let Some(NodeBody::Merge(merge_node)) = &mut input.node_body + && all_fragment_ids + .contains(&(merge_node.upstream_fragment_id as i32)) + { + true + } else { + false + } + }); + } + true + }); + + Fragment::update_many() + .col_expr( + fragment::Column::UpstreamFragmentId, + I32Array::from(upstream_fragment_ids).into(), + ) + .col_expr( + fragment::Column::StreamNode, + StreamNode::from_protobuf(&pb_stream_node).into(), + ) + .filter(fragment::Column::FragmentId.eq(fragment_id)) + .exec(txn) + .await?; + + let actors: Vec<(ActorId, ActorUpstreamActors)> = Actor::find() + .select_only() + .columns(vec![ + actor::Column::ActorId, + actor::Column::UpstreamActorIds, + ]) + .filter(actor::Column::FragmentId.eq(fragment_id)) + .into_tuple() + .all(txn) + .await?; + + for (actor_id, upstream_actor_ids) in actors { + let mut upstream_actor_ids = upstream_actor_ids.into_inner(); + + let dirty_actor_upstreams = upstream_actor_ids + .extract_if(|id, _| !all_fragment_ids.contains(id)) + .map(|(id, _)| id) + .collect_vec(); + + if !dirty_actor_upstreams.is_empty() { + tracing::debug!( + "cleaning dirty table sink fragment {:?} from downstream fragment {} actor {}", + dirty_actor_upstreams, + fragment_id, + actor_id, + ); + + Actor::update_many() + .col_expr( + actor::Column::UpstreamActorIds, + ActorUpstreamActors::from(upstream_actor_ids).into(), + ) + .filter(actor::Column::ActorId.eq(actor_id)) + .exec(txn) + .await?; + } + } + } + } + } + + Ok(true) + } + /// `finish_streaming_job` marks job related objects as `Created` and notify frontend. pub async fn finish_streaming_job(&self, job_id: ObjectId) -> MetaResult { let inner = self.inner.write().await; @@ -1487,6 +1669,52 @@ impl CatalogController { ); to_drop_objects.push(obj); + // Special handling for 'sink into table'. + if object_type != ObjectType::Sink { + // When dropping a table downstream, all incoming sinks of the table should be dropped as well. + if object_type == ObjectType::Table { + let table = Table::find_by_id(object_id) + .one(&txn) + .await? 
+ .ok_or_else(|| MetaError::catalog_id_not_found("table", object_id))?; + + let incoming_sinks = table.incoming_sinks.into_inner(); + + if !incoming_sinks.is_empty() { + let objs: Vec = Object::find() + .filter(object::Column::Oid.is_in(incoming_sinks)) + .into_partial_model() + .all(&txn) + .await?; + + to_drop_objects.extend(objs); + } + } + + let to_drop_object_ids: HashSet<_> = + to_drop_objects.iter().map(|obj| obj.oid).collect(); + + // When there is a table sink in the dependency chain of drop cascade, an error message needs to be returned currently to manually drop the sink. + for obj in &to_drop_objects { + if obj.obj_type == ObjectType::Sink { + let sink = Sink::find_by_id(obj.oid) + .one(&txn) + .await? + .ok_or_else(|| MetaError::catalog_id_not_found("sink", obj.oid))?; + + // Since dropping the sink into the table requires the frontend to handle some of the logic (regenerating the plan), it’s not compatible with the current cascade dropping. + if let Some(target_table) = sink.target_table + && !to_drop_object_ids.contains(&target_table) + { + bail!( + "Found sink into table with sink id {} in dependency, please drop them manually", + obj.oid, + ); + } + } + } + } + let to_drop_table_ids = to_drop_objects .iter() .filter(|obj| obj.obj_type == ObjectType::Table || obj.obj_type == ObjectType::Index) @@ -1856,22 +2084,28 @@ impl CatalogController { }); }}; } - let objs = get_referring_objects(object_id, &txn).await?; - // TODO: For sink into table. when sink into table is ready. - // if object_type == ObjectType::Table { - // let incoming_sinks: Vec<_> = Table::find_by_id(object_id) - // .select_only() - // .column(table::Column::IncomingSinks) - // .into_tuple() - // .one(&txn) - // .await? - // .ok_or_else(|| MetaError::catalog_id_not_found("table", object_id))?; - // objs.extend(incoming_sinks.into_iter().map(|id| PartialObject { - // oid: id as _, - // obj_type: ObjectType::Sink, - // ..Default::default() - // })); - // } + let mut objs = get_referring_objects(object_id, &txn).await?; + if object_type == ObjectType::Table { + let incoming_sinks: I32Array = Table::find_by_id(object_id) + .select_only() + .column(table::Column::IncomingSinks) + .into_tuple() + .one(&txn) + .await? + .ok_or_else(|| MetaError::catalog_id_not_found("table", object_id))?; + + objs.extend( + incoming_sinks + .into_inner() + .into_iter() + .map(|id| PartialObject { + oid: id, + obj_type: ObjectType::Sink, + schema_id: None, + database_id: None, + }), + ); + } for obj in objs { match obj.obj_type { diff --git a/src/meta/src/controller/rename.rs b/src/meta/src/controller/rename.rs index bde954a587fdf..15be4d7ef83b8 100644 --- a/src/meta/src/controller/rename.rs +++ b/src/meta/src/controller/rename.rs @@ -79,6 +79,7 @@ pub fn alter_relation_rename_refs(definition: &str, from: &str, to: &str) -> Str stmt: CreateSinkStatement { sink_from: CreateSink::AsQuery(query), + into_table_name: None, .. }, } => { @@ -89,9 +90,27 @@ pub fn alter_relation_rename_refs(definition: &str, from: &str, to: &str) -> Str stmt: CreateSinkStatement { sink_from: CreateSink::From(table_name), + into_table_name: None, .. }, } => replace_table_name(table_name, to), + Statement::CreateSink { + stmt: CreateSinkStatement { + sink_from, + into_table_name: Some(table_name), + .. 
+ } + } => { + let idx = table_name.0.len() - 1; + if table_name.0[idx].real_value() == from { + table_name.0[idx] = Ident::new_unchecked(to); + } else { + match sink_from { + CreateSink::From(table_name) => replace_table_name(table_name, to), + CreateSink::AsQuery(query) => QueryRewriter::rewrite_query(query, from, to), + } + } + } _ => unreachable!(), }; stmt.to_string() diff --git a/src/meta/src/controller/streaming_job.rs b/src/meta/src/controller/streaming_job.rs index 9bb8af6172469..7c4360a92f285 100644 --- a/src/meta/src/controller/streaming_job.rs +++ b/src/meta/src/controller/streaming_job.rs @@ -16,6 +16,7 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::num::NonZeroUsize; use itertools::Itertools; +use risingwave_common::bail; use risingwave_common::buffer::Bitmap; use risingwave_common::hash::{ActorMapping, ParallelUnitId, ParallelUnitMapping}; use risingwave_common::util::column_index_mapping::ColIndexMapping; @@ -64,8 +65,8 @@ use crate::barrier::Reschedule; use crate::controller::catalog::CatalogController; use crate::controller::rename::ReplaceTableExprRewriter; use crate::controller::utils::{ - check_relation_name_duplicate, ensure_object_id, ensure_user_id, get_fragment_actor_ids, - get_fragment_mappings, + check_relation_name_duplicate, check_sink_into_table_cycle, ensure_object_id, ensure_user_id, + get_fragment_actor_ids, get_fragment_mappings, }; use crate::controller::ObjectModel; use crate::manager::{NotificationVersion, SinkId, StreamingJob}; @@ -141,6 +142,21 @@ impl CatalogController { Table::insert(table).exec(&txn).await?; } StreamingJob::Sink(sink, _) => { + if let Some(target_table_id) = sink.target_table { + if check_sink_into_table_cycle( + target_table_id as ObjectId, + sink.dependent_relations + .iter() + .map(|id| *id as ObjectId) + .collect(), + &txn, + ) + .await? + { + bail!("Creating such a sink will result in circular dependency."); + } + } + let job_id = Self::create_streaming_job_obj( &txn, ObjectType::Sink, diff --git a/src/meta/src/controller/utils.rs b/src/meta/src/controller/utils.rs index ff19892d516b5..6c7e61a316add 100644 --- a/src/meta/src/controller/utils.rs +++ b/src/meta/src/controller/utils.rs @@ -118,6 +118,107 @@ pub fn construct_obj_dependency_query(obj_id: ObjectId) -> WithQuery { .to_owned() } +/// This function will construct a query using recursive cte to find if dependent objects are already relying on the target table. 
+/// +/// # Examples +/// +/// ``` +/// use risingwave_meta::controller::utils::construct_sink_cycle_check_query; +/// use sea_orm::sea_query::*; +/// use sea_orm::*; +/// +/// let query = construct_sink_cycle_check_query(1, vec![2, 3]); +/// +/// assert_eq!( +/// query.to_string(MysqlQueryBuilder), +/// r#"WITH RECURSIVE `used_by_object_ids_with_sink` (`oid`, `used_by`) AS (SELECT `oid`, `used_by` FROM `object_dependency` WHERE `object_dependency`.`oid` = 1 UNION ALL (SELECT `obj_dependency_with_sink`.`oid`, `obj_dependency_with_sink`.`used_by` FROM (SELECT `oid`, `used_by` FROM `object_dependency` UNION ALL (SELECT `sink_id`, `target_table` FROM `sink` WHERE `sink`.`target_table` IS NOT NULL)) AS `obj_dependency_with_sink` INNER JOIN `used_by_object_ids_with_sink` ON `used_by_object_ids_with_sink`.`used_by` = `obj_dependency_with_sink`.`oid` WHERE `used_by_object_ids_with_sink`.`used_by` <> `used_by_object_ids_with_sink`.`oid`)) SELECT COUNT(`used_by_object_ids_with_sink`.`used_by`) FROM `used_by_object_ids_with_sink` WHERE `used_by_object_ids_with_sink`.`used_by` IN (2, 3)"# +/// ); +/// assert_eq!( +/// query.to_string(PostgresQueryBuilder), +/// r#"WITH RECURSIVE "used_by_object_ids_with_sink" ("oid", "used_by") AS (SELECT "oid", "used_by" FROM "object_dependency" WHERE "object_dependency"."oid" = 1 UNION ALL (SELECT "obj_dependency_with_sink"."oid", "obj_dependency_with_sink"."used_by" FROM (SELECT "oid", "used_by" FROM "object_dependency" UNION ALL (SELECT "sink_id", "target_table" FROM "sink" WHERE "sink"."target_table" IS NOT NULL)) AS "obj_dependency_with_sink" INNER JOIN "used_by_object_ids_with_sink" ON "used_by_object_ids_with_sink"."used_by" = "obj_dependency_with_sink"."oid" WHERE "used_by_object_ids_with_sink"."used_by" <> "used_by_object_ids_with_sink"."oid")) SELECT COUNT("used_by_object_ids_with_sink"."used_by") FROM "used_by_object_ids_with_sink" WHERE "used_by_object_ids_with_sink"."used_by" IN (2, 3)"# +/// ); +/// assert_eq!( +/// query.to_string(SqliteQueryBuilder), +/// r#"WITH RECURSIVE "used_by_object_ids_with_sink" ("oid", "used_by") AS (SELECT "oid", "used_by" FROM "object_dependency" WHERE "object_dependency"."oid" = 1 UNION ALL SELECT "obj_dependency_with_sink"."oid", "obj_dependency_with_sink"."used_by" FROM (SELECT "oid", "used_by" FROM "object_dependency" UNION ALL SELECT "sink_id", "target_table" FROM "sink" WHERE "sink"."target_table" IS NOT NULL) AS "obj_dependency_with_sink" INNER JOIN "used_by_object_ids_with_sink" ON "used_by_object_ids_with_sink"."used_by" = "obj_dependency_with_sink"."oid" WHERE "used_by_object_ids_with_sink"."used_by" <> "used_by_object_ids_with_sink"."oid") SELECT COUNT("used_by_object_ids_with_sink"."used_by") FROM "used_by_object_ids_with_sink" WHERE "used_by_object_ids_with_sink"."used_by" IN (2, 3)"# +/// ); +/// ``` +pub fn construct_sink_cycle_check_query( + target_table: ObjectId, + dependent_objects: Vec, +) -> WithQuery { + let cte_alias = Alias::new("used_by_object_ids_with_sink"); + let depend_alias = Alias::new("obj_dependency_with_sink"); + + let mut base_query = SelectStatement::new() + .columns([ + object_dependency::Column::Oid, + object_dependency::Column::UsedBy, + ]) + .from(ObjectDependency) + .and_where(object_dependency::Column::Oid.eq(target_table)) + .to_owned(); + + let query_sink_deps = SelectStatement::new() + .columns([sink::Column::SinkId, sink::Column::TargetTable]) + .from(Sink) + .and_where(sink::Column::TargetTable.is_not_null()) + .to_owned(); + + let cte_referencing = Query::select() + 
.column((depend_alias.clone(), object_dependency::Column::Oid)) + .column((depend_alias.clone(), object_dependency::Column::UsedBy)) + .from_subquery( + SelectStatement::new() + .columns([ + object_dependency::Column::Oid, + object_dependency::Column::UsedBy, + ]) + .from(ObjectDependency) + .union(UnionType::All, query_sink_deps) + .to_owned(), + depend_alias.clone(), + ) + .inner_join( + cte_alias.clone(), + Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy)).eq(Expr::col(( + depend_alias.clone(), + object_dependency::Column::Oid, + ))), + ) + .and_where( + Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy)).ne(Expr::col(( + cte_alias.clone(), + object_dependency::Column::Oid, + ))), + ) + .to_owned(); + + let common_table_expr = CommonTableExpression::new() + .query(base_query.union(UnionType::All, cte_referencing).to_owned()) + .columns([ + object_dependency::Column::Oid, + object_dependency::Column::UsedBy, + ]) + .table_name(cte_alias.clone()) + .to_owned(); + + SelectStatement::new() + .expr(Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy)).count()) + .from(cte_alias.clone()) + .and_where( + Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy)) + .is_in(dependent_objects), + ) + .to_owned() + .with( + WithClause::new() + .recursive(true) + .cte(common_table_expr) + .to_owned(), + ) + .to_owned() +} + #[derive(Clone, DerivePartialModel, FromQueryResult)] #[sea_orm(entity = "Object")] pub struct PartialObject { @@ -175,6 +276,36 @@ where Ok(objects) } +/// Check if create a sink with given dependent objects into the target table will cause a cycle, return true if it will. +pub async fn check_sink_into_table_cycle( + target_table: ObjectId, + dependent_objs: Vec, + db: &C, +) -> MetaResult +where + C: ConnectionTrait, +{ + if dependent_objs.is_empty() { + return Ok(false); + } + + let query = construct_sink_cycle_check_query(target_table, dependent_objs); + let (sql, values) = query.build_any(&*db.get_database_backend().get_query_builder()); + + let res = db + .query_one(Statement::from_sql_and_values( + db.get_database_backend(), + sql, + values, + )) + .await? + .unwrap(); + + let cnt: i64 = res.try_get_by(0)?; + + Ok(cnt != 0) +} + /// `ensure_object_id` ensures the existence of target object in the cluster. 
pub async fn ensure_object_id( object_type: ObjectType, From 62d897ccc37004fa84e3d35631ee55e60ac35750 Mon Sep 17 00:00:00 2001 From: Zihao Xu Date: Fri, 23 Feb 2024 01:27:22 -0500 Subject: [PATCH 22/24] fix(optimizer): visit ternary ops when offset is specified for `tumble` (#15199) --- e2e_test/streaming/bug_fixes/issue_15198.slt | 23 ++++++++++++++++++++ src/frontend/src/expr/utils.rs | 22 ++++++++++++++----- 2 files changed, 40 insertions(+), 5 deletions(-) create mode 100644 e2e_test/streaming/bug_fixes/issue_15198.slt diff --git a/e2e_test/streaming/bug_fixes/issue_15198.slt b/e2e_test/streaming/bug_fixes/issue_15198.slt new file mode 100644 index 0000000000000..a69aede18c2c9 --- /dev/null +++ b/e2e_test/streaming/bug_fixes/issue_15198.slt @@ -0,0 +1,23 @@ +# https://github.com/risingwavelabs/risingwave/issues/15198 + +statement ok +SET RW_IMPLICIT_FLUSH TO TRUE; + +statement ok +create materialized view "tumble_with_offset" +as ( + with + input as ( + select 1 as id, TO_TIMESTAMP('2024-01-01 01:30:02', 'YYYY-MM-DD HH24:MI:SS') as timestamps + ) + select * + from tumble(input, timestamps, interval '1 DAY', '+6 HOURS') +); + +query ITTT +select * from tumble_with_offset; +---- +1 2024-01-01 01:30:02+00:00 2023-12-31 06:00:00+00:00 2024-01-01 06:00:00+00:00 + +statement ok +drop materialized view tumble_with_offset; diff --git a/src/frontend/src/expr/utils.rs b/src/frontend/src/expr/utils.rs index 7f768dbb63994..9db25b3dc554e 100644 --- a/src/frontend/src/expr/utils.rs +++ b/src/frontend/src/expr/utils.rs @@ -498,11 +498,23 @@ impl WatermarkAnalyzer { _ => WatermarkDerivation::None, }, ExprType::Subtract | ExprType::TumbleStart => { - match self.visit_binary_op(func_call.inputs()) { - (Constant, Constant) => Constant, - (Watermark(idx), Constant) => Watermark(idx), - (Nondecreasing, Constant) => Nondecreasing, - _ => WatermarkDerivation::None, + if func_call.inputs().len() == 3 { + // With `offset` specified + // e.g., select * from tumble(t1, start, interval, offset); + assert_eq!(ExprType::TumbleStart, func_call.func_type()); + match self.visit_ternary_op(func_call.inputs()) { + (Constant, Constant, Constant) => Constant, + (Watermark(idx), Constant, Constant) => Watermark(idx), + (Nondecreasing, Constant, Constant) => Nondecreasing, + _ => WatermarkDerivation::None, + } + } else { + match self.visit_binary_op(func_call.inputs()) { + (Constant, Constant) => Constant, + (Watermark(idx), Constant) => Watermark(idx), + (Nondecreasing, Constant) => Nondecreasing, + _ => WatermarkDerivation::None, + } } } ExprType::Multiply | ExprType::Divide | ExprType::Modulus => { From 59ce8df5bc89474380a8a2a2ccab81d7393eb0bc Mon Sep 17 00:00:00 2001 From: TennyZhuang Date: Fri, 23 Feb 2024 14:58:03 +0800 Subject: [PATCH 23/24] feat(stream): concurrent fetch for temporal join (take 2) (#15115) Signed-off-by: TennyZhuang --- src/stream/src/cache/managed_lru.rs | 8 ++ src/stream/src/executor/temporal_join.rs | 150 ++++++++++------------- 2 files changed, 76 insertions(+), 82 deletions(-) diff --git a/src/stream/src/cache/managed_lru.rs b/src/stream/src/cache/managed_lru.rs index d91eb664d43a2..9773f3fb51bf0 100644 --- a/src/stream/src/cache/managed_lru.rs +++ b/src/stream/src/cache/managed_lru.rs @@ -156,6 +156,14 @@ impl(&self, k: &Q) -> Option<&V> + where + K: Borrow, + Q: Hash + Eq + ?Sized, + { + self.inner.peek(k) + } + pub fn peek_mut(&mut self, k: &K) -> Option> { let v = self.inner.peek_mut(k); v.map(|inner| { diff --git a/src/stream/src/executor/temporal_join.rs 
b/src/stream/src/executor/temporal_join.rs index 32a0c5747083b..da0ac7b45dbdc 100644 --- a/src/stream/src/executor/temporal_join.rs +++ b/src/stream/src/executor/temporal_join.rs @@ -15,14 +15,13 @@ use std::alloc::Global; use std::collections::hash_map::Entry; use std::collections::HashMap; -use std::ops::{Deref, DerefMut}; use std::pin::pin; use std::sync::Arc; use either::Either; use futures::stream::{self, PollNext}; use futures::{pin_mut, StreamExt, TryStreamExt}; -use futures_async_stream::try_stream; +use futures_async_stream::{for_await, try_stream}; use local_stats_alloc::{SharedStatsAlloc, StatsAlloc}; use lru::DefaultHasher; use risingwave_common::array::{Op, StreamChunk}; @@ -108,99 +107,84 @@ impl JoinEntry { } } -struct JoinEntryWrapper(Option); - -impl EstimateSize for JoinEntryWrapper { - fn estimated_heap_size(&self) -> usize { - self.0.estimated_heap_size() - } -} - -impl JoinEntryWrapper { - const MESSAGE: &'static str = "the state should always be `Some`"; - - /// Take the value out of the wrapper. Panic if the value is `None`. - pub fn take(&mut self) -> JoinEntry { - self.0.take().expect(Self::MESSAGE) - } -} - -impl Deref for JoinEntryWrapper { - type Target = JoinEntry; - - fn deref(&self) -> &Self::Target { - self.0.as_ref().expect(Self::MESSAGE) - } -} - -impl DerefMut for JoinEntryWrapper { - fn deref_mut(&mut self) -> &mut Self::Target { - self.0.as_mut().expect(Self::MESSAGE) - } -} - struct TemporalSide { source: StorageTable, table_stream_key_indices: Vec, table_output_indices: Vec, - cache: ManagedLruCache>, + cache: ManagedLruCache>, ctx: ActorContextRef, join_key_data_types: Vec, } impl TemporalSide { - /// Lookup the temporal side table and return a `JoinEntry` which could be empty if there are no - /// matched records. - async fn lookup(&mut self, key: &K, epoch: HummockEpoch) -> StreamExecutorResult { + /// Fetch records from temporal side table and ensure the entry in the cache. + /// If already exists, the entry will be promoted. + async fn fetch_or_promote_keys( + &mut self, + keys: impl Iterator, + epoch: HummockEpoch, + ) -> StreamExecutorResult<()> { let table_id_str = self.source.table_id().to_string(); let actor_id_str = self.ctx.id.to_string(); let fragment_id_str = self.ctx.id.to_string(); - self.ctx - .streaming_metrics - .temporal_join_total_query_cache_count - .with_label_values(&[&table_id_str, &actor_id_str, &fragment_id_str]) - .inc(); - - let res = if self.cache.contains(key) { - let mut state = self.cache.peek_mut(key).unwrap(); - state.take() - } else { - // cache miss + + let mut futs = Vec::with_capacity(keys.size_hint().1.unwrap_or(0)); + for key in keys { self.ctx .streaming_metrics - .temporal_join_cache_miss_count + .temporal_join_total_query_cache_count .with_label_values(&[&table_id_str, &actor_id_str, &fragment_id_str]) .inc(); - let pk_prefix = key.deserialize(&self.join_key_data_types)?; - - let iter = self - .source - .batch_iter_with_pk_bounds( - HummockReadEpoch::NoWait(epoch), - &pk_prefix, - .., - false, - PrefetchOptions::default(), - ) - .await?; - - let mut entry = JoinEntry::default(); - - pin_mut!(iter); - while let Some(row) = iter.next_row().await? 
{ - entry.insert( - row.as_ref() - .project(&self.table_stream_key_indices) - .into_owned_row(), - row.project(&self.table_output_indices).into_owned_row(), - ); + if self.cache.get(key).is_none() { + self.ctx + .streaming_metrics + .temporal_join_cache_miss_count + .with_label_values(&[&table_id_str, &actor_id_str, &fragment_id_str]) + .inc(); + + futs.push(async { + let pk_prefix = key.deserialize(&self.join_key_data_types)?; + + let iter = self + .source + .batch_iter_with_pk_bounds( + HummockReadEpoch::NoWait(epoch), + &pk_prefix, + .., + false, + PrefetchOptions::default(), + ) + .await?; + + let mut entry = JoinEntry::default(); + + pin_mut!(iter); + while let Some(row) = iter.next_row().await? { + entry.insert( + row.as_ref() + .project(&self.table_stream_key_indices) + .into_owned_row(), + row.project(&self.table_output_indices).into_owned_row(), + ); + } + let key = key.clone(); + Ok((key, entry)) as StreamExecutorResult<_> + }); } + } - entry - }; + #[for_await] + for res in stream::iter(futs).buffered(16) { + let (key, entry) = res?; + self.cache.put(key, entry); + } + + Ok(()) + } - Ok(res) + fn force_peek(&self, key: &K) -> &JoinEntry { + self.cache.peek(key).expect("key should exists") } fn update( @@ -230,10 +214,6 @@ impl TemporalSide { } Ok(()) } - - pub fn insert_back(&mut self, key: K, state: JoinEntry) { - self.cache.put(key, JoinEntryWrapper(Some(state))); - } } enum InternalMessage { @@ -428,12 +408,20 @@ impl TemporalJoinExecutor ); let epoch = prev_epoch.expect("Chunk data should come after some barrier."); let keys = K::build(&self.left_join_keys, chunk.data_chunk())?; + let to_fetch_keys = chunk + .visibility() + .iter() + .zip_eq_debug(keys.iter()) + .filter_map(|(vis, key)| if vis { Some(key) } else { None }); + self.right_table + .fetch_or_promote_keys(to_fetch_keys, epoch) + .await?; for (r, key) in chunk.rows_with_holes().zip_eq_debug(keys.into_iter()) { let Some((op, left_row)) = r else { continue; }; if key.null_bitmap().is_subset(&null_matched) - && let join_entry = self.right_table.lookup(&key, epoch).await? + && let join_entry = self.right_table.force_peek(&key) && !join_entry.is_empty() { for right_row in join_entry.cached.values() { @@ -455,8 +443,6 @@ impl TemporalJoinExecutor } } } - // Insert back the state taken from ht. - self.right_table.insert_back(key.clone(), join_entry); } else if T == JoinType::LeftOuter { if let Some(chunk) = builder.append_row_update(op, left_row) { yield Message::Chunk(chunk); From 6033ee6c2a63bbb2d3c5147987c8e08b3b010de2 Mon Sep 17 00:00:00 2001 From: Noel Kwan <47273164+kwannoel@users.noreply.github.com> Date: Fri, 23 Feb 2024 15:02:47 +0800 Subject: [PATCH 24/24] feat(cmd_all): create directories in `single_node` mode (#15176) --- src/cmd_all/src/bin/risingwave.rs | 1 + src/cmd_all/src/single_node.rs | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/cmd_all/src/bin/risingwave.rs b/src/cmd_all/src/bin/risingwave.rs index 2c167fc1bdc20..e9173abefe1df 100644 --- a/src/cmd_all/src/bin/risingwave.rs +++ b/src/cmd_all/src/bin/risingwave.rs @@ -239,6 +239,7 @@ fn standalone(opts: StandaloneOpts) { /// high level options to standalone mode node-level options. /// We will start a standalone instance, with all nodes in the same process. 
fn single_node(opts: SingleNodeOpts) { + opts.create_store_directories().unwrap(); let opts = risingwave_cmd_all::map_single_node_opts_to_standalone_opts(&opts); let settings = risingwave_rt::LoggerSettings::from_opts(&opts) .with_target("risingwave_storage", Level::WARN) diff --git a/src/cmd_all/src/single_node.rs b/src/cmd_all/src/single_node.rs index b89f861f6e4fd..042a0feee9863 100644 --- a/src/cmd_all/src/single_node.rs +++ b/src/cmd_all/src/single_node.rs @@ -14,6 +14,7 @@ use std::sync::LazyLock; +use anyhow::Result; use clap::Parser; use home::home_dir; use risingwave_common::config::{AsyncStackTraceOption, MetaBackend}; @@ -64,7 +65,7 @@ pub struct SingleNodeOpts { /// The store directory used by meta store and object store. #[clap(long, env = "RW_SINGLE_NODE_STORE_DIRECTORY")] - store_directory: Option, + pub store_directory: Option, /// The address of the meta node. #[clap(long, env = "RW_SINGLE_NODE_META_ADDR")] @@ -142,6 +143,7 @@ pub fn map_single_node_opts_to_standalone_opts(opts: &SingleNodeOpts) -> ParsedS } } +// Defaults impl SingleNodeOpts { fn default_frontend_opts() -> FrontendOpts { FrontendOpts { @@ -227,3 +229,15 @@ impl SingleNodeOpts { } } } + +impl SingleNodeOpts { + pub fn create_store_directories(&self) -> Result<()> { + let store_directory = self + .store_directory + .as_ref() + .unwrap_or_else(|| &*DEFAULT_STORE_DIRECTORY); + std::fs::create_dir_all(format!("{}/meta_store", store_directory))?; + std::fs::create_dir_all(format!("{}/state_store", store_directory))?; + Ok(()) + } +}