From 55f222d807a2dc869de3d45d883f4a54c8df435c Mon Sep 17 00:00:00 2001
From: xxchan <xxchan22f@gmail.com>
Date: Tue, 2 Jan 2024 15:27:17 +0800
Subject: [PATCH] feat: add kafka backfill frontend

---
 .git-blame-ignore-revs                        |   3 +
 Cargo.lock                                    |   1 +
 proto/catalog.proto                           |  13 +-
 proto/stream_plan.proto                       |  19 +
 src/common/src/util/iter_util.rs              |  26 +
 src/common/src/util/stream_graph_visitor.rs   |   3 +
 src/connector/src/source/base.rs              |   4 +
 src/connector/src/source/cdc/mod.rs           |   2 +-
 .../src/binder/relation/table_or_source.rs    |   6 +
 src/frontend/src/handler/create_source.rs     |  17 +-
 .../src/optimizer/plan_node/logical_source.rs |   2 +
 .../plan_node/logical_source_backfill.rs      | 485 ++++++++++++++++++
 src/frontend/src/optimizer/plan_node/mod.rs   |  15 +-
 .../plan_node/stream_cdc_table_scan.rs        |   9 +-
 .../plan_node/stream_source_backfill.rs       | 189 +++++++
 src/frontend/src/planner/relation.rs          |  12 +-
 src/frontend/src/stream_fragmenter/mod.rs     |  12 +-
 src/meta/src/barrier/command.rs               |  30 +-
 src/meta/src/barrier/mod.rs                   |   1 +
 src/meta/src/barrier/progress.rs              |   8 +
 src/meta/src/barrier/schedule.rs              |   4 +
 src/meta/src/controller/catalog.rs            |   4 +-
 src/meta/src/controller/fragment.rs           |  46 +-
 src/meta/src/controller/streaming_job.rs      |   3 +-
 src/meta/src/manager/catalog/database.rs      |   6 +-
 src/meta/src/manager/catalog/fragment.rs      |   6 +-
 src/meta/src/manager/catalog/mod.rs           |   9 +-
 src/meta/src/manager/metadata.rs              |   4 +-
 src/meta/src/model/stream.rs                  |  57 +-
 src/meta/src/rpc/ddl_controller.rs            |   9 +-
 src/meta/src/stream/scale.rs                  |   2 +-
 src/meta/src/stream/source_manager.rs         | 231 ++++++---
 src/meta/src/stream/stream_graph/actor.rs     |  41 +-
 src/meta/src/stream/stream_graph/fragment.rs  | 145 ++++--
 src/meta/src/stream/stream_graph/schedule.rs  |   1 +
 src/meta/src/stream/stream_manager.rs         |  19 +-
 src/prost/src/lib.rs                          |  55 ++
 src/stream/Cargo.toml                         |   3 +-
 src/stream/src/executor/exchange/output.rs    |  11 +-
 .../src/executor/source/executor_core.rs      |   3 +
 .../src/executor/source/fetch_executor.rs     |   2 +-
 .../src/executor/source/fs_source_executor.rs |   2 +-
 .../source/kafka_backfill_executor.rs         | 417 +++++----------
 .../source/kafka_backfill_state_table.rs      | 132 +++++
 src/stream/src/executor/source/mod.rs         |   2 +
 .../src/executor/source/source_executor.rs    |  14 +-
 .../executor/source/state_table_handler.rs    |  43 +-
 src/stream/src/from_proto/mod.rs              |   3 +
 src/stream/src/from_proto/source_backfill.rs  | 170 ++++++
 49 files changed, 1723 insertions(+), 578 deletions(-)
 create mode 100644 src/frontend/src/optimizer/plan_node/logical_source_backfill.rs
 create mode 100644 src/frontend/src/optimizer/plan_node/stream_source_backfill.rs
 create mode 100644 src/stream/src/executor/source/kafka_backfill_state_table.rs
 create mode 100644 src/stream/src/from_proto/source_backfill.rs

diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index 6efd862273624..b8ca322d767a8 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -39,3 +39,6 @@ d70dba827c303373f3220c9733f7c7443e5c2d37
 
 # chore: cargo +nightly fmt (#13162) (format let-chains)
 c583e2c6c054764249acf484438c7bf7197765f4
+
+# chore: replace all ProstXxx with PbXxx (#8621)
+6fd8821f2e053957b183d648bea9c95b6703941f
diff --git a/Cargo.lock b/Cargo.lock
index 05e3bf4a9c34b..e3d41a4f35efd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -9915,6 +9915,7 @@ dependencies = [
  "auto_enums",
  "await-tree",
  "bytes",
+ "cfg-if",
  "criterion",
  "delta_btree_map",
  "educe 0.5.7",
diff --git a/proto/catalog.proto b/proto/catalog.proto
index ec7c68a3802ba..f376d4dc3bed7 100644
--- a/proto/catalog.proto
+++ b/proto/catalog.proto
@@ -62,9 +62,16 @@ message StreamSourceInfo {
   SchemaRegistryNameStrategy name_strategy = 10;
   optional string key_message_name = 11;
   plan_common.ExternalTableDesc external_table = 12;
-  // Whether the stream source is a cdc source streaming job.
-  // We need this field to differentiate the cdc source job until we fully implement risingwavelabs/rfcs#72.
-  bool cdc_source_job = 13;
+  // Whether the stream source has a streaming job.
+  // This is related to [RFC: Reusable Source Executor](https://github.com/risingwavelabs/rfcs/pull/72).
+  // Currently, the following sources have streaming jobs:
+  // - Direct CDC sources (mysql & postgresql)
+  // - MQ sources (Kafka, Pulsar, Kinesis, etc.)
+  bool has_streaming_job = 13;
+  // Only used when `has_streaming_job` is `true`.
+  // If `false`, `requires_singleton` will be set in the stream plan.
+  bool is_distributed = 15;
+  reserved "cdc_source_job"; // deprecated
   // Options specified by user in the FORMAT ENCODE clause.
   map<string, string> format_encode_options = 14;
 }
diff --git a/proto/stream_plan.proto b/proto/stream_plan.proto
index 0ccf6718ecad3..5101122f47372 100644
--- a/proto/stream_plan.proto
+++ b/proto/stream_plan.proto
@@ -205,6 +205,23 @@ message StreamFsFetchNode {
   StreamFsFetch node_inner = 1;
 }
 
+message SourceBackfillNode {
+  uint32 source_id = 1;
+  optional uint32 row_id_index = 3;
+  // XXX: is this all columns or only required columns?
+  repeated plan_common.ColumnCatalog columns = 4;
+  catalog.StreamSourceInfo info = 7;
+  string source_name = 8;
+  map<string, string> with_properties = 6;
+  // Streaming rate limit
+  // optional uint32 rate_limit = 9;
+
+  // fields above are the same as StreamSource
+
+  // `| partition_id | backfill_progress |`
+  catalog.Table state_table = 2;
+}
+
 message SinkDesc {
   reserved 4;
   reserved "columns";
@@ -758,6 +775,7 @@ message StreamNode {
     StreamFsFetchNode stream_fs_fetch = 138;
     StreamCdcScanNode stream_cdc_scan = 139;
     CdcFilterNode cdc_filter = 140;
+    SourceBackfillNode source_backfill = 141;
   }
   // The id for the operator. This is local per mview.
   // TODO: should better be a uint32.
@@ -852,6 +870,7 @@ enum FragmentTypeFlag {
   FRAGMENT_TYPE_FLAG_VALUES = 64;
   FRAGMENT_TYPE_FLAG_DML = 128;
   FRAGMENT_TYPE_FLAG_CDC_FILTER = 256;
+  FRAGMENT_TYPE_FLAG_SOURCE_BACKFILL = 512;
 }
 
 // The streaming context associated with a stream plan
diff --git a/src/common/src/util/iter_util.rs b/src/common/src/util/iter_util.rs
index 92f19a0ee46fc..7588171ad2f73 100644
--- a/src/common/src/util/iter_util.rs
+++ b/src/common/src/util/iter_util.rs
@@ -54,3 +54,29 @@ where
 {
     a.into_iter().zip_eq_fast(b)
 }
+
+pub trait IntoIteratorExt
+where
+    for<'a> &'a Self: IntoIterator,
+{
+    /// Shorter version of `self.iter().map(f).collect()`.
+    fn map_collect<A, B, F, BCollection>(&self, f: F) -> BCollection
+    where
+        F: FnMut(&A) -> B,
+        for<'a> &'a Self: IntoIterator<Item = &'a A>,
+        BCollection: FromIterator<B>,
+    {
+        self.into_iter().map(f).collect()
+    }
+
+    /// Shorter version of `self.iter().map(f).collect_vec()`.
+    fn map_to_vec<A, B, F>(&self, f: F) -> Vec<B>
+    where
+        F: FnMut(&A) -> B,
+        for<'a> &'a Self: IntoIterator<Item = &'a A>,
+    {
+        self.map_collect(f)
+    }
+}
+
+impl<T> IntoIteratorExt for T where for<'a> &'a Self: IntoIterator {}
diff --git a/src/common/src/util/stream_graph_visitor.rs b/src/common/src/util/stream_graph_visitor.rs
index ce2820752f120..c9518a03c2623 100644
--- a/src/common/src/util/stream_graph_visitor.rs
+++ b/src/common/src/util/stream_graph_visitor.rs
@@ -187,6 +187,9 @@ pub fn visit_stream_node_tables_inner<F>(
                     always!(source.state_table, "FsFetch");
                 }
             }
+            NodeBody::SourceBackfill(node) => {
+                always!(node.state_table, "SourceBackfill")
+            }
 
             // Sink
             NodeBody::Sink(node) => {
diff --git a/src/connector/src/source/base.rs b/src/connector/src/source/base.rs
index a6ddb359bacdb..6583aa5a49d1c 100644
--- a/src/connector/src/source/base.rs
+++ b/src/connector/src/source/base.rs
@@ -72,8 +72,10 @@ pub trait SourceProperties: TryFromHashmap + Clone + WithOptions {
     type SplitEnumerator: SplitEnumerator<Properties = Self, Split = Self::Split>;
     type SplitReader: SplitReader<Split = Self::Split, Properties = Self>;
 
+    /// Load additional info from `PbSource`. Currently only used by CDC.
     fn init_from_pb_source(&mut self, _source: &PbSource) {}
 
+    /// Load additional info from `ExternalTableDesc`. Currently only used by CDC.
     fn init_from_pb_cdc_table_desc(&mut self, _table_desc: &ExternalTableDesc) {}
 }
 
@@ -447,10 +449,12 @@ impl ConnectorProperties {
         matches!(self, ConnectorProperties::Kinesis(_))
     }
 
+    /// Load additional info from `PbSource`. Currently only used by CDC.
     pub fn init_from_pb_source(&mut self, source: &PbSource) {
         dispatch_source_prop!(self, prop, prop.init_from_pb_source(source))
     }
 
+    /// Load additional info from `ExternalTableDesc`. Currently only used by CDC.
     pub fn init_from_pb_cdc_table_desc(&mut self, cdc_table_desc: &ExternalTableDesc) {
         dispatch_source_prop!(self, prop, prop.init_from_pb_cdc_table_desc(cdc_table_desc))
     }
diff --git a/src/connector/src/source/cdc/mod.rs b/src/connector/src/source/cdc/mod.rs
index b3a2bc6554c60..03dc99ec6a9fd 100644
--- a/src/connector/src/source/cdc/mod.rs
+++ b/src/connector/src/source/cdc/mod.rs
@@ -136,7 +136,7 @@ where
         };
         self.table_schema = table_schema;
         if let Some(info) = source.info.as_ref() {
-            self.is_multi_table_shared = info.cdc_source_job;
+            self.is_multi_table_shared = info.has_streaming_job;
         }
     }
 
diff --git a/src/frontend/src/binder/relation/table_or_source.rs b/src/frontend/src/binder/relation/table_or_source.rs
index 8c16f14d7ce71..17c16b16dd5fc 100644
--- a/src/frontend/src/binder/relation/table_or_source.rs
+++ b/src/frontend/src/binder/relation/table_or_source.rs
@@ -58,6 +58,12 @@ impl From<&SourceCatalog> for BoundSource {
     }
 }
 
+impl BoundSource {
+    pub fn can_backfill(&self) -> bool {
+        self.catalog.info.has_streaming_job
+    }
+}
+
 impl Binder {
     /// Binds table or source, or logical view according to what we get from the catalog.
     pub fn bind_relation_by_name_inner(
diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs
index 7721162eb17f9..664cdb1a650fb 100644
--- a/src/frontend/src/handler/create_source.rs
+++ b/src/frontend/src/handler/create_source.rs
@@ -474,7 +474,7 @@ fn bind_columns_from_source_for_cdc(
         row_encode: row_encode_to_prost(&source_schema.row_encode) as i32,
         format_encode_options,
         use_schema_registry: json_schema_infer_use_schema_registry(&schema_config),
-        cdc_source_job: true,
+        has_streaming_job: true,
         ..Default::default()
     };
     if !format_encode_options_to_consume.is_empty() {
@@ -1130,18 +1130,22 @@ pub async fn handle_create_source(
     ensure_table_constraints_supported(&stmt.constraints)?;
     let sql_pk_names = bind_sql_pk_names(&stmt.columns, &stmt.constraints)?;
 
-    // gated the feature with a session variable
     let create_cdc_source_job = if is_cdc_connector(&with_properties) {
         CdcTableType::from_properties(&with_properties).can_backfill()
     } else {
         false
     };
+    let has_streaming_job = create_cdc_source_job || is_kafka_connector(&with_properties);
 
-    let (columns_from_resolve_source, source_info) = if create_cdc_source_job {
+    let (columns_from_resolve_source, mut source_info) = if create_cdc_source_job {
         bind_columns_from_source_for_cdc(&session, &source_schema, &with_properties)?
     } else {
         bind_columns_from_source(&session, &source_schema, &with_properties).await?
     };
+    if has_streaming_job {
+        source_info.has_streaming_job = true;
+        source_info.is_distributed = !create_cdc_source_job;
+    }
     let columns_from_sql = bind_sql_columns(&stmt.columns)?;
 
     let mut columns = bind_all_columns(
@@ -1235,21 +1239,18 @@ pub async fn handle_create_source(
 
     let catalog_writer = session.catalog_writer()?;
 
-    if create_cdc_source_job {
-        // create a streaming job for the cdc source, which will mark as *singleton* in the Fragmenter
+    if has_streaming_job {
         let graph = {
             let context = OptimizerContext::from_handler_args(handler_args);
-            // cdc source is an append-only source in plain json format
             let source_node = LogicalSource::new(
                 Some(Rc::new(SourceCatalog::from(&source))),
                 columns.clone(),
                 row_id_index,
-                false,
+                false, // Do not gen RowID. Gen RowID after backfill node instead.
                 false,
                 context.into(),
             )?;
 
-            // generate stream graph for cdc source job
             let stream_plan = source_node.to_stream(&mut ToStreamContext::new(false))?;
             let mut graph = build_graph(stream_plan)?;
             graph.parallelism =
diff --git a/src/frontend/src/optimizer/plan_node/logical_source.rs b/src/frontend/src/optimizer/plan_node/logical_source.rs
index f86f31c1e0765..08c8ae1059c0e 100644
--- a/src/frontend/src/optimizer/plan_node/logical_source.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_source.rs
@@ -565,6 +565,8 @@ impl ToStream for LogicalSource {
         {
             plan_prefix = Some(self.rewrite_new_s3_plan()?);
         }
+
+        // TODO: after SourceBackfill is added, we shouldn't put generated columns/row id here, and put them after backfill instead.
         plan = if self.core.for_table {
             dispatch_new_s3_plan(self.rewrite_to_stream_batch_source(), plan_prefix)
         } else {
diff --git a/src/frontend/src/optimizer/plan_node/logical_source_backfill.rs b/src/frontend/src/optimizer/plan_node/logical_source_backfill.rs
new file mode 100644
index 0000000000000..a713011710c6a
--- /dev/null
+++ b/src/frontend/src/optimizer/plan_node/logical_source_backfill.rs
@@ -0,0 +1,485 @@
+// Copyright 2024 RisingWave Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::cmp::{max, min};
+use std::ops::Bound;
+use std::ops::Bound::{Excluded, Included, Unbounded};
+use std::rc::Rc;
+
+
+use itertools::Itertools;
+use pretty_xmlish::{Pretty, XmlNode};
+use risingwave_common::catalog::{
+    ColumnCatalog, Schema, KAFKA_TIMESTAMP_COLUMN_NAME,
+};
+use risingwave_common::error::Result;
+use risingwave_connector::source::DataType;
+use risingwave_pb::plan_common::column_desc::GeneratedOrDefaultColumn;
+use risingwave_pb::plan_common::GeneratedColumnDesc;
+
+use super::generic::GenericPlanRef;
+use super::stream_watermark_filter::StreamWatermarkFilter;
+use super::utils::{childless_record, Distill};
+use super::{
+    generic, BatchSource, ColPrunable, ExprRewritable, Logical, LogicalFilter,
+    LogicalProject, PlanBase, PlanRef, PredicatePushdown, StreamRowIdGen, ToBatch, ToStream,
+};
+use crate::catalog::source_catalog::SourceCatalog;
+use crate::expr::{Expr, ExprImpl, ExprRewriter, ExprType, ExprVisitor, InputRef};
+use crate::optimizer::optimizer_context::OptimizerContextRef;
+use crate::optimizer::plan_node::expr_visitable::ExprVisitable;
+
+use crate::optimizer::plan_node::utils::column_names_pretty;
+use crate::optimizer::plan_node::{
+    ColumnPruningContext, PredicatePushdownContext, RewriteStreamContext,
+    StreamSourceBackfill, ToStreamContext,
+};
+use crate::optimizer::property::Distribution::HashShard;
+
+use crate::utils::{ColIndexMapping, Condition, IndexRewriter};
+
+/// Logical plan node for source backfill; becomes a `StreamSourceBackfill` in `to_stream`.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct LogicalSourceBackfill {
+    pub base: PlanBase<Logical>,
+    pub core: generic::Source,
+
+    // TODO: generated columns aren't handled yet.
+    /// Expressions to output. This field is present, and will be turned into a `Project` when
+    /// converting to a physical plan, only if there are generated columns.
+    output_exprs: Option<Vec<ExprImpl>>,
+}
+
+impl LogicalSourceBackfill {
+    pub fn new(
+        source_catalog: Option<Rc<SourceCatalog>>,
+        column_catalog: Vec<ColumnCatalog>,
+        row_id_index: Option<usize>,
+        gen_row_id: bool,
+        for_table: bool,
+        ctx: OptimizerContextRef,
+    ) -> Result<Self> {
+        let kafka_timestamp_range = (Bound::Unbounded, Bound::Unbounded);
+        let core = generic::Source {
+            catalog: source_catalog,
+            column_catalog,
+            row_id_index,
+            gen_row_id,
+            for_table,
+            ctx,
+            kafka_timestamp_range,
+        };
+
+        let base = PlanBase::new_logical_with_core(&core);
+
+        let output_exprs = Self::derive_output_exprs_from_generated_columns(&core.column_catalog)?;
+
+        Ok(LogicalSourceBackfill {
+            base,
+            core,
+            output_exprs,
+        })
+    }
+
+    pub fn with_catalog(
+        source_catalog: Rc<SourceCatalog>,
+        for_table: bool,
+        ctx: OptimizerContextRef,
+    ) -> Result<Self> {
+        let column_catalogs = source_catalog.columns.clone();
+        let row_id_index = source_catalog.row_id_index;
+        let gen_row_id = source_catalog.append_only;
+
+        Self::new(
+            Some(source_catalog),
+            column_catalogs,
+            row_id_index,
+            gen_row_id,
+            for_table,
+            ctx,
+        )
+    }
+
+    pub fn derive_output_exprs_from_generated_columns(
+        columns: &[ColumnCatalog],
+    ) -> Result<Option<Vec<ExprImpl>>> {
+        if !columns.iter().any(|c| c.is_generated()) {
+            return Ok(None);
+        }
+
+        let col_mapping = {
+            let mut mapping = vec![None; columns.len()];
+            let mut cur = 0;
+            for (idx, column) in columns.iter().enumerate() {
+                if !column.is_generated() {
+                    mapping[idx] = Some(cur);
+                    cur += 1;
+                } else {
+                    mapping[idx] = None;
+                }
+            }
+            ColIndexMapping::new(mapping, columns.len())
+        };
+
+        let mut rewriter = IndexRewriter::new(col_mapping);
+        let mut exprs = Vec::with_capacity(columns.len());
+        let mut cur = 0;
+        for column in columns {
+            let column_desc = &column.column_desc;
+            let ret_data_type = column_desc.data_type.clone();
+
+            if let Some(GeneratedOrDefaultColumn::GeneratedColumn(generated_column)) =
+                &column_desc.generated_or_default_column
+            {
+                let GeneratedColumnDesc { expr } = generated_column;
+                // TODO(yuhao): avoid this `from_expr_proto`.
+                let proj_expr =
+                    rewriter.rewrite_expr(ExprImpl::from_expr_proto(expr.as_ref().unwrap())?);
+                let casted_expr = proj_expr.cast_assign(ret_data_type)?;
+                exprs.push(casted_expr);
+            } else {
+                let input_ref = InputRef {
+                    data_type: ret_data_type,
+                    index: cur,
+                };
+                cur += 1;
+                exprs.push(ExprImpl::InputRef(Box::new(input_ref)));
+            }
+        }
+
+        Ok(Some(exprs))
+    }
+
+    /// `row_id_index` in source node should rule out generated columns
+    #[must_use]
+    fn rewrite_row_id_idx(columns: &[ColumnCatalog], row_id_index: Option<usize>) -> Option<usize> {
+        row_id_index.map(|idx| {
+            let mut cnt = 0;
+            for col in columns.iter().take(idx + 1) {
+                if col.is_generated() {
+                    cnt += 1;
+                }
+            }
+            idx - cnt
+        })
+    }
+
+    pub fn source_catalog(&self) -> Rc<SourceCatalog> {
+        self.core
+            .catalog
+            .clone()
+            .expect("source catalog should exist for LogicalSourceBackfill")
+    }
+
+    fn clone_with_kafka_timestamp_range(&self, range: (Bound<i64>, Bound<i64>)) -> Self {
+        let mut core = self.core.clone();
+        core.kafka_timestamp_range = range;
+        Self {
+            base: self.base.clone(),
+            core,
+            output_exprs: self.output_exprs.clone(),
+        }
+    }
+
+    /// The columns in stream/batch source node indicate the actual columns it will produce,
+    /// instead of the columns defined in source catalog. The difference is generated columns.
+    #[must_use]
+    fn rewrite_to_stream_batch_source(&self) -> generic::Source {
+        let column_catalog = self.core.column_catalog.clone();
+        // Filter out the generated columns.
+        let row_id_index = Self::rewrite_row_id_idx(&column_catalog, self.core.row_id_index);
+        let source_column_catalogs = column_catalog
+            .into_iter()
+            .filter(|c| !c.is_generated())
+            .collect_vec();
+        generic::Source {
+            catalog: self.core.catalog.clone(),
+            column_catalog: source_column_catalogs,
+            row_id_index,
+            ctx: self.core.ctx.clone(),
+            ..self.core
+        }
+    }
+}
+
+impl_plan_tree_node_for_leaf! {LogicalSourceBackfill}
+impl Distill for LogicalSourceBackfill {
+    fn distill<'a>(&self) -> XmlNode<'a> {
+        let src = Pretty::from(self.source_catalog().name.clone());
+        let time = Pretty::debug(&self.core.kafka_timestamp_range);
+        let fields = vec![
+            ("source", src),
+            ("columns", column_names_pretty(self.schema())),
+            ("time_range", time),
+        ];
+
+        childless_record("LogicalSourceBackfill", fields)
+    }
+}
+
+impl ColPrunable for LogicalSourceBackfill {
+    fn prune_col(&self, required_cols: &[usize], _ctx: &mut ColumnPruningContext) -> PlanRef {
+        let mapping = ColIndexMapping::with_remaining_columns(required_cols, self.schema().len());
+        LogicalProject::with_mapping(self.clone().into(), mapping).into()
+    }
+}
+
+impl ExprRewritable for LogicalSourceBackfill {
+    fn has_rewritable_expr(&self) -> bool {
+        self.output_exprs.is_some()
+    }
+
+    fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef {
+        let mut output_exprs = self.output_exprs.clone();
+
+        for expr in output_exprs.iter_mut().flatten() {
+            *expr = r.rewrite_expr(expr.clone());
+        }
+
+        Self {
+            output_exprs,
+            ..self.clone()
+        }
+        .into()
+    }
+}
+
+impl ExprVisitable for LogicalSourceBackfill {
+    fn visit_exprs(&self, v: &mut dyn ExprVisitor) {
+        self.output_exprs
+            .iter()
+            .flatten()
+            .for_each(|e| v.visit_expr(e));
+    }
+}
+
+/// A util function to extract kafka offset timestamp range.
+///
+/// Currently we only support limiting kafka offset timestamp range using literals, e.g. we only
+/// support expressions like `_rw_kafka_timestamp <= '2022-10-11 1:00:00+00:00'`.
+///
+/// # Parameters
+///
+/// * `expr`: Expression to be consumed.
+/// * `range`: Original timestamp range, if `expr` can be recognized, we will update `range`.
+/// * `schema`: Input schema.
+///
+/// # Return Value
+///
+/// If `expr` can be recognized and consumed by this function, then we return `None`.
+/// Otherwise `expr` is returned.
+fn expr_to_kafka_timestamp_range(
+    expr: ExprImpl,
+    range: &mut (Bound<i64>, Bound<i64>),
+    schema: &Schema,
+) -> Option<ExprImpl> {
+    let merge_upper_bound = |first, second| -> Bound<i64> {
+        match (first, second) {
+            (first, Unbounded) => first,
+            (Unbounded, second) => second,
+            (Included(f1), Included(f2)) => Included(min(f1, f2)),
+            (Included(f1), Excluded(f2)) => {
+                if f1 < f2 {
+                    Included(f1)
+                } else {
+                    Excluded(f2)
+                }
+            }
+            (Excluded(f1), Included(f2)) => {
+                if f2 < f1 {
+                    Included(f2)
+                } else {
+                    Excluded(f1)
+                }
+            }
+            (Excluded(f1), Excluded(f2)) => Excluded(min(f1, f2)),
+        }
+    };
+
+    let merge_lower_bound = |first, second| -> Bound<i64> {
+        match (first, second) {
+            (first, Unbounded) => first,
+            (Unbounded, second) => second,
+            (Included(f1), Included(f2)) => Included(max(f1, f2)),
+            (Included(f1), Excluded(f2)) => {
+                if f1 > f2 {
+                    Included(f1)
+                } else {
+                    Excluded(f2)
+                }
+            }
+            (Excluded(f1), Included(f2)) => {
+                if f2 > f1 {
+                    Included(f2)
+                } else {
+                    Excluded(f1)
+                }
+            }
+            (Excluded(f1), Excluded(f2)) => Excluded(max(f1, f2)),
+        }
+    };
+
+    let extract_timestampz_literal = |expr: &ExprImpl| -> Result<Option<(i64, bool)>> {
+        match expr {
+            ExprImpl::FunctionCall(function_call) if function_call.inputs().len() == 2 => {
+                match (&function_call.inputs()[0], &function_call.inputs()[1]) {
+                    (ExprImpl::InputRef(input_ref), literal)
+                        if let Some(datum) = literal.try_fold_const().transpose()?
+                            && schema.fields[input_ref.index].name
+                                == KAFKA_TIMESTAMP_COLUMN_NAME
+                            && literal.return_type() == DataType::Timestamptz =>
+                    {
+                        Ok(Some((
+                            datum.unwrap().into_timestamptz().timestamp_millis(),
+                            false,
+                        )))
+                    }
+                    (literal, ExprImpl::InputRef(input_ref))
+                        if let Some(datum) = literal.try_fold_const().transpose()?
+                            && schema.fields[input_ref.index].name
+                                == KAFKA_TIMESTAMP_COLUMN_NAME
+                            && literal.return_type() == DataType::Timestamptz =>
+                    {
+                        Ok(Some((
+                            datum.unwrap().into_timestamptz().timestamp_millis(),
+                            true,
+                        )))
+                    }
+                    _ => Ok(None),
+                }
+            }
+            _ => Ok(None),
+        }
+    };
+
+    match &expr {
+        ExprImpl::FunctionCall(function_call) => {
+            if let Ok(Some((timestampz_literal, reverse))) = extract_timestampz_literal(&expr) {
+                match function_call.func_type() {
+                    ExprType::GreaterThan => {
+                        if reverse {
+                            range.1 = merge_upper_bound(range.1, Excluded(timestampz_literal));
+                        } else {
+                            range.0 = merge_lower_bound(range.0, Excluded(timestampz_literal));
+                        }
+
+                        None
+                    }
+                    ExprType::GreaterThanOrEqual => {
+                        if reverse {
+                            range.1 = merge_upper_bound(range.1, Included(timestampz_literal));
+                        } else {
+                            range.0 = merge_lower_bound(range.0, Included(timestampz_literal));
+                        }
+                        None
+                    }
+                    ExprType::Equal => {
+                        range.0 = merge_lower_bound(range.0, Included(timestampz_literal));
+                        range.1 = merge_upper_bound(range.1, Included(timestampz_literal));
+                        None
+                    }
+                    ExprType::LessThan => {
+                        if reverse {
+                            range.0 = merge_lower_bound(range.0, Excluded(timestampz_literal));
+                        } else {
+                            range.1 = merge_upper_bound(range.1, Excluded(timestampz_literal));
+                        }
+                        None
+                    }
+                    ExprType::LessThanOrEqual => {
+                        if reverse {
+                            range.0 = merge_lower_bound(range.0, Included(timestampz_literal));
+                        } else {
+                            range.1 = merge_upper_bound(range.1, Included(timestampz_literal));
+                        }
+                        None
+                    }
+                    _ => Some(expr),
+                }
+            } else {
+                Some(expr)
+            }
+        }
+        _ => Some(expr),
+    }
+}
+
+impl PredicatePushdown for LogicalSourceBackfill {
+    fn predicate_pushdown(
+        &self,
+        predicate: Condition,
+        _ctx: &mut PredicatePushdownContext,
+    ) -> PlanRef {
+        let mut range = self.core.kafka_timestamp_range;
+
+        let mut new_conjunctions = Vec::with_capacity(predicate.conjunctions.len());
+        for expr in predicate.conjunctions {
+            if let Some(e) = expr_to_kafka_timestamp_range(expr, &mut range, self.base.schema()) {
+                // Not recognized, so push back
+                new_conjunctions.push(e);
+            }
+        }
+
+        let new_source = self.clone_with_kafka_timestamp_range(range).into();
+
+        if new_conjunctions.is_empty() {
+            new_source
+        } else {
+            LogicalFilter::create(
+                new_source,
+                Condition {
+                    conjunctions: new_conjunctions,
+                },
+            )
+        }
+    }
+}
+
+impl ToBatch for LogicalSourceBackfill {
+    fn to_batch(&self) -> Result<PlanRef> {
+        // TODO:
+        let source = BatchSource::new(self.core.clone());
+        Ok(source.into())
+    }
+}
+
+impl ToStream for LogicalSourceBackfill {
+    fn to_stream(&self, _ctx: &mut ToStreamContext) -> Result<PlanRef> {
+        let mut plan = StreamSourceBackfill::new(self.rewrite_to_stream_batch_source()).into();
+
+        let catalog = self.source_catalog();
+        if !catalog.watermark_descs.is_empty() && !self.core.for_table {
+            plan = StreamWatermarkFilter::new(plan, catalog.watermark_descs.clone()).into();
+        }
+
+        assert!(!(self.core.gen_row_id && self.core.for_table));
+        if let Some(row_id_index) = self.core.row_id_index
+            && self.core.gen_row_id
+        {
+            plan = StreamRowIdGen::new_with_dist(plan, row_id_index, HashShard(vec![row_id_index]))
+                .into();
+        }
+        Ok(plan)
+    }
+
+    fn logical_rewrite_for_stream(
+        &self,
+        _ctx: &mut RewriteStreamContext,
+    ) -> Result<(PlanRef, ColIndexMapping)> {
+        Ok((
+            self.clone().into(),
+            ColIndexMapping::identity(self.schema().len()),
+        ))
+    }
+}
diff --git a/src/frontend/src/optimizer/plan_node/mod.rs b/src/frontend/src/optimizer/plan_node/mod.rs
index a0475c4ae092e..977d4c00b16a6 100644
--- a/src/frontend/src/optimizer/plan_node/mod.rs
+++ b/src/frontend/src/optimizer/plan_node/mod.rs
@@ -676,8 +676,8 @@ impl dyn PlanNode {
 impl dyn PlanNode {
     /// Serialize the plan node and its children to a stream plan proto.
     ///
-    /// Note that [`StreamTableScan`] has its own implementation of `to_stream_prost`. We have a
-    /// hook inside to do some ad-hoc thing for [`StreamTableScan`].
+    /// Note that some operators have their own implementation of `to_stream_prost`. We have a
+    /// hook inside to do some ad-hoc things.
     pub fn to_stream_prost(
         &self,
         state: &mut BuildFragmentGraphState,
@@ -690,6 +690,9 @@ impl dyn PlanNode {
         if let Some(stream_cdc_table_scan) = self.as_stream_cdc_table_scan() {
             return stream_cdc_table_scan.adhoc_to_stream_prost(state);
         }
+        if let Some(stream_source_backfill) = self.as_stream_source_backfill() {
+            return stream_source_backfill.adhoc_to_stream_prost(state);
+        }
         if let Some(stream_share) = self.as_stream_share() {
             return stream_share.adhoc_to_stream_prost(state);
         }
@@ -824,6 +827,7 @@ mod logical_project_set;
 mod logical_scan;
 mod logical_share;
 mod logical_source;
+mod logical_source_backfill;
 mod logical_sys_scan;
 mod logical_table_function;
 mod logical_topn;
@@ -853,6 +857,7 @@ mod stream_simple_agg;
 mod stream_sink;
 mod stream_sort;
 mod stream_source;
+mod stream_source_backfill;
 mod stream_stateless_simple_agg;
 mod stream_table_scan;
 mod stream_topn;
@@ -915,6 +920,7 @@ pub use logical_project_set::LogicalProjectSet;
 pub use logical_scan::LogicalScan;
 pub use logical_share::LogicalShare;
 pub use logical_source::LogicalSource;
+pub use logical_source_backfill::LogicalSourceBackfill;
 pub use logical_sys_scan::LogicalSysScan;
 pub use logical_table_function::LogicalTableFunction;
 pub use logical_topn::LogicalTopN;
@@ -946,6 +952,7 @@ pub use stream_simple_agg::StreamSimpleAgg;
 pub use stream_sink::StreamSink;
 pub use stream_sort::StreamEowcSort;
 pub use stream_source::StreamSource;
+pub use stream_source_backfill::StreamSourceBackfill;
 pub use stream_stateless_simple_agg::StreamStatelessSimpleAgg;
 pub use stream_table_scan::StreamTableScan;
 pub use stream_temporal_join::StreamTemporalJoin;
@@ -987,6 +994,7 @@ macro_rules! for_all_plan_nodes {
             , { Logical, CdcScan }
             , { Logical, SysScan }
             , { Logical, Source }
+            , { Logical, SourceBackfill }
             , { Logical, Insert }
             , { Logical, Delete }
             , { Logical, Update }
@@ -1040,6 +1048,7 @@ macro_rules! for_all_plan_nodes {
             , { Stream, CdcTableScan }
             , { Stream, Sink }
             , { Stream, Source }
+            , { Stream, SourceBackfill }
             , { Stream, HashJoin }
             , { Stream, Exchange }
             , { Stream, HashAgg }
@@ -1083,6 +1092,7 @@ macro_rules! for_logical_plan_nodes {
             , { Logical, CdcScan }
             , { Logical, SysScan }
             , { Logical, Source }
+            , { Logical, SourceBackfill }
             , { Logical, Insert }
             , { Logical, Delete }
             , { Logical, Update }
@@ -1156,6 +1166,7 @@ macro_rules! for_stream_plan_nodes {
             , { Stream, CdcTableScan }
             , { Stream, Sink }
             , { Stream, Source }
+            , { Stream, SourceBackfill }
             , { Stream, HashAgg }
             , { Stream, SimpleAgg }
             , { Stream, StatelessSimpleAgg }
diff --git a/src/frontend/src/optimizer/plan_node/stream_cdc_table_scan.rs b/src/frontend/src/optimizer/plan_node/stream_cdc_table_scan.rs
index 65eaba0525d04..8a1df949a3f11 100644
--- a/src/frontend/src/optimizer/plan_node/stream_cdc_table_scan.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_cdc_table_scan.rs
@@ -132,6 +132,7 @@ impl StreamNode for StreamCdcTableScan {
 }
 
 impl StreamCdcTableScan {
+    /// plan: merge -> filter -> exchange(simple) -> `stream_scan`
     pub fn adhoc_to_stream_prost(
         &self,
         state: &mut BuildFragmentGraphState,
@@ -241,10 +242,10 @@ impl StreamCdcTableScan {
             .collect_vec();
 
         tracing::debug!(
-            "output_column_ids: {:?}, upstream_column_ids: {:?}, output_indices: {:?}",
-            self.core.output_column_ids(),
-            upstream_column_ids,
-            output_indices
+            output_column_ids=?self.core.output_column_ids(),
+            ?upstream_column_ids,
+            ?output_indices,
+            "stream cdc table scan output indices"
         );
 
         let stream_scan_body = PbNodeBody::StreamCdcScan(StreamCdcScanNode {
diff --git a/src/frontend/src/optimizer/plan_node/stream_source_backfill.rs b/src/frontend/src/optimizer/plan_node/stream_source_backfill.rs
new file mode 100644
index 0000000000000..e5f2b8c221611
--- /dev/null
+++ b/src/frontend/src/optimizer/plan_node/stream_source_backfill.rs
@@ -0,0 +1,189 @@
+// Copyright 2024 RisingWave Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+use std::rc::Rc;
+
+use fixedbitset::FixedBitSet;
+use itertools::Itertools;
+use pretty_xmlish::{Pretty, XmlNode};
+use risingwave_common::catalog::Field;
+use risingwave_common::types::DataType;
+use risingwave_common::util::sort_util::OrderType;
+use risingwave_pb::stream_plan::stream_node::{NodeBody, PbNodeBody};
+use risingwave_pb::stream_plan::PbStreamNode;
+
+use super::stream::prelude::*;
+use super::utils::TableCatalogBuilder;
+use super::{PlanBase, PlanRef};
+use crate::catalog::source_catalog::SourceCatalog;
+use crate::optimizer::plan_node::expr_visitable::ExprVisitable;
+use crate::optimizer::plan_node::utils::{childless_record, Distill};
+use crate::optimizer::plan_node::{generic, ExprRewritable, StreamNode};
+use crate::optimizer::property::Distribution;
+use crate::scheduler::SchedulerResult;
+use crate::stream_fragmenter::BuildFragmentGraphState;
+use crate::{Explain, TableCatalog, WithOptions};
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct StreamSourceBackfill {
+    pub base: PlanBase<Stream>, // stream plan base, derived from `core` in `new`
+    core: generic::Source, // the generic source description passed to `new`
+}
+
+impl_plan_tree_node_for_leaf! { StreamSourceBackfill }
+
+impl StreamSourceBackfill {
+    /// Builds the node from `source`; the plan base is derived from it with `SomeShard`
+    /// distribution.
+    pub fn new(source: generic::Source) -> Self {
+        // `true` when no catalog is attached, otherwise the catalog's `append_only` flag.
+        let append_only = source.catalog.as_ref().map_or(true, |s| s.append_only);
+        // One bit per catalog column, none of them set.
+        let column_bits = FixedBitSet::with_capacity(source.column_catalog.len());
+        let dist = Distribution::SomeShard;
+        let base = PlanBase::new_stream_with_core(&source, dist, append_only, false, column_bits);
+        Self { base, core: source }
+    }
+
+    /// Returns the name of every column in the core's column catalog,
+    /// in catalog order.
+    fn get_columns(&self) -> Vec<&str> {
+        let catalog = &self.core.column_catalog;
+        let names = catalog.iter().map(|column| column.name());
+        names.collect()
+    }
+
+    /// Returns the source catalog; panics if the core has none.
+    pub fn source_catalog(&self) -> Rc<SourceCatalog> {
+        let catalog = self.core.catalog.clone();
+        // Invariant asserted here: a backfill node always carries a catalog.
+        catalog.expect("source backfill should have source catalog")
+    }
+
+    /// Builds the catalog of the operator's internal state table: a varchar `partition_id`
+    /// column, used as the ascending order column, and a jsonb `backfill_progress` column.
+    /// The schema takes no input from the source's own columns.
+    pub fn infer_internal_table_catalog() -> TableCatalog {
+        // Empty options: source state does not maintain `retention_seconds`, so defaults apply.
+        let options = WithOptions::new(HashMap::default());
+        let mut catalog = TableCatalogBuilder::new(options);
+
+        let partition_id = Field {
+            data_type: DataType::Varchar,
+            name: String::from("partition_id"),
+            sub_fields: Vec::new(),
+            type_name: String::new(),
+        };
+        let progress = Field {
+            data_type: DataType::Jsonb,
+            name: String::from("backfill_progress"),
+            sub_fields: Vec::new(),
+            type_name: String::new(),
+        };
+
+        let key_idx = catalog.add_column(&partition_id);
+        catalog.add_column(&progress);
+        catalog.add_order_column(key_idx, OrderType::ascending());
+        // NOTE(review): `vec![]` and `1` are forwarded to `build` unchanged; presumably the
+        // distribution key and a read-prefix hint — confirm against `TableCatalogBuilder::build`.
+        catalog.build(vec![], 1)
+    }
+
+    /// Serializes this node into the stream plan proto as a two-node sub-plan,
+    /// `merge -> backfill`: the returned backfill node has a single `Merge` input.
+    ///
+    /// A table id is drawn from `state` for the operator's internal state table.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the node has no stream key.
+    pub fn adhoc_to_stream_prost(
+        &self,
+        state: &mut BuildFragmentGraphState,
+    ) -> SchedulerResult<PbStreamNode> {
+        use risingwave_pb::stream_plan::*;
+
+        let Some(key_indices) = self.stream_key() else {
+            panic!(
+                "should always have a stream key in the stream plan but not, sub plan: {}",
+                PlanRef::from(self.clone()).explain_to_string()
+            )
+        };
+        let stream_key = key_indices.iter().map(|idx| *idx as u32).collect_vec();
+
+        let catalog = self.source_catalog();
+        // The state table's schema is fixed; only its id is specific to this node.
+        let state_table = Self::infer_internal_table_catalog()
+            .with_id(state.gen_table_id_wrapped())
+            .to_internal_table_prost();
+
+        let columns = self.core.column_catalog.iter().map(|c| c.to_protobuf());
+        let backfill = SourceBackfillNode {
+            source_id: catalog.id,
+            source_name: catalog.name.clone(),
+            state_table: Some(state_table),
+            info: Some(catalog.info.clone()),
+            // XXX: what's the usage of this?
+            row_id_index: self.core.row_id_index.map(|index| index as _),
+            columns: columns.collect_vec(),
+            with_properties: catalog.with_properties.clone().into_iter().collect(),
+            // rate_limit: self.base.ctx().overwrite_options().streaming_rate_limit,
+        };
+
+        // Placeholder for the upstream: the merge node body will be filled by the
+        // `ActorBuilder` on the meta service.
+        let fields = self.schema().to_prost();
+        let upstream = PbStreamNode {
+            node_body: Some(PbNodeBody::Merge(Default::default())),
+            identity: "Upstream".into(),
+            fields: fields.clone(),
+            stream_key: vec![], // not used
+            ..Default::default()
+        };
+
+        // plan: merge -> backfill
+        Ok(PbStreamNode {
+            fields,
+            input: vec![upstream],
+            node_body: Some(PbNodeBody::SourceBackfill(backfill)),
+            stream_key,
+            operator_id: self.base.id().0 as u64,
+            identity: self.distill_to_string(),
+            append_only: self.append_only(),
+        })
+    }
+}
+
+impl Distill for StreamSourceBackfill {
+    /// Renders a childless record with a single `columns` field listing the column names.
+    fn distill<'a>(&self) -> XmlNode<'a> {
+        let mut names = Vec::new();
+        for name in self.get_columns() {
+            names.push(Pretty::from(name.to_string()));
+        }
+        let fields = vec![("columns", Pretty::Array(names))];
+        childless_record("StreamSourceBackfill", fields)
+    }
+}
+
+impl ExprRewritable for StreamSourceBackfill {}
+
+impl ExprVisitable for StreamSourceBackfill {}
+
+impl StreamNode for StreamSourceBackfill {
+    fn to_stream_prost_body(&self, _state: &mut BuildFragmentGraphState) -> NodeBody {
+        unreachable!("stream source backfill cannot be converted into a prost body -- call `adhoc_to_stream_prost` instead.")
+    } // always panics: this node is serialized via `adhoc_to_stream_prost` instead
+}
diff --git a/src/frontend/src/planner/relation.rs b/src/frontend/src/planner/relation.rs
index 42fdc83a3f933..62414cb2ab005 100644
--- a/src/frontend/src/planner/relation.rs
+++ b/src/frontend/src/planner/relation.rs
@@ -27,7 +27,8 @@ use crate::binder::{
 use crate::expr::{Expr, ExprImpl, ExprType, FunctionCall, InputRef};
 use crate::optimizer::plan_node::{
     LogicalApply, LogicalHopWindow, LogicalJoin, LogicalProject, LogicalScan, LogicalShare,
-    LogicalSource, LogicalSysScan, LogicalTableFunction, LogicalValues, PlanRef,
+    LogicalSource, LogicalSourceBackfill, LogicalSysScan, LogicalTableFunction, LogicalValues,
+    PlanRef,
 };
 use crate::optimizer::property::Cardinality;
 use crate::planner::Planner;
@@ -85,7 +86,14 @@ impl Planner {
     }
 
     pub(super) fn plan_source(&mut self, source: BoundSource) -> Result<PlanRef> {
-        Ok(LogicalSource::with_catalog(Rc::new(source.catalog), false, self.ctx())?.into())
+        // Backfill-capable sources are planned as `LogicalSourceBackfill`.
+        let can_backfill = source.can_backfill();
+        let catalog = Rc::new(source.catalog);
+        Ok(if can_backfill {
+            LogicalSourceBackfill::with_catalog(catalog, false, self.ctx())?.into()
+        } else {
+            LogicalSource::with_catalog(catalog, false, self.ctx())?.into()
+        })
     }
 
     pub(super) fn plan_join(&mut self, join: BoundJoin) -> Result<PlanRef> {
diff --git a/src/frontend/src/stream_fragmenter/mod.rs b/src/frontend/src/stream_fragmenter/mod.rs
index 344aa103deeb4..8d2185539523b 100644
--- a/src/frontend/src/stream_fragmenter/mod.rs
+++ b/src/frontend/src/stream_fragmenter/mod.rs
@@ -264,9 +264,9 @@ fn build_fragment(
 
             if let Some(source) = node.source_inner.as_ref()
                 && let Some(source_info) = source.info.as_ref()
-                && source_info.cdc_source_job
+                && source_info.has_streaming_job
+                && !source_info.is_distributed
             {
-                tracing::debug!("mark cdc source job as singleton");
                 current_fragment.requires_singleton = true;
             }
         }
@@ -294,6 +294,7 @@ fn build_fragment(
         }
 
         NodeBody::StreamCdcScan(_) => {
+            // XXX: Should we use a different flag for CDC scan?
             current_fragment.fragment_type_mask |= FragmentTypeFlag::StreamScan as u32;
             // the backfill algorithm is not parallel safe
             current_fragment.requires_singleton = true;
@@ -309,6 +310,13 @@ fn build_fragment(
                 .upstream_table_ids
                 .push(node.upstream_source_id);
         }
+        NodeBody::SourceBackfill(node) => {
+            current_fragment.fragment_type_mask |= FragmentTypeFlag::SourceBackfill as u32;
+            // memorize upstream source id for later use
+            let source_id = node.source_id;
+            state.dependent_table_ids.insert(source_id.into());
+            current_fragment.upstream_table_ids.push(source_id);
+        }
 
         NodeBody::Now(_) => {
             // TODO: Remove this and insert a `BarrierRecv` instead.
diff --git a/src/meta/src/barrier/command.rs b/src/meta/src/barrier/command.rs
index f388944ff008e..0aed7a8aa9480 100644
--- a/src/meta/src/barrier/command.rs
+++ b/src/meta/src/barrier/command.rs
@@ -30,9 +30,8 @@ use risingwave_pb::stream_plan::barrier_mutation::Mutation;
 use risingwave_pb::stream_plan::throttle_mutation::RateLimit;
 use risingwave_pb::stream_plan::update_mutation::*;
 use risingwave_pb::stream_plan::{
-    AddMutation, BarrierMutation, CombinedMutation, Dispatcher, Dispatchers, FragmentTypeFlag,
-    PauseMutation, ResumeMutation, SourceChangeSplitMutation, StopMutation, ThrottleMutation,
-    UpdateMutation,
+    AddMutation, BarrierMutation, CombinedMutation, Dispatcher, Dispatchers, PauseMutation,
+    ResumeMutation, SourceChangeSplitMutation, StopMutation, ThrottleMutation, UpdateMutation,
 };
 use risingwave_pb::stream_service::{DropActorsRequest, WaitEpochCommitRequest};
 use uuid::Uuid;
@@ -689,24 +688,11 @@ impl CommandContext {
     pub fn actors_to_track(&self) -> HashSet<ActorId> {
         match &self.command {
             Command::CreateStreamingJob {
-                dispatchers,
-                table_fragments,
-                ..
-            } => {
-                // cdc backfill table job doesn't need to be tracked
-                if table_fragments.fragments().iter().any(|fragment| {
-                    fragment.fragment_type_mask & FragmentTypeFlag::CdcFilter as u32 != 0
-                }) {
-                    Default::default()
-                } else {
-                    dispatchers
-                        .values()
-                        .flatten()
-                        .flat_map(|dispatcher| dispatcher.downstream_actor_id.iter().copied())
-                        .chain(table_fragments.values_actor_ids())
-                        .collect()
-                }
-            }
+                table_fragments, ..
+            } => table_fragments
+                .tracking_progress_actor_ids()
+                .into_iter()
+                .collect(),
             _ => Default::default(),
         }
     }
@@ -953,7 +939,7 @@ impl CommandContext {
 
                 // Extract the fragments that include source operators.
                 let source_fragments = table_fragments.stream_source_fragments();
-
+                // TODO: handle backfill?
                 self.barrier_manager_context
                     .source_manager
                     .apply_source_change(
diff --git a/src/meta/src/barrier/mod.rs b/src/meta/src/barrier/mod.rs
index 30f1962ba3061..15526e141a8f3 100644
--- a/src/meta/src/barrier/mod.rs
+++ b/src/meta/src/barrier/mod.rs
@@ -839,6 +839,7 @@ impl GlobalBarrierManager {
                     }
                     commands
                 };
+                tracing::trace!("finished_commands: {}", finished_commands.len());
 
                 for command in finished_commands {
                     self.checkpoint_control.stash_command_to_finish(command);
diff --git a/src/meta/src/barrier/progress.rs b/src/meta/src/barrier/progress.rs
index 0c753a3c3f025..8c2b9d5e32641 100644
--- a/src/meta/src/barrier/progress.rs
+++ b/src/meta/src/barrier/progress.rs
@@ -206,6 +206,11 @@ impl TrackingJob {
     pub(crate) fn notify_finished(self) {
         match self {
             TrackingJob::New(command) => {
+                tracing::trace!(
+                    "notify finished, command: {:?}, curr_epoch: {:?}",
+                    command.context.command,
+                    command.context.curr_epoch
+                );
                 command
                     .notifiers
                     .into_iter()
@@ -368,6 +373,7 @@ impl CreateMviewProgressTracker {
         version_stats: &HummockVersionStats,
     ) -> Option<TrackingJob> {
         let actors = command.context.actors_to_track();
+        tracing::trace!("add actors to track: {:?}", actors);
         if actors.is_empty() {
             // The command can be finished immediately.
             return Some(TrackingJob::New(command));
@@ -426,6 +432,7 @@ impl CreateMviewProgressTracker {
             upstream_total_key_count,
             definition,
         );
+        tracing::trace!("add progress: {:?}", progress);
         if *ddl_type == DdlType::Sink {
             // We return the original tracking job immediately.
             // This is because sink can be decoupled with backfill progress.
@@ -450,6 +457,7 @@ impl CreateMviewProgressTracker {
         progress: &CreateMviewProgress,
         version_stats: &HummockVersionStats,
     ) -> Option<TrackingJob> {
+        tracing::trace!("update progress: {:?}", progress);
         let actor = progress.backfill_actor_id;
         let Some(table_id) = self.actor_map.get(&actor).copied() else {
             // On restart, backfill will ALWAYS notify CreateMviewProgressTracker,
diff --git a/src/meta/src/barrier/schedule.rs b/src/meta/src/barrier/schedule.rs
index aab3234d620cb..a66673b7fff4d 100644
--- a/src/meta/src/barrier/schedule.rs
+++ b/src/meta/src/barrier/schedule.rs
@@ -278,21 +278,25 @@ impl BarrierScheduler {
         let mut infos = Vec::with_capacity(contexts.len());
 
         for (injected_rx, collect_rx, finish_rx) in contexts {
+            tracing::trace!("waiting for command to be injected");
             // Wait for this command to be injected, and record the result.
             let info = injected_rx
                 .await
                 .map_err(|e| anyhow!("failed to inject barrier: {}", e))?;
             infos.push(info);
+            tracing::trace!("injected_rx finished");
 
             // Throw the error if it occurs when collecting this barrier.
             collect_rx
                 .await
                 .map_err(|e| anyhow!("failed to collect barrier: {}", e))??;
+            tracing::trace!("collect_rx finished");
 
             // Wait for this command to be finished.
             finish_rx
                 .await
                 .map_err(|e| anyhow!("failed to finish command: {}", e))?;
+            tracing::trace!("finish_rx finished");
         }
 
         Ok(infos)
diff --git a/src/meta/src/controller/catalog.rs b/src/meta/src/controller/catalog.rs
index 70fc1c52a0fce..d472161b30021 100644
--- a/src/meta/src/controller/catalog.rs
+++ b/src/meta/src/controller/catalog.rs
@@ -1422,7 +1422,7 @@ impl CatalogController {
             .map(|obj| obj.oid)
             .collect_vec();
 
-        // cdc source streaming job.
+        // source streaming job.
         if object_type == ObjectType::Source {
             let source_info: Option<StreamSourceInfo> = Source::find_by_id(object_id)
                 .select_only()
@@ -1432,7 +1432,7 @@ impl CatalogController {
                 .await?
                 .ok_or_else(|| MetaError::catalog_id_not_found("source", object_id))?;
             if let Some(source_info) = source_info
-                && source_info.into_inner().cdc_source_job
+                && source_info.into_inner().has_streaming_job
             {
                 to_drop_streaming_jobs.push(object_id);
             }
diff --git a/src/meta/src/controller/fragment.rs b/src/meta/src/controller/fragment.rs
index 0de826fef9f86..fa4e52bbaf322 100644
--- a/src/meta/src/controller/fragment.rs
+++ b/src/meta/src/controller/fragment.rs
@@ -39,8 +39,7 @@ use risingwave_pb::meta::{
 use risingwave_pb::source::PbConnectorSplits;
 use risingwave_pb::stream_plan::stream_node::NodeBody;
 use risingwave_pb::stream_plan::{
-    PbDispatchStrategy, PbFragmentTypeFlag, PbStreamActor, PbStreamContext, PbStreamNode,
-    StreamSource,
+    PbDispatchStrategy, PbFragmentTypeFlag, PbStreamActor, PbStreamContext,
 };
 use sea_orm::sea_query::{Expr, Value};
 use sea_orm::ActiveValue::Set;
@@ -1160,23 +1159,6 @@ impl CatalogController {
         Ok(chain_fragments)
     }
 
-    /// Find the external stream source info inside the stream node, if any.
-    fn find_stream_source(stream_node: &PbStreamNode) -> Option<&StreamSource> {
-        if let Some(NodeBody::Source(source)) = &stream_node.node_body {
-            if let Some(inner) = &source.source_inner {
-                return Some(inner);
-            }
-        }
-
-        for child in &stream_node.input {
-            if let Some(source) = Self::find_stream_source(child) {
-                return Some(source);
-            }
-        }
-
-        None
-    }
-
     pub async fn load_source_fragment_ids(
         &self,
     ) -> MetaResult<HashMap<SourceId, BTreeSet<FragmentId>>> {
@@ -1195,9 +1177,9 @@ impl CatalogController {
 
         let mut source_fragment_ids = HashMap::new();
         for (fragment_id, _, stream_node) in fragments {
-            if let Some(source) = Self::find_stream_source(stream_node.inner_ref()) {
+            if let Some(source_id) = stream_node.inner_ref().find_stream_source() {
                 source_fragment_ids
-                    .entry(source.source_id as SourceId)
+                    .entry(source_id as SourceId)
                     .or_insert_with(BTreeSet::new)
                     .insert(fragment_id);
             }
@@ -1205,31 +1187,33 @@ impl CatalogController {
         Ok(source_fragment_ids)
     }
 
-    pub async fn get_stream_source_fragment_ids(
+    pub async fn load_backfill_fragment_ids(
         &self,
-        job_id: ObjectId,
-    ) -> MetaResult<HashMap<SourceId, BTreeSet<FragmentId>>> {
+    ) -> MetaResult<HashMap<SourceId, BTreeSet<(FragmentId, FragmentId)>>> {
         let inner = self.inner.read().await;
-        let mut fragments: Vec<(FragmentId, i32, StreamNode)> = Fragment::find()
+        let mut fragments: Vec<(FragmentId, Vec<FragmentId>, i32, StreamNode)> = Fragment::find()
             .select_only()
             .columns([
                 fragment::Column::FragmentId,
+                fragment::Column::UpstreamFragmentId,
                 fragment::Column::FragmentTypeMask,
                 fragment::Column::StreamNode,
             ])
-            .filter(fragment::Column::JobId.eq(job_id))
             .into_tuple()
             .all(&inner.db)
             .await?;
-        fragments.retain(|(_, mask, _)| *mask & PbFragmentTypeFlag::Source as i32 != 0);
+        fragments.retain(|(_, _, mask, _)| *mask & PbFragmentTypeFlag::SourceBackfill as i32 != 0);
 
         let mut source_fragment_ids = HashMap::new();
-        for (fragment_id, _, stream_node) in fragments {
-            if let Some(source) = Self::find_stream_source(stream_node.inner_ref()) {
+        for (fragment_id, upstream_fragment_id, _, stream_node) in fragments {
+            if let Some(source_id) = stream_node.inner_ref().find_source_backfill() {
+                if upstream_fragment_id.len() != 1 {
+                    bail!("SourceBackfill should have only one upstream fragment, found {} for fragment {}", upstream_fragment_id.len(), fragment_id);
+                }
                 source_fragment_ids
-                    .entry(source.source_id as SourceId)
+                    .entry(source_id as SourceId)
                     .or_insert_with(BTreeSet::new)
-                    .insert(fragment_id);
+                    .insert((fragment_id, upstream_fragment_id[0]));
             }
         }
         Ok(source_fragment_ids)
diff --git a/src/meta/src/controller/streaming_job.rs b/src/meta/src/controller/streaming_job.rs
index 576c5c3c40699..636f188cc186e 100644
--- a/src/meta/src/controller/streaming_job.rs
+++ b/src/meta/src/controller/streaming_job.rs
@@ -683,7 +683,7 @@ impl CatalogController {
             if let Some(table_id) = source.optional_associated_table_id {
                 vec![table_id]
             } else if let Some(source_info) = &source.source_info
-                && source_info.inner_ref().cdc_source_job
+                && source_info.inner_ref().has_streaming_job
             {
                 vec![source_id]
             } else {
@@ -714,6 +714,7 @@ impl CatalogController {
             .all(&txn)
             .await?;
 
+        // TODO: limit source backfill?
         fragments.retain_mut(|(_, fragment_type_mask, stream_node)| {
             let mut found = false;
             if *fragment_type_mask & PbFragmentTypeFlag::Source as i32 != 0 {
diff --git a/src/meta/src/manager/catalog/database.rs b/src/meta/src/manager/catalog/database.rs
index a9c7459d745d9..7cd623badbd26 100644
--- a/src/meta/src/manager/catalog/database.rs
+++ b/src/meta/src/manager/catalog/database.rs
@@ -361,11 +361,13 @@ impl DatabaseManager {
             .chain(self.indexes.keys().copied())
             .chain(self.sources.keys().copied())
             .chain(
-                // filter cdc source jobs
                 self.sources
                     .iter()
                     .filter(|(_, source)| {
-                        source.info.as_ref().is_some_and(|info| info.cdc_source_job)
+                        source
+                            .info
+                            .as_ref()
+                            .is_some_and(|info| info.has_streaming_job)
                     })
                     .map(|(id, _)| id)
                     .copied(),
diff --git a/src/meta/src/manager/catalog/fragment.rs b/src/meta/src/manager/catalog/fragment.rs
index 97dbd325f591e..aa9a605cee0ec 100644
--- a/src/meta/src/manager/catalog/fragment.rs
+++ b/src/meta/src/manager/catalog/fragment.rs
@@ -1010,7 +1010,7 @@ impl FragmentManager {
     pub async fn get_running_actors_and_upstream_fragment_of_fragment(
         &self,
         fragment_id: FragmentId,
-    ) -> MetaResult<(Vec<StreamActor>, Vec<FragmentId>)> {
+    ) -> MetaResult<HashSet<(ActorId, Vec<ActorId>)>> {
         let map = &self.core.read().await.table_fragments;
 
         for table_fragment in map.values() {
@@ -1021,9 +1021,9 @@ impl FragmentManager {
                     .filter(|a| {
                         table_fragment.actor_status[&a.actor_id].state == ActorState::Running as i32
                     })
-                    .cloned()
+                    .map(|a| (a.actor_id, a.upstream_actor_id.clone()))
                     .collect();
-                return Ok((running_actors, fragment.upstream_fragment_ids.clone()));
+                return Ok(running_actors);
             }
         }
 
diff --git a/src/meta/src/manager/catalog/mod.rs b/src/meta/src/manager/catalog/mod.rs
index 89294fdcd43df..276c892a6448d 100644
--- a/src/meta/src/manager/catalog/mod.rs
+++ b/src/meta/src/manager/catalog/mod.rs
@@ -1145,7 +1145,7 @@ impl CatalogManager {
         let mut all_sink_ids: HashSet<SinkId> = HashSet::default();
         let mut all_source_ids: HashSet<SourceId> = HashSet::default();
         let mut all_view_ids: HashSet<ViewId> = HashSet::default();
-        let mut all_cdc_source_ids: HashSet<SourceId> = HashSet::default();
+        let mut all_streaming_job_source_ids: HashSet<SourceId> = HashSet::default();
 
         let relations_depend_on = |relation_id: RelationId| -> Vec<RelationInfo> {
             let tables_depend_on = tables
@@ -1408,11 +1408,10 @@ impl CatalogManager {
                         continue;
                     }
 
-                    // cdc source streaming job
                     if let Some(info) = source.info
-                        && info.cdc_source_job
+                        && info.has_streaming_job
                     {
-                        all_cdc_source_ids.insert(source.id);
+                        all_streaming_job_source_ids.insert(source.id);
                         let source_table_fragments = fragment_manager
                             .select_table_fragments_by_table_id(&source.id.into())
                             .await?;
@@ -1669,7 +1668,7 @@ impl CatalogManager {
             .into_iter()
             .map(|id| id.into())
             .chain(all_sink_ids.into_iter().map(|id| id.into()))
-            .chain(all_cdc_source_ids.into_iter().map(|id| id.into()))
+            .chain(all_streaming_job_source_ids.into_iter().map(|id| id.into()))
             .collect_vec();
 
         Ok((version, catalog_deleted_ids))
diff --git a/src/meta/src/manager/metadata.rs b/src/meta/src/manager/metadata.rs
index 30726984a1d99..0d805f201e1b8 100644
--- a/src/meta/src/manager/metadata.rs
+++ b/src/meta/src/manager/metadata.rs
@@ -372,10 +372,10 @@ impl MetadataManager {
         }
     }
 
-    pub async fn get_running_actors_and_upstream_fragment_of_fragment(
+    pub async fn get_running_actors_and_upstream_actors_of_fragment(
         &self,
         id: FragmentId,
-    ) -> MetaResult<(Vec<PbStreamActor>, Vec<FragmentId>)> {
+    ) -> MetaResult<HashSet<(ActorId, Vec<ActorId>)>> {
         match self {
             MetadataManager::V1(mgr) => {
                 mgr.fragment_manager
diff --git a/src/meta/src/model/stream.rs b/src/meta/src/model/stream.rs
index 54993d8fee805..3a96f8a142c3f 100644
--- a/src/meta/src/model/stream.rs
+++ b/src/meta/src/model/stream.rs
@@ -30,7 +30,7 @@ use risingwave_pb::meta::{PbTableFragments, PbTableParallelism};
 use risingwave_pb::plan_common::PbExprContext;
 use risingwave_pb::stream_plan::stream_node::NodeBody;
 use risingwave_pb::stream_plan::{
-    FragmentTypeFlag, PbFragmentTypeFlag, PbStreamContext, StreamActor, StreamNode, StreamSource,
+    FragmentTypeFlag, PbFragmentTypeFlag, PbStreamContext, StreamActor, StreamNode,
 };
 
 use super::{ActorId, FragmentId};
@@ -337,7 +337,7 @@ impl TableFragments {
     }
 
     /// Returns the actor ids with the given fragment type.
-    fn filter_actor_ids(&self, check_type: impl Fn(u32) -> bool) -> Vec<ActorId> {
+    pub fn filter_actor_ids(&self, check_type: impl Fn(u32) -> bool) -> Vec<ActorId> {
         self.fragments
             .values()
             .filter(|fragment| check_type(fragment.get_fragment_type_mask()))
@@ -367,10 +367,12 @@ impl TableFragments {
         })
     }
 
-    /// Returns values actor ids.
-    pub fn values_actor_ids(&self) -> Vec<ActorId> {
+    /// Returns ids of `Values`/`StreamScan` actors, which need to be tracked when creating an MV.
+    pub fn tracking_progress_actor_ids(&self) -> Vec<ActorId> {
         Self::filter_actor_ids(self, |fragment_type_mask| {
-            (fragment_type_mask & FragmentTypeFlag::Values as u32) != 0
+            (fragment_type_mask
+                & (FragmentTypeFlag::Values as u32 | FragmentTypeFlag::StreamScan as u32))
+                != 0
         })
     }
 
@@ -411,23 +413,6 @@ impl TableFragments {
         .collect()
     }
 
-    /// Find the external stream source info inside the stream node, if any.
-    pub fn find_stream_source(stream_node: &StreamNode) -> Option<&StreamSource> {
-        if let Some(NodeBody::Source(source)) = stream_node.node_body.as_ref() {
-            if let Some(inner) = &source.source_inner {
-                return Some(inner);
-            }
-        }
-
-        for child in &stream_node.input {
-            if let Some(source) = Self::find_stream_source(child) {
-                return Some(source);
-            }
-        }
-
-        None
-    }
-
     /// Extract the fragments that include source executors that contains an external stream source,
     /// grouping by source id.
     pub fn stream_source_fragments(&self) -> HashMap<SourceId, BTreeSet<FragmentId>> {
@@ -435,10 +420,7 @@ impl TableFragments {
 
         for fragment in self.fragments() {
             for actor in &fragment.actors {
-                if let Some(source_id) =
-                    TableFragments::find_stream_source(actor.nodes.as_ref().unwrap())
-                        .map(|s| s.source_id)
-                {
+                if let Some(source_id) = actor.nodes.as_ref().unwrap().find_stream_source() {
                     source_fragments
                         .entry(source_id)
                         .or_insert(BTreeSet::new())
@@ -451,6 +433,29 @@ impl TableFragments {
         source_fragments
     }
 
+    pub fn source_backfill_fragments(
+        &self,
+    ) -> MetadataModelResult<HashMap<SourceId, BTreeSet<(FragmentId, FragmentId)>>> {
+        let mut source_fragments = HashMap::new();
+
+        for fragment in self.fragments() {
+            for actor in &fragment.actors {
+                if let Some(source_id) = actor.nodes.as_ref().unwrap().find_source_backfill() {
+                    if fragment.upstream_fragment_ids.len() != 1 {
+                        return Err(anyhow::anyhow!("SourceBackfill should have only one upstream fragment, found {:?} for fragment {}", fragment.upstream_fragment_ids, fragment.fragment_id).into());
+                    }
+                    source_fragments
+                        .entry(source_id)
+                        .or_insert(BTreeSet::new())
+                        .insert((fragment.fragment_id, fragment.upstream_fragment_ids[0]));
+
+                    break;
+                }
+            }
+        }
+        Ok(source_fragments)
+    }
+
     /// Resolve dependent table
     fn resolve_dependent_table(stream_node: &StreamNode, table_ids: &mut HashMap<TableId, usize>) {
         let table_id = match stream_node.node_body.as_ref() {
diff --git a/src/meta/src/rpc/ddl_controller.rs b/src/meta/src/rpc/ddl_controller.rs
index fa160e0658b47..1dd9faed259bb 100644
--- a/src/meta/src/rpc/ddl_controller.rs
+++ b/src/meta/src/rpc/ddl_controller.rs
@@ -485,6 +485,7 @@ impl DdlController {
             unimplemented!("support drop source in v2");
         };
         // 1. Drop source in catalog.
+        // If the source has a streaming job, it's also dropped here.
         let (version, streaming_job_ids) = mgr
             .catalog_manager
             .drop_relation(
@@ -1287,8 +1288,12 @@ impl DdlController {
             .get_upstream_root_fragments(fragment_graph.dependent_table_ids())
             .await?;
 
-        let upstream_actors: HashMap<_, _> = upstream_root_fragments
+        // XXX: do we need to keep only `Mview` fragments here (see the disabled filter below)?
+        let upstream_mview_actors: HashMap<_, _> = upstream_root_fragments
             .iter()
+            // .filter(|(_, fragment)| {
+            //     fragment.fragment_type_mask & FragmentTypeFlag::Mview as u32 != 0
+            // })
             .map(|(&table_id, fragment)| {
                 (
                     table_id,
@@ -1365,7 +1370,7 @@ impl DdlController {
 
         let ctx = CreateStreamingJobContext {
             dispatchers,
-            upstream_mview_actors: upstream_actors,
+            upstream_mview_actors,
             internal_tables,
             building_locations,
             existing_locations,
diff --git a/src/meta/src/stream/scale.rs b/src/meta/src/stream/scale.rs
index 00a8c18885dcc..9e0ac3ca1c935 100644
--- a/src/meta/src/stream/scale.rs
+++ b/src/meta/src/stream/scale.rs
@@ -606,7 +606,7 @@ impl ScaleController {
 
             if (fragment.get_fragment_type_mask() & FragmentTypeFlag::Source as u32) != 0 {
                 let stream_node = fragment.actors.first().unwrap().get_nodes().unwrap();
-                if TableFragments::find_stream_source(stream_node).is_some() {
+                if stream_node.find_stream_source().is_some() {
                     stream_source_fragment_ids.insert(*fragment_id);
                 }
             }
diff --git a/src/meta/src/stream/source_manager.rs b/src/meta/src/stream/source_manager.rs
index 95f5c219351a3..ed1dd74e45350 100644
--- a/src/meta/src/stream/source_manager.rs
+++ b/src/meta/src/stream/source_manager.rs
@@ -30,6 +30,7 @@ use risingwave_connector::source::{
 };
 use risingwave_pb::catalog::Source;
 use risingwave_pb::source::{ConnectorSplit, ConnectorSplits};
+use risingwave_pb::stream_plan::Dispatcher;
 use risingwave_rpc_client::ConnectorClient;
 use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender};
 use tokio::sync::{oneshot, Mutex};
@@ -228,8 +229,8 @@ pub struct SourceManagerCore {
     managed_sources: HashMap<SourceId, ConnectorSourceWorkerHandle>,
     /// Fragments associated with each source
     source_fragments: HashMap<SourceId, BTreeSet<FragmentId>>,
-    /// Revert index for source_fragments
-    fragment_sources: HashMap<FragmentId, SourceId>,
+    /// Backfill fragments of each source: `source_id` -> `(fragment_id, upstream_fragment_id)`
+    backfill_fragments: HashMap<SourceId, BTreeSet<(FragmentId, FragmentId)>>,
 
     /// Splits assigned per actor
     actor_splits: HashMap<ActorId, Vec<SplitImpl>>,
@@ -240,20 +241,14 @@ impl SourceManagerCore {
         metadata_manager: MetadataManager,
         managed_sources: HashMap<SourceId, ConnectorSourceWorkerHandle>,
         source_fragments: HashMap<SourceId, BTreeSet<FragmentId>>,
+        backfill_fragments: HashMap<SourceId, BTreeSet<(FragmentId, FragmentId)>>,
         actor_splits: HashMap<ActorId, Vec<SplitImpl>>,
     ) -> Self {
-        let mut fragment_sources = HashMap::new();
-        for (source_id, fragment_ids) in &source_fragments {
-            for fragment_id in fragment_ids {
-                fragment_sources.insert(*fragment_id, *source_id);
-            }
-        }
-
         Self {
             metadata_manager,
             managed_sources,
             source_fragments,
-            fragment_sources,
+            backfill_fragments,
             actor_splits,
         }
     }
@@ -267,12 +262,13 @@ impl SourceManagerCore {
         let mut split_assignment: SplitAssignment = HashMap::new();
 
         for (source_id, handle) in &self.managed_sources {
-            let fragment_ids = match self.source_fragments.get(source_id) {
+            let source_fragment_ids = match self.source_fragments.get(source_id) {
                 Some(fragment_ids) if !fragment_ids.is_empty() => fragment_ids,
                 _ => {
                     continue;
                 }
             };
+            let backfill_fragment_ids = self.backfill_fragments.get(source_id);
 
             let Some(discovered_splits) = handle.discovered_splits().await else {
                 continue;
@@ -281,21 +277,18 @@ impl SourceManagerCore {
                 tracing::warn!("No splits discovered for source {}", source_id);
             }
 
-            let mut source_fragments = vec![];
-            let mut backfill_fragments = vec![];
-
-            for fragment_id in fragment_ids {
-                let (actors, upstream_fragment_ids) = match self
+            for &fragment_id in source_fragment_ids {
+                let actors = match self
                     .metadata_manager
-                    .get_running_actors_and_upstream_fragment_of_fragment(*fragment_id)
+                    .get_running_actors_of_fragment(fragment_id)
                     .await
                 {
-                    Ok((actors, upstream_fragment_ids)) => {
+                    Ok(actors) => {
                         if actors.is_empty() {
                             tracing::warn!("No actors found for fragment {}", fragment_id);
                             continue;
                         }
-                        (actors, upstream_fragment_ids)
+                        actors
                     }
                     Err(err) => {
                         tracing::warn!("Failed to get the actor of the fragment {}, maybe the fragment doesn't exist anymore", err.to_string());
@@ -303,35 +296,9 @@ impl SourceManagerCore {
                     }
                 };
 
-                if !upstream_fragment_ids.is_empty() {
-                    debug_assert!(
-                        upstream_fragment_ids.len() == 1,
-                        "source backfill fragment should have exactly one upstream fragment, fragment_id: {fragment_id}, upstream_fragment_ids: {upstream_fragment_ids:?}"
-                    );
-                    for actor in &actors {
-                        debug_assert!(
-                            actor.upstream_actor_id.len() == 1,
-                            "source backfill actor should have exactly one upstream actor, fragment_id: {fragment_id}, actor: {actor:?}"
-                        );
-                    }
-                    backfill_fragments.push((*fragment_id, upstream_fragment_ids[0], actors));
-                } else {
-                    for actor in &actors {
-                        debug_assert!(
-                            actor.upstream_actor_id.is_empty(),
-                            "source actor should not have upstream actors, fragment_id: {fragment_id}, actor: {actor:?}"
-                        );
-                    }
-                    source_fragments.push((*fragment_id, actors));
-                }
-            }
-
-            // assign splits for source fragments first
-            for (fragment_id, actors) in source_fragments {
                 let prev_actor_splits: HashMap<_, _> = actors
                     .into_iter()
-                    .map(|actor| {
-                        let actor_id = actor.actor_id;
+                    .map(|actor_id| {
                         (
                             actor_id,
                             self.actor_splits
@@ -354,28 +321,41 @@ impl SourceManagerCore {
                 }
             }
 
-            // align splits for backfill fragments with its upstream source fragment
-            for (fragment_id, upstream_fragment_id, actors) in backfill_fragments {
-                let upstream_assignment = split_assignment
-                    .get(&upstream_fragment_id)
-                    .unwrap_or_else(||panic!(
-                        "source backfill fragment's upstream fragment should have assignment, fragment_id: {fragment_id}, upstream_fragment_id: {upstream_fragment_id}, split_assignment: {split_assignment:?}"));
-                split_assignment.insert(
-                    fragment_id,
-                    actors
-                        .into_iter()
-                        .map(|a| {
-                            let actor_id = a.actor_id;
-                            (
-                                actor_id,
-                                upstream_assignment
-                                    .get(&actor_id)
-                                    .cloned()
-                                    .unwrap_or_else(||panic!("source backfill actor should have upstream actor, fragment_id: {fragment_id}, upstream_fragment_id: {upstream_fragment_id}, actor: {a:?}, upstream_assignment: {upstream_assignment:?}")),
-                            )
-                        })
-                        .collect(),
-                );
+            if let Some(backfill_fragment_ids) = backfill_fragment_ids {
+                // align the splits of each backfill fragment with its upstream source fragment
+                for (fragment_id, upstream_fragment_id) in backfill_fragment_ids {
+                    let Some(upstream_assignment) = split_assignment.get(upstream_fragment_id)
+                    else {
+                        // upstream fragment unchanged, so do not update the backfill fragment either
+                        continue;
+                    };
+                    let actors = match self
+                        .metadata_manager
+                        .get_running_actors_and_upstream_actors_of_fragment(*fragment_id)
+                        .await
+                    {
+                        Ok(actors) => {
+                            if actors.is_empty() {
+                                tracing::warn!("No actors found for fragment {}", fragment_id);
+                                continue;
+                            }
+                            actors
+                        }
+                        Err(err) => {
+                            tracing::warn!("Failed to get the actor of the fragment {}, maybe the fragment doesn't exist anymore", err.to_string());
+                            continue;
+                        }
+                    };
+                    split_assignment.insert(
+                        *fragment_id,
+                        align_backfill_splits(
+                            actors,
+                            upstream_assignment,
+                            *fragment_id,
+                            *upstream_fragment_id,
+                        )?,
+                    );
+                }
             }
         }
 
@@ -390,10 +370,6 @@ impl SourceManagerCore {
     ) {
         if let Some(source_fragments) = source_fragments {
             for (source_id, mut fragment_ids) in source_fragments {
-                for fragment_id in &fragment_ids {
-                    self.fragment_sources.insert(*fragment_id, source_id);
-                }
-
                 self.source_fragments
                     .entry(source_id)
                     .or_default()
@@ -432,10 +408,6 @@ impl SourceManagerCore {
                     entry.remove();
                 }
             }
-
-            for fragment_id in &fragment_ids {
-                self.fragment_sources.remove(fragment_id);
-            }
         }
 
         for actor_id in actor_splits {
@@ -491,6 +463,7 @@ impl Default for SplitDiffOptions {
 ///
 /// If an actor has an upstream actor, it should be a backfill executor,
 /// and its splits should be aligned with the upstream actor. `reassign_splits` should not be used in this case.
+/// Use `align_backfill_splits` instead.
 ///
 /// - `fragment_id`: just for logging
 fn reassign_splits<T>(
@@ -586,6 +559,32 @@ where
     )
 }
 
+fn align_backfill_splits(
+    backfill_actors: impl IntoIterator<Item = (ActorId, Vec<ActorId>)>,
+    upstream_assignment: &HashMap<ActorId, Vec<SplitImpl>>,
+    fragment_id: FragmentId,
+    upstream_fragment_id: FragmentId,
+) -> anyhow::Result<HashMap<ActorId, Vec<SplitImpl>>> {
+    backfill_actors
+                .into_iter()
+                .map(|(actor_id, upstream_actor_id)| {
+                    let err = || anyhow::anyhow!("source backfill actor should have upstream actor, fragment_id: {fragment_id}, upstream_fragment_id: {upstream_fragment_id}, actor_id: {actor_id}, upstream_assignment: {upstream_assignment:?}, upstream_actor_id: {upstream_actor_id:?}");
+                    if upstream_actor_id.len() != 1 {
+                        return Err(err());
+                    }
+                    let Some(splits ) = upstream_assignment
+                    .get(&upstream_actor_id[0])
+                     else {
+                        return Err(err());
+                     };
+                    Ok((
+                        actor_id,
+                        splits.clone(),
+                    ))
+                })
+                .collect()
+}
+
 impl SourceManager {
     const DEFAULT_SOURCE_TICK_INTERVAL: Duration = Duration::from_secs(10);
     const DEFAULT_SOURCE_TICK_TIMEOUT: Duration = Duration::from_secs(10);
@@ -611,6 +610,7 @@ impl SourceManager {
 
         let mut actor_splits = HashMap::new();
         let mut source_fragments = HashMap::new();
+        let mut backfill_fragments = HashMap::new();
 
         match &metadata_manager {
             MetadataManager::V1(mgr) => {
@@ -622,6 +622,7 @@ impl SourceManager {
                     .values()
                 {
                     source_fragments.extend(table_fragments.stream_source_fragments());
+                    backfill_fragments.extend(table_fragments.source_backfill_fragments()?);
                     actor_splits.extend(table_fragments.actor_splits.clone());
                 }
             }
@@ -638,6 +639,21 @@ impl SourceManager {
                         )
                     })
                     .collect();
+                backfill_fragments = mgr
+                    .catalog_controller
+                    .load_backfill_fragment_ids()
+                    .await?
+                    .into_iter()
+                    .map(|(source_id, fragment_ids)| {
+                        (
+                            source_id as SourceId,
+                            fragment_ids
+                                .into_iter()
+                                .map(|(id, up_id)| (id as _, up_id as _))
+                                .collect(),
+                        )
+                    })
+                    .collect();
                 actor_splits = mgr
                     .catalog_controller
                     .load_actor_splits()
@@ -662,6 +678,7 @@ impl SourceManager {
             metadata_manager,
             managed_sources,
             source_fragments,
+            backfill_fragments,
             actor_splits,
         ));
 
@@ -806,6 +823,68 @@ impl SourceManager {
         Ok(assigned)
     }
 
+    pub async fn allocate_splits_for_backfill(
+        &self,
+        table_id: &TableId,
+        dispatchers: &HashMap<ActorId, Vec<Dispatcher>>,
+    ) -> MetaResult<SplitAssignment> {
+        let core = self.core.lock().await;
+        let table_fragments = core
+            .metadata_manager
+            .get_job_fragments_by_id(table_id)
+            .await?;
+
+        let upstream_assignment = &core.actor_splits;
+        let source_backfill_fragments = table_fragments.source_backfill_fragments()?;
+        tracing::debug!(
+            ?source_backfill_fragments,
+            ?table_fragments,
+            "allocate_splits_for_backfill source backfill fragments"
+        );
+
+        let mut assigned = HashMap::new();
+
+        for (_source_id, fragments) in source_backfill_fragments {
+            for (fragment_id, upstream_fragment_id) in fragments {
+                let upstream_actors = core
+                    .metadata_manager
+                    .get_running_actors_of_fragment(upstream_fragment_id)
+                    .await?;
+                let mut backfill_actors = vec![];
+                for upstream_actor in upstream_actors {
+                    if let Some(dispatchers) = dispatchers.get(&upstream_actor) {
+                        let err = || {
+                            anyhow::anyhow!(
+                            "source backfill fragment's upstream fragment should have one dispatcher, fragment_id: {fragment_id}, upstream_fragment_id: {upstream_fragment_id}, upstream_actor: {upstream_actor}, dispatchers: {dispatchers:?}",
+                            fragment_id = fragment_id,
+                            upstream_fragment_id = upstream_fragment_id,
+                            upstream_actor = upstream_actor,
+                            dispatchers = dispatchers
+                        )
+                        };
+                        if dispatchers.len() != 1 || dispatchers[0].downstream_actor_id.len() != 1 {
+                            return Err(err().into());
+                        }
+
+                        backfill_actors
+                            .push((dispatchers[0].downstream_actor_id[0], vec![upstream_actor]));
+                    }
+                }
+                assigned.insert(
+                    fragment_id,
+                    align_backfill_splits(
+                        backfill_actors,
+                        upstream_assignment,
+                        fragment_id,
+                        upstream_fragment_id,
+                    )?,
+                );
+            }
+        }
+
+        Ok(assigned)
+    }
+
     /// register connector worker for source.
     pub async fn register_source(&self, source: &Source) -> anyhow::Result<()> {
         let mut core = self.core.lock().await;
diff --git a/src/meta/src/stream/stream_graph/actor.rs b/src/meta/src/stream/stream_graph/actor.rs
index c42c2f5a51425..52a14af0629c6 100644
--- a/src/meta/src/stream/stream_graph/actor.rs
+++ b/src/meta/src/stream/stream_graph/actor.rs
@@ -122,7 +122,7 @@ impl ActorBuilder {
     /// During this process, the following things will be done:
     /// 1. Replace the logical `Exchange` in node's input with `Merge`, which can be executed on the
     /// compute nodes.
-    /// 2. Fill the upstream mview info of the `Merge` node under the `StreamScan` node.
+    /// 2. Fill the upstream mview info of the `Merge` node under the other "leaf" nodes.
     fn rewrite(&self) -> MetaResult<StreamNode> {
         self.rewrite_inner(&self.nodes, 0)
     }
@@ -254,6 +254,44 @@ impl ActorBuilder {
                 })
             }
 
+            // "Leaf" node `SourceBackfill`.
+            NodeBody::SourceBackfill(source_backfill) => {
+                let input = stream_node.get_input();
+                assert_eq!(input.len(), 1);
+
+                let merge_node = &input[0];
+                assert_matches!(merge_node.node_body, Some(NodeBody::Merge(_)));
+
+                let upstream_source_id = source_backfill.source_id;
+
+                // Index the upstreams by the external edge ID.
+                let upstreams = &self.upstreams[&EdgeId::UpstreamExternal {
+                    upstream_table_id: upstream_source_id.into(),
+                    downstream_fragment_id: self.fragment_id,
+                }];
+
+                let upstream_actor_id = upstreams.actors.as_global_ids();
+
+                // rewrite the input of `SourceBackfill`
+                let input = vec![
+                    // Fill the merge node body with correct upstream info.
+                    StreamNode {
+                        node_body: Some(NodeBody::Merge(MergeNode {
+                            upstream_actor_id,
+                            upstream_fragment_id: upstreams.fragment_id.as_global_id(),
+                            upstream_dispatcher_type: DispatcherType::NoShuffle as _,
+                            fields: merge_node.fields.clone(),
+                        })),
+                        ..merge_node.clone()
+                    },
+                ];
+
+                Ok(StreamNode {
+                    input,
+                    ..stream_node.clone()
+                })
+            }
+
             // For other nodes, visit the children recursively.
             _ => {
                 let mut new_stream_node = stream_node.clone();
@@ -622,6 +660,7 @@ impl ActorGraphBuildState {
 
 /// The result of a built actor graph. Will be further embedded into the `Context` for building
 /// actors on the compute nodes.
+#[derive(Debug)]
 pub struct ActorGraphBuildResult {
     /// The graph of sealed fragments, including all actors.
     pub graph: BTreeMap<FragmentId, Fragment>,
diff --git a/src/meta/src/stream/stream_graph/fragment.rs b/src/meta/src/stream/stream_graph/fragment.rs
index 937dadda21fb8..e4b7dc7b1b3b1 100644
--- a/src/meta/src/stream/stream_graph/fragment.rs
+++ b/src/meta/src/stream/stream_graph/fragment.rs
@@ -24,7 +24,7 @@ use risingwave_common::bail;
 use risingwave_common::catalog::{
     generate_internal_table_name_with_type, TableId, CDC_SOURCE_COLUMN_NUM,
 };
-use risingwave_common::util::iter_util::ZipEqFast;
+use risingwave_common::util::iter_util::{IntoIteratorExt, ZipEqFast};
 use risingwave_common::util::stream_graph_visitor;
 use risingwave_pb::catalog::Table;
 use risingwave_pb::ddl_service::TableJobType;
@@ -54,7 +54,8 @@ pub(super) struct BuildingFragment {
     /// The ID of the job if it's materialized in this fragment.
     table_id: Option<u32>,
 
-    /// The required columns of each upstream table.
+    /// The required column IDs of each upstream table.
+    /// Will be converted to indices when building the edge connected to the upstream.
     ///
     /// For shared CDC source on table, its `vec![]`, since the output is fixed.
     upstream_table_columns: HashMap<TableId, Vec<i32>>,
@@ -177,6 +178,15 @@ impl BuildingFragment {
                     stream_scan.upstream_column_ids.clone(),
                 ),
                 NodeBody::CdcFilter(cdc_filter) => (cdc_filter.upstream_source_id.into(), vec![]),
+                NodeBody::SourceBackfill(backfill) => (
+                    backfill.source_id.into(),
+                    // FIXME: only pass required columns instead of all columns here
+                    backfill
+                        .columns
+                        .iter()
+                        .map(|c| c.column_desc.as_ref().unwrap().column_id)
+                        .collect(),
+                ),
                 _ => return,
             };
             table_columns
@@ -187,7 +197,7 @@ impl BuildingFragment {
         assert_eq!(
             table_columns.len(),
             fragment.upstream_table_ids.len(),
-            "fragment type: {}",
+            "fragment type: {:b}",
             fragment.fragment_type_mask
         );
 
@@ -286,7 +296,7 @@ impl StreamFragmentEdge {
 /// This only includes nodes and edges of the current job itself. It will be converted to [`CompleteStreamFragmentGraph`] later,
 /// that contains the additional information of pre-existing
 /// fragments, which are connected to the graph's top-most or bottom-most fragments.
-#[derive(Default)]
+#[derive(Default, Debug)]
 pub struct StreamFragmentGraph {
     /// stores all the fragments in the graph.
     fragments: HashMap<GlobalFragmentId, BuildingFragment>,
@@ -513,7 +523,7 @@ pub(super) enum EitherFragment {
     /// An internal fragment that is being built for the current streaming job.
     Building(BuildingFragment),
 
-    /// An existing fragment that is external but connected to the fragments being built.
+    /// An existing fragment that is external but connected to the fragments being built.
     Existing(Fragment),
 }
 
@@ -525,6 +535,7 @@ pub(super) enum EitherFragment {
 ///   `Materialize` node will be included in this structure.
 /// - if we're going to replace the plan of a table with downstream mviews, the downstream fragments
 ///   containing the `StreamScan` nodes will be included in this structure.
+#[derive(Debug)]
 pub struct CompleteStreamFragmentGraph {
     /// The fragment graph of the streaming job being built.
     building_graph: StreamFragmentGraph,
@@ -655,50 +666,96 @@ impl CompleteStreamFragmentGraph {
                             (source_job_id, edge)
                         }
                         DdlType::MaterializedView | DdlType::Sink | DdlType::Index => {
-                            // handle MV on MV
+                            // handle MV on MV/Source
 
                             // Build the extra edges between the upstream `Materialize` and the downstream `StreamScan`
                             // of the new materialized view.
-                            let mview_fragment = upstream_root_fragments
+                            let upstream_fragment = upstream_root_fragments
                                 .get(&upstream_table_id)
                                 .context("upstream materialized view fragment not found")?;
-                            let mview_id = GlobalFragmentId::new(mview_fragment.fragment_id);
-
-                            // Resolve the required output columns from the upstream materialized view.
-                            let (dist_key_indices, output_indices) = {
-                                let nodes = mview_fragment.actors[0].get_nodes().unwrap();
-                                let mview_node =
-                                    nodes.get_node_body().unwrap().as_materialize().unwrap();
-                                let all_column_ids = mview_node.column_ids();
-                                let dist_key_indices = mview_node.dist_key_indices();
-                                let output_indices = output_columns
-                                    .iter()
-                                    .map(|c| {
-                                        all_column_ids
-                                            .iter()
-                                            .position(|&id| id == *c)
-                                            .map(|i| i as u32)
-                                    })
-                                    .collect::<Option<Vec<_>>>()
-                                    .context(
-                                        "column not found in the upstream materialized view",
-                                    )?;
-                                (dist_key_indices, output_indices)
-                            };
-                            let dispatch_strategy = mv_on_mv_dispatch_strategy(
-                                uses_arrangement_backfill,
-                                dist_key_indices,
-                                output_indices,
-                            );
-                            let edge = StreamFragmentEdge {
-                                id: EdgeId::UpstreamExternal {
-                                    upstream_table_id,
-                                    downstream_fragment_id: id,
-                                },
-                                dispatch_strategy,
-                            };
-
-                            (mview_id, edge)
+                            let upstream_root_fragment_id =
+                                GlobalFragmentId::new(upstream_fragment.fragment_id);
+
+                            if upstream_fragment.fragment_type_mask & FragmentTypeFlag::Mview as u32
+                                != 0
+                            {
+                                // Resolve the required output columns from the upstream materialized view.
+                                let (dist_key_indices, output_indices) = {
+                                    let nodes = upstream_fragment.actors[0].get_nodes().unwrap();
+                                    let mview_node =
+                                        nodes.get_node_body().unwrap().as_materialize().unwrap();
+                                    let all_column_ids = mview_node.column_ids();
+                                    let dist_key_indices = mview_node.dist_key_indices();
+                                    let output_indices = output_columns
+                                        .map_collect::<_, _, _, Option<Vec<_>>>(|c| {
+                                            all_column_ids
+                                                .iter()
+                                                .position(|&id| id == *c)
+                                                .map(|i| i as u32)
+                                        })
+                                        .context(
+                                            "column not found in the upstream materialized view",
+                                        )?;
+                                    (dist_key_indices, output_indices)
+                                };
+                                let dispatch_strategy = mv_on_mv_dispatch_strategy(
+                                    uses_arrangement_backfill,
+                                    dist_key_indices,
+                                    output_indices,
+                                );
+                                let edge = StreamFragmentEdge {
+                                    id: EdgeId::UpstreamExternal {
+                                        upstream_table_id,
+                                        downstream_fragment_id: id,
+                                    },
+                                    dispatch_strategy,
+                                };
+
+                                (upstream_root_fragment_id, edge)
+                            } else if upstream_fragment.fragment_type_mask
+                                & FragmentTypeFlag::Source as u32
+                                != 0
+                            {
+                                let source_fragment = upstream_root_fragments
+                                    .get(&upstream_table_id)
+                                    .context("upstream source fragment not found")?;
+                                let source_job_id =
+                                    GlobalFragmentId::new(source_fragment.fragment_id);
+
+                                let output_indices = {
+                                    let nodes = upstream_fragment.actors[0].get_nodes().unwrap();
+                                    let source_node =
+                                        nodes.get_node_body().unwrap().as_source().unwrap();
+
+                                    let all_column_ids = source_node.column_ids().unwrap();
+                                    output_columns
+                                        .map_collect::<_, _, _, Option<Vec<_>>>(|c| {
+                                            all_column_ids
+                                                .iter()
+                                                .position(|&id| id == *c)
+                                                .map(|i| i as u32)
+                                        })
+                                        .context("column not found in the upstream source node")?
+                                };
+
+                                let edge = StreamFragmentEdge {
+                                    id: EdgeId::UpstreamExternal {
+                                        upstream_table_id,
+                                        downstream_fragment_id: id,
+                                    },
+                                    // We always use `NoShuffle` for the exchange between the upstream
+                                    // `Source` and the downstream `StreamScan` of the new MV.
+                                    dispatch_strategy: DispatchStrategy {
+                                        r#type: DispatcherType::NoShuffle as _,
+                                        dist_key_indices: vec![], // not used for `NoShuffle`
+                                        output_indices,
+                                    },
+                                };
+
+                                (source_job_id, edge)
+                            } else {
+                                bail!("the upstream fragment should be a MView or Source, got fragment type: {:b}", upstream_fragment.fragment_type_mask)
+                            }
                         }
                         DdlType::Source | DdlType::Table(_) => {
                             bail!("the streaming job shouldn't have an upstream fragment, ddl_type: {:?}", ddl_type)
diff --git a/src/meta/src/stream/stream_graph/schedule.rs b/src/meta/src/stream/stream_graph/schedule.rs
index ed2dac5be0e06..1ae24ec1b7d51 100644
--- a/src/meta/src/stream/stream_graph/schedule.rs
+++ b/src/meta/src/stream/stream_graph/schedule.rs
@@ -326,6 +326,7 @@ impl Scheduler {
 
 /// [`Locations`] represents the parallel unit and worker locations of the actors.
 #[cfg_attr(test, derive(Default))]
+#[derive(Debug)]
 pub struct Locations {
     /// actor location map.
     pub actor_locations: BTreeMap<ActorId, ParallelUnit>,
diff --git a/src/meta/src/stream/stream_manager.rs b/src/meta/src/stream/stream_manager.rs
index 19daad681506e..7c38477eaa49c 100644
--- a/src/meta/src/stream/stream_manager.rs
+++ b/src/meta/src/stream/stream_manager.rs
@@ -354,6 +354,7 @@ impl GlobalStreamManager {
                         }
                     }
                     CreatingState::Created => {
+                        tracing::debug!(id=?table_id, "streaming job created");
                         self.creating_job_info.delete_job(table_id).await;
                         return Ok(());
                     }
@@ -435,6 +436,7 @@ impl GlobalStreamManager {
                         actor_id: actors.clone(),
                     })
                     .await?;
+                tracing::debug!("build actors finished");
 
                 Ok(()) as MetaResult<()>
             })
@@ -507,7 +509,7 @@ impl GlobalStreamManager {
                 .await?;
 
             let dummy_table_id = table_fragments.table_id();
-
+            // TODO: should splits for source backfill also be allocated here (see `allocate_splits_for_backfill`)?
             let init_split_assignment =
                 self.source_manager.allocate_splits(&dummy_table_id).await?;
 
@@ -524,7 +526,16 @@ impl GlobalStreamManager {
 
         let table_id = table_fragments.table_id();
 
-        let init_split_assignment = self.source_manager.allocate_splits(&table_id).await?;
+        // Here we need to consider:
+        // - Source with streaming job (backfill-able source)
+        // - Table with connector
+        // - MV on backfill-able source
+        let mut init_split_assignment = self.source_manager.allocate_splits(&table_id).await?;
+        init_split_assignment.extend(
+            self.source_manager
+                .allocate_splits_for_backfill(&table_id, &dispatchers)
+                .await?,
+        );
 
         let command = Command::CreateStreamingJob {
             table_fragments,
@@ -535,7 +546,7 @@ impl GlobalStreamManager {
             ddl_type,
             replace_table: replace_table_command,
         };
-
+        tracing::trace!(?command, "sending first barrier for creating streaming job");
         if let Err(err) = self.barrier_scheduler.run_command(command).await {
             if create_type == CreateType::Foreground {
                 let mut table_ids = HashSet::from_iter(std::iter::once(table_id));
@@ -569,7 +580,7 @@ impl GlobalStreamManager {
             .await?;
 
         let dummy_table_id = table_fragments.table_id();
-
+        // TODO: should splits for source backfill also be allocated here (see `allocate_splits_for_backfill`)?
         let init_split_assignment = self.source_manager.allocate_splits(&dummy_table_id).await?;
 
         if let Err(err) = self
diff --git a/src/prost/src/lib.rs b/src/prost/src/lib.rs
index dafa3a568780f..57094cbc8abed 100644
--- a/src/prost/src/lib.rs
+++ b/src/prost/src/lib.rs
@@ -190,6 +190,61 @@ impl stream_plan::MaterializeNode {
     }
 }
 
+impl stream_plan::SourceNode {
+    pub fn column_ids(&self) -> Option<Vec<i32>> {
+        Some(
+            self.source_inner
+                .as_ref()?
+                .columns
+                .iter()
+                .map(|c| c.get_column_desc().unwrap().column_id)
+                .collect(),
+        )
+    }
+}
+
+impl stream_plan::StreamNode {
+    /// Find the external stream source info inside the stream node, if any.
+    ///
+    /// Returns `source_id`.
+    pub fn find_stream_source(&self) -> Option<u32> {
+        if let Some(crate::stream_plan::stream_node::NodeBody::Source(source)) =
+            self.node_body.as_ref()
+        {
+            if let Some(inner) = &source.source_inner {
+                return Some(inner.source_id);
+            }
+        }
+
+        for child in &self.input {
+            if let Some(source) = child.find_stream_source() {
+                return Some(source);
+            }
+        }
+
+        None
+    }
+
+    /// Find the `SourceBackfill` node inside the stream node (searching inputs recursively), if any.
+    ///
+    /// Returns `source_id`.
+    pub fn find_source_backfill(&self) -> Option<u32> {
+        if let Some(crate::stream_plan::stream_node::NodeBody::SourceBackfill(source)) =
+            self.node_body.as_ref()
+        {
+            return Some(source.source_id);
+        }
+
+        for child in &self.input {
+            if let Some(source) = child.find_source_backfill() {
+                return Some(source);
+            }
+        }
+
+        None
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use crate::data::{data_type, DataType};
diff --git a/src/stream/Cargo.toml b/src/stream/Cargo.toml
index dcc2a6f3a4cb5..68570c2240330 100644
--- a/src/stream/Cargo.toml
+++ b/src/stream/Cargo.toml
@@ -22,6 +22,7 @@ async-trait = "0.1"
 auto_enums = "0.8"
 await-tree = { workspace = true }
 bytes = "1"
+cfg-if = "1"
 delta_btree_map = { path = "../utils/delta_btree_map" }
 educe = "0.5"
 either = "1"
@@ -55,6 +56,7 @@ risingwave_rpc_client = { workspace = true }
 risingwave_source = { workspace = true }
 risingwave_storage = { workspace = true }
 rw_futures_util = { workspace = true }
+serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 smallvec = "1"
 static_assertions = "1"
@@ -90,7 +92,6 @@ risingwave_hummock_sdk = { workspace = true, features = ["enable_test_epoch"] }
 risingwave_hummock_test = { path = "../storage/hummock_test", features = [
     "test",
 ] }
-serde = { version = "1.0", features = ["derive"] }
 serde_yaml = "0.9"
 tracing-test = "0.2"
 
diff --git a/src/stream/src/executor/exchange/output.rs b/src/stream/src/executor/exchange/output.rs
index 7b85633bdd18c..d94e4cfbac841 100644
--- a/src/stream/src/executor/exchange/output.rs
+++ b/src/stream/src/executor/exchange/output.rs
@@ -67,6 +67,13 @@ impl LocalOutput {
     }
 }
 
+impl Drop for LocalOutput {
+    fn drop(&mut self) {
+        let backtrace = std::backtrace::Backtrace::capture();
+        tracing::debug!(actor_id=?self.actor_id, "dropping LocalOutput, backtrace: {}", backtrace);
+    }
+}
+
 #[async_trait]
 impl Output for LocalOutput {
     async fn send(&mut self, message: Message) -> StreamResult<()> {
@@ -76,7 +83,7 @@ impl Output for LocalOutput {
             .await
             .map_err(|SendError(message)| {
                 anyhow!(
-                    "failed to send message to actor {}: {:?}",
+                    "failed to send message to actor {}, message: {:?}",
                     self.actor_id,
                     message
                 )
@@ -130,7 +137,7 @@ impl Output for RemoteOutput {
             .await
             .map_err(|SendError(message)| {
                 anyhow!(
-                    "failed to send message to actor {}: {:#?}",
+                    "failed to send message to actor {}, message: {:?}",
                     self.actor_id,
                     message
                 )
diff --git a/src/stream/src/executor/source/executor_core.rs b/src/stream/src/executor/source/executor_core.rs
index 82f151bc9ce31..7ea82017c4da4 100644
--- a/src/stream/src/executor/source/executor_core.rs
+++ b/src/stream/src/executor/source/executor_core.rs
@@ -41,6 +41,9 @@ pub struct StreamSourceCore<S: StateStore> {
     pub(crate) split_state_store: SourceStateTableHandler<S>,
 
     /// In-memory cache for the splits.
+    ///
+    /// Source messages only write to the cache.
+    /// It is read on split change and when rebuilding the stream reader on error.
     pub(crate) state_cache: HashMap<SplitId, SplitImpl>,
 }
 
diff --git a/src/stream/src/executor/source/fetch_executor.rs b/src/stream/src/executor/source/fetch_executor.rs
index 3aa885cfffe1b..1c7254fe5bc3b 100644
--- a/src/stream/src/executor/source/fetch_executor.rs
+++ b/src/stream/src/executor/source/fetch_executor.rs
@@ -293,7 +293,7 @@ impl<S: StateStore, Src: OpendalSource> FsFetchExecutor<S, Src> {
                                             )
                                         })
                                         .collect();
-                                    state_store_handler.take_snapshot(file_assignment).await?;
+                                    state_store_handler.set_states(file_assignment).await?;
                                     state_store_handler.state_store.try_flush().await?;
                                 }
                                 _ => unreachable!(),
diff --git a/src/stream/src/executor/source/fs_source_executor.rs b/src/stream/src/executor/source/fs_source_executor.rs
index 6275ef5d116f6..584bdd2992958 100644
--- a/src/stream/src/executor/source/fs_source_executor.rs
+++ b/src/stream/src/executor/source/fs_source_executor.rs
@@ -236,7 +236,7 @@ impl<S: StateStore> FsSourceExecutor<S> {
 
         if !incompleted.is_empty() {
             tracing::debug!(actor_id = self.actor_ctx.id, incompleted = ?incompleted, "take snapshot");
-            core.split_state_store.take_snapshot(incompleted).await?
+            core.split_state_store.set_states(incompleted).await?
         }
 
         if !completed.is_empty() {
diff --git a/src/stream/src/executor/source/kafka_backfill_executor.rs b/src/stream/src/executor/source/kafka_backfill_executor.rs
index d1aef5a6bbf2e..00ca63d433c01 100644
--- a/src/stream/src/executor/source/kafka_backfill_executor.rs
+++ b/src/stream/src/executor/source/kafka_backfill_executor.rs
@@ -12,11 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+// FIXME: restore `rebuild_stream_reader_from_error` (rebuilding the stream reader after a reader error).
+
 use std::assert_matches::assert_matches;
 use std::cmp::Ordering;
 use std::fmt::Formatter;
 use std::pin::pin;
-use std::time::Duration;
 
 use anyhow::anyhow;
 use either::Either;
@@ -24,35 +25,54 @@ use futures::stream::{select_with_strategy, AbortHandle, Abortable};
 use futures::StreamExt;
 use futures_async_stream::try_stream;
 use risingwave_common::buffer::BitmapBuilder;
-use risingwave_common::metrics::GLOBAL_ERROR_METRICS;
-use risingwave_common::row::Row;
+use risingwave_common::row::{Row, RowExt};
 use risingwave_common::system_param::local_manager::SystemParamsReaderRef;
+use risingwave_common::types::JsonbVal;
 use risingwave_connector::source::{
-    BoxSourceWithStateStream, ConnectorState, SourceContext, SourceCtrlOpts, SplitId,
-    SplitMetaData, StreamChunkWithState,
+    BoxSourceWithStateStream, ConnectorState, SourceContext, SourceCtrlOpts, SplitMetaData,
+    StreamChunkWithState,
 };
 use risingwave_connector::ConnectorParams;
+use risingwave_pb::plan_common::AdditionalColumnType;
 use risingwave_source::source_desc::{SourceDesc, SourceDescBuilder};
 use risingwave_storage::StateStore;
-use thiserror_ext::AsReport;
-use tokio::sync::mpsc::UnboundedReceiver;
-use tokio::time::Instant;
+use serde::{Deserialize, Serialize};
 
 use super::executor_core::StreamSourceCore;
+use super::kafka_backfill_state_table::BackfillStateTableHandler;
 use crate::executor::monitor::StreamingMetrics;
-use crate::executor::stream_reader::StreamReaderWithPause;
 use crate::executor::*;
 
-type ExecutorSplitId = String;
+pub type SplitId = String;
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub enum BackfillState {
+    /// `None` means not started yet. It's the initial state.
+    Backfilling(Option<String>),
+    /// Backfill is stopped at this offset. Source needs to filter out messages before this offset.
+    SourceCachingUp(String),
+    Finished,
+}
+pub type BackfillStates = HashMap<SplitId, BackfillState>;
+
+impl BackfillState {
+    // TODO: use a more compact encoding?
+    pub fn encode_to_json(self) -> JsonbVal {
+        serde_json::to_value(self).unwrap().into()
+    }
+
+    pub fn restore_from_json(value: JsonbVal) -> anyhow::Result<Self> {
+        serde_json::from_value(value.take()).map_err(|e| anyhow!(e))
+    }
+}
 
 /// A constant to multiply when calculating the maximum time to wait for a barrier. This is due to
 /// some latencies in network and cost in meta.
 const WAIT_BARRIER_MULTIPLE_TIMES: u128 = 5;
 
 pub struct KafkaBackfillExecutorWrapper<S: StateStore> {
-    inner: KafkaBackfillExecutor<S>,
+    pub inner: KafkaBackfillExecutor<S>,
     /// Upstream changelog stream which may contain metadata columns, e.g. `_rw_offset`
-    input: Box<dyn Executor>,
+    pub input: Box<dyn Executor>,
 }
 
 pub struct KafkaBackfillExecutor<S: StateStore> {
@@ -60,7 +80,9 @@ pub struct KafkaBackfillExecutor<S: StateStore> {
     info: ExecutorInfo,
 
     /// Streaming source for external
-    stream_source_core: Option<StreamSourceCore<S>>,
+    // FIXME: some fields, e.g. its state table, are not used. We might need to refactor.
+    stream_source_core: StreamSourceCore<S>,
+    backfill_state_store: BackfillStateTableHandler<S>,
 
     /// Metrics for monitor.
     metrics: Arc<StreamingMetrics>,
@@ -82,19 +104,20 @@ impl<S: StateStore> KafkaBackfillExecutor<S> {
     pub fn new(
         actor_ctx: ActorContextRef,
         info: ExecutorInfo,
-        stream_source_core: Option<StreamSourceCore<S>>,
+        stream_source_core: StreamSourceCore<S>,
         metrics: Arc<StreamingMetrics>,
         // barrier_receiver: UnboundedReceiver<Barrier>,
         system_params: SystemParamsReaderRef,
         source_ctrl_opts: SourceCtrlOpts,
         connector_params: ConnectorParams,
+        backfill_state_store: BackfillStateTableHandler<S>,
     ) -> Self {
         Self {
             actor_ctx,
             info,
             stream_source_core,
+            backfill_state_store,
             metrics,
-            // barrier_receiver: Some(barrier_receiver),
             system_params,
             source_ctrl_opts,
             connector_params,
@@ -105,10 +128,7 @@ impl<S: StateStore> KafkaBackfillExecutor<S> {
         &self,
         source_desc: &SourceDesc,
         state: ConnectorState,
-    ) -> StreamExecutorResult<(
-        BoxSourceWithStateStream,
-        HashMap<ExecutorSplitId, AbortHandle>,
-    )> {
+    ) -> StreamExecutorResult<(BoxSourceWithStateStream, HashMap<SplitId, AbortHandle>)> {
         let column_ids = source_desc
             .columns
             .iter()
@@ -116,7 +136,7 @@ impl<S: StateStore> KafkaBackfillExecutor<S> {
             .collect_vec();
         let source_ctx = SourceContext::new_with_suppressor(
             self.actor_ctx.id,
-            self.stream_source_core.as_ref().unwrap().source_id,
+            self.stream_source_core.source_id,
             self.actor_ctx.fragment_id,
             source_desc.metrics.clone(),
             self.source_ctrl_opts.clone(),
@@ -150,240 +170,32 @@ impl<S: StateStore> KafkaBackfillExecutor<S> {
         }
     }
 
-    async fn apply_split_change<const BIASED: bool>(
-        &mut self,
-        source_desc: &SourceDesc,
-        stream: &mut StreamReaderWithPause<BIASED, StreamChunkWithState>,
-        split_assignment: &HashMap<ActorId, Vec<SplitImpl>>,
-    ) -> StreamExecutorResult<Option<Vec<SplitImpl>>> {
-        // self.metrics
-        //     .source_split_change_count
-        //     .with_label_values(
-        //         &self
-        //             .get_metric_labels()
-        //             .iter()
-        //             .map(AsRef::as_ref)
-        //             .collect::<Vec<&str>>(),
-        //     )
-        //     .inc();
-        if let Some(target_splits) = split_assignment.get(&self.actor_ctx.id).cloned() {
-            if let Some(target_state) = self.update_state_if_changed(Some(target_splits)).await? {
-                tracing::info!(
-                    actor_id = self.actor_ctx.id,
-                    state = ?target_state,
-                    "apply split change"
-                );
-
-                self.replace_stream_reader_with_target_state(
-                    source_desc,
-                    stream,
-                    target_state.clone(),
-                )
-                .await?;
-
-                return Ok(Some(target_state));
-            }
-        }
-
-        Ok(None)
-    }
-
-    // Note: `update_state_if_changed` will modify `state_cache`
-    async fn update_state_if_changed(
-        &mut self,
-        state: ConnectorState,
-    ) -> StreamExecutorResult<ConnectorState> {
-        let core = self.stream_source_core.as_mut().unwrap();
-
-        let target_splits: HashMap<_, _> = state
-            .unwrap()
-            .into_iter()
-            .map(|split| (split.id(), split))
-            .collect();
-
-        let mut target_state: Vec<SplitImpl> = Vec::with_capacity(target_splits.len());
-
-        let mut split_changed = false;
-
-        for (split_id, split) in &target_splits {
-            if let Some(s) = core.state_cache.get(split_id) {
-                // existing split, no change, clone from cache
-                target_state.push(s.clone())
-            } else {
-                split_changed = true;
-                // write new assigned split to state cache. snapshot is base on cache.
-
-                let initial_state = if let Some(recover_state) = core
-                    .split_state_store
-                    .try_recover_from_state_store(split)
-                    .await?
-                {
-                    recover_state
-                } else {
-                    split.clone()
-                };
-
-                core.state_cache
-                    .entry(split.id())
-                    .or_insert_with(|| initial_state.clone());
-
-                target_state.push(initial_state);
-            }
-        }
-
-        // state cache may be stale
-        for existing_split_id in core.stream_source_splits.keys() {
-            if !target_splits.contains_key(existing_split_id) {
-                tracing::info!("split dropping detected: {}", existing_split_id);
-                split_changed = true;
-            }
-        }
-
-        Ok(split_changed.then_some(target_state))
-    }
-
-    /// Rebuild stream if there is a err in stream
-    async fn rebuild_stream_reader_from_error<const BIASED: bool>(
-        &mut self,
-        source_desc: &SourceDesc,
-        stream: &mut StreamReaderWithPause<BIASED, StreamChunkWithState>,
-        split_info: &mut [SplitImpl],
-        e: StreamExecutorError,
-    ) -> StreamExecutorResult<()> {
-        let core = self.stream_source_core.as_mut().unwrap();
-        tracing::warn!(
-            "stream source reader error, actor: {:?}, source: {:?}",
-            self.actor_ctx.id,
-            core.source_id,
-        );
-        GLOBAL_ERROR_METRICS.user_source_reader_error.report([
-            "SourceReaderError".to_owned(),
-            e.to_report_string(),
-            "KafkaBackfillExecutor".to_owned(),
-            self.actor_ctx.id.to_string(),
-            core.source_id.to_string(),
-        ]);
-        // fetch the newest offset, either it's in cache (before barrier)
-        // or in state table (just after barrier)
-        let target_state = if core.state_cache.is_empty() {
-            for ele in &mut *split_info {
-                if let Some(recover_state) = core
-                    .split_state_store
-                    .try_recover_from_state_store(ele)
-                    .await?
-                {
-                    *ele = recover_state;
-                }
-            }
-            split_info.to_owned()
-        } else {
-            core.state_cache
-                .values()
-                .map(|split_impl| split_impl.to_owned())
-                .collect_vec()
-        };
-
-        self.replace_stream_reader_with_target_state(source_desc, stream, target_state)
-            .await
-    }
-
-    async fn replace_stream_reader_with_target_state<const BIASED: bool>(
-        &mut self,
-        source_desc: &SourceDesc,
-        stream: &mut StreamReaderWithPause<BIASED, StreamChunkWithState>,
-        target_state: Vec<SplitImpl>,
-    ) -> StreamExecutorResult<()> {
-        tracing::info!(
-            "actor {:?} apply source split change to {:?}",
-            self.actor_ctx.id,
-            target_state
-        );
-
-        // Replace the source reader with a new one of the new state.
-
-        let (reader, abort_handles) = self
-            .build_stream_source_reader(source_desc, Some(target_state.clone()))
-            .await?;
-
-        stream.replace_data_stream(reader);
-
-        Ok(())
-    }
-
-    async fn take_snapshot_and_clear_cache(
-        &mut self,
-        epoch: EpochPair,
-        target_state: Option<Vec<SplitImpl>>,
-        should_trim_state: bool,
-    ) -> StreamExecutorResult<()> {
-        let core = self.stream_source_core.as_mut().unwrap();
-
-        let mut cache = core
-            .state_cache
-            .values()
-            .map(|split_impl| split_impl.to_owned())
-            .collect_vec();
-
-        if let Some(target_splits) = target_state {
-            let target_split_ids: HashSet<_> =
-                target_splits.iter().map(|split| split.id()).collect();
-
-            cache.retain(|split| target_split_ids.contains(&split.id()));
-
-            let dropped_splits = core
-                .stream_source_splits
-                .extract_if(|split_id, _| !target_split_ids.contains(split_id))
-                .map(|(_, split)| split)
-                .collect_vec();
-
-            if should_trim_state && !dropped_splits.is_empty() {
-                // trim dropped splits' state
-                core.split_state_store.trim_state(&dropped_splits).await?;
-            }
-
-            core.stream_source_splits = target_splits
-                .into_iter()
-                .map(|split| (split.id(), split))
-                .collect();
-        }
-
-        if !cache.is_empty() {
-            tracing::debug!(actor_id = self.actor_ctx.id, state = ?cache, "take snapshot");
-            core.split_state_store.take_snapshot(cache).await?
-        }
-        // commit anyway, even if no message saved
-        core.split_state_store.state_store.commit(epoch).await?;
-
-        core.state_cache.clear();
-
-        Ok(())
-    }
-
-    async fn try_flush_data(&mut self) -> StreamExecutorResult<()> {
-        let core = self.stream_source_core.as_mut().unwrap();
-        core.split_state_store.state_store.try_flush().await?;
-
-        Ok(())
-    }
-
     #[try_stream(ok = Message, error = StreamExecutorError)]
     async fn execute(mut self, input: BoxedExecutor) {
-        // TODO: these can be inferred in frontend by checking additional_column_type
-        let split_column_idx = 1;
-        let offset_column_idx = 2;
-
         let mut input = input.execute();
 
         // Poll the upstream to get the first barrier.
         let barrier = expect_first_barrier(&mut input).await?;
+        tracing::debug!("KafkaBackfillExecutor got first barrier: {barrier:?}");
 
-        let mut core = self.stream_source_core.unwrap();
+        let mut core = self.stream_source_core;
 
         // Build source description from the builder.
         let source_desc_builder: SourceDescBuilder = core.source_desc_builder.take().unwrap();
         let source_desc = source_desc_builder
             .build()
             .map_err(StreamExecutorError::connector_error)?;
+        let split_column_idx = source_desc
+            .columns
+            .iter()
+            .position(|c| matches!(c.additional_column_type, AdditionalColumnType::Partition))
+            .expect("kafka source should have partition column");
+        let offset_column_idx = source_desc
+            .columns
+            .iter()
+            .position(|c| matches!(c.additional_column_type, AdditionalColumnType::Offset))
+            .expect("kafka source should have offset column");
+        tracing::debug!(?split_column_idx, ?offset_column_idx);
 
         let mut boot_state = Vec::default();
         if let Some(mutation) = barrier.mutation.as_ref() {
@@ -405,27 +217,39 @@ impl<S: StateStore> KafkaBackfillExecutor<S> {
                 _ => {}
             }
         }
-        let mut latest_split_info = boot_state.clone();
+        let latest_split_info = boot_state.clone();
+        tracing::debug!("latest_split_info: {:?}", latest_split_info);
+
+        self.backfill_state_store.init_epoch(barrier.epoch);
 
-        core.split_state_store.init_epoch(barrier.epoch);
+        let mut backfill_states: BackfillStates = HashMap::new();
 
-        for ele in &mut boot_state {
-            if let Some(recover_state) = core
-                .split_state_store
+        let mut unfinished_splits = vec![];
+        for ele in boot_state {
+            let split_id = ele.id().to_string();
+            let (split, backfill_state) = self
+                .backfill_state_store
                 .try_recover_from_state_store(ele)
-                .await?
-            {
-                *ele = recover_state;
+                .await?;
+
+            backfill_states.insert(split_id, backfill_state);
+            if split.is_some() {
+                unfinished_splits.push(split.unwrap());
             }
         }
+        let need_backfill = backfill_states
+            .values()
+            .any(|state| !matches!(state, BackfillState::Finished));
+        tracing::debug!("KafkaBackfillExecutor backfill_state: {backfill_states:?}");
 
         // init in-memory split states with persisted state if any
-        core.init_split_state(boot_state.clone());
+        core.init_split_state(unfinished_splits.clone());
 
         // Return the ownership of `stream_source_core` to the source executor.
-        self.stream_source_core = Some(core);
+        self.stream_source_core = core;
 
-        let recover_state: ConnectorState = (!boot_state.is_empty()).then_some(boot_state);
+        let recover_state: ConnectorState =
+            (!unfinished_splits.is_empty()).then_some(unfinished_splits);
         tracing::info!(actor_id = self.actor_ctx.id, state = ?recover_state, "start with state");
         let (source_chunk_reader, abort_handles) = self
             .build_stream_source_reader(&source_desc, recover_state)
@@ -456,33 +280,26 @@ impl<S: StateStore> KafkaBackfillExecutor<S> {
             |_: &mut ()| futures::stream::PollNext::Left,
         );
 
-        #[derive(Debug)]
-        enum BackfillState {
-            /// `None` means not started yet. It's the initial state.
-            Backfilling(Option<String>),
-            /// Backfill is stopped at this offset. Source needs to filter out messages before this offset.
-            SourceCachingUp(String),
-            Finished,
-        }
-
-        let split_ids = abort_handles.keys();
-        // TODO: recover from state store
-        let mut backfill_state: HashMap<ExecutorSplitId, BackfillState> = split_ids
-            .map(|k| (k.clone(), BackfillState::Backfilling(None)))
-            .collect();
-        let need_backfill = backfill_state
-            .values()
-            .any(|state| !matches!(state, BackfillState::Finished));
-
         if need_backfill {
             #[for_await]
             'backfill_loop: for either in &mut backfill_stream {
+                tracing::debug!("KafkaBackfillExecutor got msg either: {either:?}");
+
                 match either {
                     // Upstream
                     Either::Left(msg) => {
                         match msg? {
                             Message::Barrier(barrier) => {
-                                // TODO: handle split change etc. & Persist progress.
+                                // TODO: handle split change etc.
+
+                                self.backfill_state_store
+                                    .set_states(backfill_states.clone())
+                                    .await?;
+                                self.backfill_state_store
+                                    .state_store
+                                    .commit(barrier.epoch)
+                                    .await?;
+
                                 yield Message::Barrier(barrier);
                             }
                             Message::Chunk(chunk) => {
@@ -490,12 +307,11 @@ impl<S: StateStore> KafkaBackfillExecutor<S> {
                                 // Note: We assume offset from the source is monotonically increasing for the algorithm to work correctly.
                                 let mut new_vis = BitmapBuilder::zeroed(chunk.visibility().len());
                                 for (i, (_, row)) in chunk.rows().enumerate() {
-                                    let split =
-                                        row.datum_at(split_column_idx).unwrap().into_int64();
+                                    tracing::debug!(row = %row.display());
+                                    let split = row.datum_at(split_column_idx).unwrap().into_utf8();
                                     let offset =
-                                        row.datum_at(offset_column_idx).unwrap().into_int64();
-                                    let backfill_state =
-                                        backfill_state.get_mut(&split.to_string()).unwrap();
+                                        row.datum_at(offset_column_idx).unwrap().into_utf8();
+                                    let backfill_state = backfill_states.get_mut(split).unwrap();
                                     match backfill_state {
                                         BackfillState::Backfilling(backfill_offset) => {
                                             new_vis.set(i, false);
@@ -509,10 +325,7 @@ impl<S: StateStore> KafkaBackfillExecutor<S> {
                                                 Ordering::Equal => {
                                                     // backfilling for this split is finished just right.
                                                     *backfill_state = BackfillState::Finished;
-                                                    abort_handles
-                                                        .get(&split.to_string())
-                                                        .unwrap()
-                                                        .abort();
+                                                    abort_handles.get(split).unwrap().abort();
                                                 }
                                                 Ordering::Greater => {
                                                     // backfilling for this split produced more data.
@@ -562,7 +375,7 @@ impl<S: StateStore> KafkaBackfillExecutor<S> {
                                     yield Message::Chunk(new_chunk);
                                 }
                                 // TODO: maybe use a counter to optimize this
-                                if backfill_state
+                                if backfill_states
                                     .values()
                                     .all(|state| matches!(state, BackfillState::Finished))
                                 {
@@ -584,18 +397,16 @@ impl<S: StateStore> KafkaBackfillExecutor<S> {
                         let split_offset_mapping =
                             split_offset_mapping.expect("kafka source should have offsets");
 
-                        let state: HashMap<_, _> = split_offset_mapping
+                        let _state: HashMap<_, _> = split_offset_mapping
                             .iter()
                             .flat_map(|(split_id, offset)| {
                                 let origin_split_impl = self
                                     .stream_source_core
-                                    .as_mut()
-                                    .unwrap()
                                     .stream_source_splits
                                     .get_mut(split_id);
 
                                 // update backfill progress
-                                let prev_state = backfill_state.insert(
+                                let prev_state = backfill_states.insert(
                                     split_id.to_string(),
                                     BackfillState::Backfilling(Some(offset.to_string())),
                                 );
@@ -613,14 +424,14 @@ impl<S: StateStore> KafkaBackfillExecutor<S> {
                             })
                             .try_collect()?;
 
-                        self.stream_source_core
-                            .as_mut()
-                            .unwrap()
-                            .state_cache
-                            .extend(state);
+                        // self.stream_source_core
+                        //     .as_mut()
+                        //     .unwrap()
+                        //     .state_cache
+                        //     .extend(state);
 
                         yield Message::Chunk(chunk);
-                        self.try_flush_data().await?;
+                        // self.try_flush_data().await?;
                     }
                 }
             }
@@ -629,7 +440,11 @@ impl<S: StateStore> KafkaBackfillExecutor<S> {
         // All splits finished backfilling. Now we only forward the source data.
         #[for_await]
         for msg in input {
-            match msg? {
+            let msg = msg?;
+
+            tracing::debug!("KafkaBackfillExecutor got msg: {msg:?}");
+
+            match msg {
                 Message::Barrier(barrier) => {
                     // TODO: How to handle a split change here?
                     // We might need to persist its state. Is is possible that we need to backfill?
@@ -646,10 +461,11 @@ impl<S: StateStore> KafkaBackfillExecutor<S> {
     }
 }
 
-fn compare_kafka_offset(a: Option<&String>, b: i64) -> Ordering {
+fn compare_kafka_offset(a: Option<&String>, b: &str) -> Ordering {
     match a {
         Some(a) => {
             let a = a.parse::<i64>().unwrap();
+            let b = b.parse::<i64>().unwrap();
             a.cmp(&b)
         }
         None => Ordering::Less,
@@ -676,14 +492,11 @@ impl<S: StateStore> Executor for KafkaBackfillExecutorWrapper<S> {
 
 impl<S: StateStore> Debug for KafkaBackfillExecutor<S> {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        if let Some(core) = &self.stream_source_core {
-            f.debug_struct("KafkaBackfillExecutor")
-                .field("source_id", &core.source_id)
-                .field("column_ids", &core.column_ids)
-                .field("pk_indices", &self.info.pk_indices)
-                .finish()
-        } else {
-            f.debug_struct("KafkaBackfillExecutor").finish()
-        }
+        // The source core is no longer optional, so every field is reported unconditionally.
+        let mut builder = f.debug_struct("KafkaBackfillExecutor");
+        builder.field("source_id", &self.stream_source_core.source_id);
+        builder.field("column_ids", &self.stream_source_core.column_ids);
+        builder.field("pk_indices", &self.info.pk_indices);
+        builder.finish()
     }
 }
diff --git a/src/stream/src/executor/source/kafka_backfill_state_table.rs b/src/stream/src/executor/source/kafka_backfill_state_table.rs
new file mode 100644
index 0000000000000..d478140cc4287
--- /dev/null
+++ b/src/stream/src/executor/source/kafka_backfill_state_table.rs
@@ -0,0 +1,132 @@
+// Copyright 2024 RisingWave Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use risingwave_common::constants::hummock::PROPERTIES_RETENTION_SECOND_KEY;
+use risingwave_common::row::{OwnedRow, Row};
+use risingwave_common::types::{JsonbVal, ScalarImpl, ScalarRef, ScalarRefImpl};
+use risingwave_common::util::epoch::EpochPair;
+use risingwave_common::{bail, row};
+use risingwave_connector::source::{SplitImpl, SplitMetaData};
+use risingwave_pb::catalog::PbTable;
+use risingwave_storage::StateStore;
+
+use super::kafka_backfill_executor::{BackfillState, BackfillStates, SplitId};
+use crate::common::table::state_table::StateTable;
+use crate::executor::error::StreamExecutorError;
+use crate::executor::StreamExecutorResult;
+
+/// Wrapper around the [`StateTable`] that stores one backfill-state row per split id.
+pub struct BackfillStateTableHandler<S: StateStore> {
+    pub state_store: StateTable<S>,
+}
+
+impl<S: StateStore> BackfillStateTableHandler<S> {
+    /// Builds the handler on top of the state table described by `table_catalog`.
+    pub async fn from_table_catalog(table_catalog: &PbTable, store: S) -> Self {
+        // The state of source should not be cleaned up by retention_seconds
+        assert!(!table_catalog
+            .properties
+            .contains_key(&String::from(PROPERTIES_RETENTION_SECOND_KEY)));
+
+        let state_store = StateTable::from_table_catalog(table_catalog, store, None).await;
+        Self { state_store }
+    }
+
+    /// Forwards the initial epoch to the underlying state table.
+    pub fn init_epoch(&mut self, epoch: EpochPair) {
+        self.state_store.init_epoch(epoch);
+    }
+
+    /// Wraps a string-like value into a `Utf8` scalar.
+    fn string_to_scalar(rhs: impl Into<String>) -> ScalarImpl {
+        let owned: String = rhs.into();
+        ScalarImpl::Utf8(owned.into_boxed_str())
+    }
+
+    /// Looks up the row stored under the split id `key`, if there is one.
+    pub(crate) async fn get(&self, key: &SplitId) -> StreamExecutorResult<Option<OwnedRow>> {
+        let pk = row::once(Some(Self::string_to_scalar(key)));
+        let found = self.state_store.get_row(pk).await;
+        found.map_err(StreamExecutorError::from)
+    }
+
+    /// Writes `value` under `key`: an update when a row already exists, an insert otherwise.
+    pub async fn set(&mut self, key: SplitId, value: JsonbVal) -> StreamExecutorResult<()> {
+        let new_row = [
+            Some(Self::string_to_scalar(&key)),
+            Some(ScalarImpl::Jsonb(value)),
+        ];
+        if let Some(old_row) = self.get(&key).await? {
+            self.state_store.update(old_row, new_row);
+        } else {
+            self.state_store.insert(new_row);
+        }
+        Ok(())
+    }
+
+    /// Deletes the row stored under `key`; nothing happens when no such row exists.
+    pub async fn delete(&mut self, key: &SplitId) -> StreamExecutorResult<()> {
+        let existing = self.get(key).await?;
+        if let Some(row) = existing {
+            self.state_store.delete(row);
+        }
+        Ok(())
+    }
+
+    /// Stores the state of every split in `states` through `set`; an empty map is rejected.
+    pub async fn set_states(&mut self, states: BackfillStates) -> StreamExecutorResult<()> {
+        if states.is_empty() {
+            // TODO should be a clear Error Code
+            bail!("states require not null");
+        }
+        for (split_id, state) in states {
+            self.set(split_id, state.encode_to_json()).await?;
+        }
+        Ok(())
+    }
+
+    // TODO: `trim_state(&mut self, to_trim: &[SplitImpl])` is not provided yet. The disabled
+    // draft logged "trimming source state for split {}" for each split in `to_trim` and then
+    // called `delete` with that split's id.
+
+    /// Restores the backfill state of `stream_source_split` from the state table.
+    ///
+    /// The returned split is `None` when it no longer needs to be read (backfill finished);
+    /// a split with a persisted backfill offset is first updated in place to that offset.
+    pub async fn try_recover_from_state_store(
+        &mut self,
+        mut stream_source_split: SplitImpl,
+    ) -> StreamExecutorResult<(Option<SplitImpl>, BackfillState)> {
+        let split_id = stream_source_split.id().to_string();
+        let row = match self.get(&split_id).await? {
+            Some(row) => row,
+            None => return Ok((Some(stream_source_split), BackfillState::Backfilling(None))),
+        };
+        let state = match row.datum_at(1) {
+            Some(ScalarRefImpl::Jsonb(jsonb_ref)) => {
+                BackfillState::restore_from_json(jsonb_ref.to_owned_scalar())?
+            }
+            _ => unreachable!(),
+        };
+        let new_split = match &state {
+            BackfillState::Backfilling(None) => Some(stream_source_split),
+            BackfillState::Backfilling(Some(offset)) => {
+                stream_source_split.update_in_place(offset.clone())?;
+                Some(stream_source_split)
+            }
+            BackfillState::SourceCachingUp(_) | BackfillState::Finished => None,
+        };
+        Ok((new_split, state))
+    }
+}
diff --git a/src/stream/src/executor/source/mod.rs b/src/stream/src/executor/source/mod.rs
index dd9fb470e2f32..82ed243535136 100644
--- a/src/stream/src/executor/source/mod.rs
+++ b/src/stream/src/executor/source/mod.rs
@@ -23,6 +23,8 @@ pub mod fetch_executor;
 pub use fetch_executor::*;
 
 pub mod kafka_backfill_executor;
+pub mod kafka_backfill_state_table;
+pub use kafka_backfill_state_table::BackfillStateTableHandler;
 pub mod source_executor;
 
 pub mod list_executor;
diff --git a/src/stream/src/executor/source/source_executor.rs b/src/stream/src/executor/source/source_executor.rs
index 065dce58cfd3d..3d4ad686c05d4 100644
--- a/src/stream/src/executor/source/source_executor.rs
+++ b/src/stream/src/executor/source/source_executor.rs
@@ -170,7 +170,7 @@ impl<S: StateStore> SourceExecutor<S> {
         Ok(None)
     }
 
-    // Note: `update_state_if_changed` will modify `state_cache`
+    /// Note: `update_state_if_changed` will modify `state_cache`
     async fn update_state_if_changed(
         &mut self,
         state: ConnectorState,
@@ -291,7 +291,9 @@ impl<S: StateStore> SourceExecutor<S> {
         Ok(())
     }
 
-    async fn take_snapshot_and_clear_cache(
+    /// - `target_state`: the new split info from barrier. `None` if no split update.
+    /// - `should_trim_state`: whether to trim state for dropped splits.
+    async fn persist_state_and_clear_cache(
         &mut self,
         epoch: EpochPair,
         target_state: Option<Vec<SplitImpl>>,
@@ -330,16 +332,17 @@ impl<S: StateStore> SourceExecutor<S> {
 
         if !cache.is_empty() {
             tracing::debug!(actor_id = self.actor_ctx.id, state = ?cache, "take snapshot");
-            core.split_state_store.take_snapshot(cache).await?
+            core.split_state_store.set_states(cache).await?
         }
+
         // commit anyway, even if no message saved
         core.split_state_store.state_store.commit(epoch).await?;
-
         core.state_cache.clear();
 
         Ok(())
     }
 
+    /// try mem table spill
     async fn try_flush_data(&mut self) -> StreamExecutorResult<()> {
         let core = self.stream_source_core.as_mut().unwrap();
         core.split_state_store.state_store.try_flush().await?;
@@ -517,7 +520,8 @@ impl<S: StateStore> SourceExecutor<S> {
                                     latest_split_info = target_state.clone();
                                 }
 
-                                self.take_snapshot_and_clear_cache(
+                                // We clear cache on barrier because ..?
+                                self.persist_state_and_clear_cache(
                                     epoch,
                                     target_state,
                                     should_trim_state,
diff --git a/src/stream/src/executor/source/state_table_handler.rs b/src/stream/src/executor/source/state_table_handler.rs
index 31f20ddb9d7fa..e3f7b80e6880e 100644
--- a/src/stream/src/executor/source/state_table_handler.rs
+++ b/src/stream/src/executor/source/state_table_handler.rs
@@ -12,13 +12,23 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+cfg_if::cfg_if! {
+    if #[cfg(test)] {
+        use risingwave_common::catalog::{DatabaseId, SchemaId};
+        use risingwave_pb::catalog::table::TableType;
+        use risingwave_pb::common::{PbColumnOrder, PbDirection, PbNullsAre, PbOrderType};
+        use risingwave_pb::data::data_type::TypeName;
+        use risingwave_pb::data::DataType;
+        use risingwave_pb::plan_common::{ColumnCatalog, ColumnDesc};
+    }
+}
+
 use std::collections::HashSet;
 use std::ops::{Bound, Deref};
 use std::sync::Arc;
 
 use futures::{pin_mut, StreamExt};
 use risingwave_common::buffer::Bitmap;
-use risingwave_common::catalog::{DatabaseId, SchemaId};
 use risingwave_common::constants::hummock::PROPERTIES_RETENTION_SECOND_KEY;
 use risingwave_common::hash::VirtualNode;
 use risingwave_common::row::{OwnedRow, Row};
@@ -27,12 +37,7 @@ use risingwave_common::util::epoch::EpochPair;
 use risingwave_common::{bail, row};
 use risingwave_connector::source::{SplitId, SplitImpl, SplitMetaData};
 use risingwave_hummock_sdk::key::next_key;
-use risingwave_pb::catalog::table::TableType;
 use risingwave_pb::catalog::PbTable;
-use risingwave_pb::common::{PbColumnOrder, PbDirection, PbNullsAre, PbOrderType};
-use risingwave_pb::data::data_type::TypeName;
-use risingwave_pb::data::DataType;
-use risingwave_pb::plan_common::{ColumnCatalog, ColumnDesc};
 use risingwave_storage::store::PrefetchOptions;
 use risingwave_storage::StateStore;
 
@@ -140,10 +145,10 @@ impl<S: StateStore> SourceStateTableHandler<S> {
 
     /// set all complete
     /// can only used by `FsSourceExecutor`
-    pub(crate) async fn set_all_complete<SS>(&mut self, states: Vec<SS>) -> StreamExecutorResult<()>
-    where
-        SS: SplitMetaData,
-    {
+    pub(crate) async fn set_all_complete(
+        &mut self,
+        states: Vec<SplitImpl>,
+    ) -> StreamExecutorResult<()> {
         if states.is_empty() {
             // TODO should be a clear Error Code
             bail!("states require not null");
@@ -180,11 +185,7 @@ impl<S: StateStore> SourceStateTableHandler<S> {
         Ok(())
     }
 
-    /// This function provides the ability to persist the source state
-    /// and needs to be invoked by the ``SourceReader`` to call it,
-    /// and will return the error when the dependent ``StateStore`` handles the error.
-    /// The caller should ensure that the passed parameters are not empty.
-    pub async fn take_snapshot<SS>(&mut self, states: Vec<SS>) -> StreamExecutorResult<()>
+    pub async fn set_states<SS>(&mut self, states: Vec<SS>) -> StreamExecutorResult<()>
     where
         SS: SplitMetaData,
     {
@@ -200,10 +201,7 @@ impl<S: StateStore> SourceStateTableHandler<S> {
         Ok(())
     }
 
-    pub async fn trim_state<SS>(&mut self, to_trim: &[SS]) -> StreamExecutorResult<()>
-    where
-        SS: SplitMetaData,
-    {
+    pub async fn trim_state(&mut self, to_trim: &[SplitImpl]) -> StreamExecutorResult<()> {
         for split in to_trim {
             tracing::info!("trimming source state for split {}", split.id());
             self.delete(split.id()).await?;
@@ -228,8 +226,9 @@ impl<S: StateStore> SourceStateTableHandler<S> {
     }
 }
 
-// align with schema defined in `LogicalSource::infer_internal_table_catalog`. The function is used
-// for test purpose and should not be used in production.
+/// align with schema defined in `LogicalSource::infer_internal_table_catalog`. The function is used
+/// for test purpose and should not be used in production.
+#[cfg(test)]
 pub fn default_source_internal_table(id: u32) -> PbTable {
     let make_column = |column_type: TypeName, column_id: i32| -> ColumnCatalog {
         ColumnCatalog {
@@ -325,7 +324,7 @@ pub(crate) mod tests {
 
         state_table_handler.init_epoch(epoch_1);
         state_table_handler
-            .take_snapshot(vec![split_impl.clone()])
+            .set_states(vec![split_impl.clone()])
             .await?;
         state_table_handler.state_store.commit(epoch_2).await?;
 
diff --git a/src/stream/src/from_proto/mod.rs b/src/stream/src/from_proto/mod.rs
index 9a9e83c0a328f..5f3f710341336 100644
--- a/src/stream/src/from_proto/mod.rs
+++ b/src/stream/src/from_proto/mod.rs
@@ -42,6 +42,7 @@ mod simple_agg;
 mod sink;
 mod sort;
 mod source;
+mod source_backfill;
 mod stateless_simple_agg;
 mod stream_cdc_scan;
 mod stream_scan;
@@ -84,6 +85,7 @@ use self::simple_agg::*;
 use self::sink::*;
 use self::sort::*;
 use self::source::*;
+use self::source_backfill::*;
 use self::stateless_simple_agg::*;
 use self::stream_cdc_scan::*;
 use self::stream_scan::*;
@@ -172,5 +174,6 @@ pub async fn create_executor(
         NodeBody::EowcOverWindow => EowcOverWindowExecutorBuilder,
         NodeBody::OverWindow => OverWindowExecutorBuilder,
         NodeBody::StreamFsFetch => FsFetchExecutorBuilder,
+        NodeBody::SourceBackfill => KafkaBackfillExecutorBuilder,
     }
 }
diff --git a/src/stream/src/from_proto/source_backfill.rs b/src/stream/src/from_proto/source_backfill.rs
new file mode 100644
index 0000000000000..e129a7c362975
--- /dev/null
+++ b/src/stream/src/from_proto/source_backfill.rs
@@ -0,0 +1,170 @@
+// Copyright 2024 RisingWave Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use risingwave_common::catalog::{
+    default_key_column_name_version_mapping, ColumnId, TableId, KAFKA_TIMESTAMP_COLUMN_NAME,
+};
+use risingwave_connector::source::SourceCtrlOpts;
+use risingwave_pb::data::data_type::TypeName as PbTypeName;
+use risingwave_pb::plan_common::{
+    AdditionalColumnType, ColumnDescVersion, FormatType, PbEncodeType,
+};
+use risingwave_pb::stream_plan::SourceBackfillNode;
+use risingwave_source::source_desc::SourceDescBuilder;
+
+use super::*;
+use crate::executor::kafka_backfill_executor::{
+    KafkaBackfillExecutor, KafkaBackfillExecutorWrapper,
+};
+use crate::executor::source::StreamSourceCore;
+use crate::executor::state_table_handler::SourceStateTableHandler;
+use crate::executor::BackfillStateTableHandler;
+
+/// Builds a [`KafkaBackfillExecutor`] and its wrapper from a `SourceBackfillNode`.
+pub struct KafkaBackfillExecutorBuilder;
+
+impl ExecutorBuilder for KafkaBackfillExecutorBuilder {
+    type Node = SourceBackfillNode;
+
+    /// Assembles the source description and state-table handlers described by `node` and
+    /// returns the backfill executor boxed together with its single upstream input.
+    async fn new_boxed_executor(
+        params: ExecutorParams,
+        node: &Self::Node,
+        store: impl StateStore,
+        _stream: &mut LocalStreamManagerCore,
+    ) -> StreamResult<BoxedExecutor> {
+        // Exactly one upstream input is required. No barrier sender is registered for this
+        // actor here: the executor takes its barriers from `input`.
+        let [input]: [_; 1] = params.input.try_into().unwrap();
+
+        let system_params = params.env.system_params_manager_ref().get_params();
+
+        let source_id = TableId::new(node.source_id);
+        let source_name = node.source_name.clone();
+        let source_info = node.get_info()?;
+
+        let mut source_columns = node.columns.clone();
+
+        {
+            // compatible code: introduced in https://github.com/risingwavelabs/risingwave/pull/13707
+            // for upsert and (avro | protobuf) overwrite the `_rw_key` column's ColumnDesc.additional_column_type to Key
+            if source_info.format() == FormatType::Upsert
+                && (source_info.row_encode() == PbEncodeType::Avro
+                    || source_info.row_encode() == PbEncodeType::Protobuf)
+            {
+                // NOTE: a plain loop is required here. `iter_mut().map(..)` is lazy, so the
+                // rewrite never ran while its result was discarded with `let _ =`.
+                for c in &mut source_columns {
+                    if let Some(desc) = c.column_desc.as_mut() {
+                        let is_bytea = desc
+                            .get_column_type()
+                            .map(|col_type| col_type.type_name == PbTypeName::Bytea as i32)
+                            .unwrap_or(false);
+                        if desc.name == default_key_column_name_version_mapping(&desc.version())
+                            && is_bytea
+                            // the column is from a legacy version
+                            && desc.version == ColumnDescVersion::Unspecified as i32
+                        {
+                            desc.additional_column_type = AdditionalColumnType::Key as i32;
+                        }
+                    }
+                }
+            }
+        }
+
+        {
+            // compatible code: handle legacy column `_rw_kafka_timestamp`
+            // the column is auto added for all kafka source to empower batch query on source
+            // solution: rewrite the column `additional_column_type` to Timestamp
+            // NOTE: plain loop on purpose; a discarded `iter_mut().map(..)` never runs.
+            for c in &mut source_columns {
+                if let Some(desc) = c.column_desc.as_mut() {
+                    let is_timestamp = desc
+                        .get_column_type()
+                        .map(|col_type| col_type.type_name == PbTypeName::Timestamptz as i32)
+                        .unwrap_or(false);
+                    if desc.name == KAFKA_TIMESTAMP_COLUMN_NAME
+                        && is_timestamp
+                        // the column is from a legacy version
+                        && desc.version == ColumnDescVersion::Unspecified as i32
+                    {
+                        desc.additional_column_type = AdditionalColumnType::Timestamp as i32;
+                    }
+                }
+            }
+        }
+
+        let source_desc_builder = SourceDescBuilder::new(
+            source_columns.clone(),
+            params.env.source_metrics(),
+            node.row_id_index.map(|x| x as _),
+            node.with_properties.clone(),
+            source_info.clone(),
+            params.env.connector_params(),
+            params.env.config().developer.connector_message_buffer_size,
+            // `pk_indices` is used to ensure that a message will be skipped instead of parsed
+            // with null pk when the pk column is missing.
+            //
+            // Currently pk_indices for source is always empty since pk information is not
+            // passed via `StreamSource` so null pk may be emitted to downstream.
+            //
+            // TODO: use the correct information to fill in pk_indices.
+            // We should consider adding back the "pk_column_ids" field removed by #8841 in
+            // StreamSource
+            params.info.pk_indices.clone(),
+        );
+
+        let source_ctrl_opts = SourceCtrlOpts {
+            chunk_size: params.env.config().developer.chunk_size,
+        };
+
+        let source_column_ids: Vec<_> = source_columns
+            .iter()
+            .map(|column| ColumnId::from(column.get_column_desc().unwrap().column_id))
+            .collect();
+
+        // FIXME: remove this. It's wrong: it reuses `node.state_table`, same as the handler below.
+        let state_table_handler = SourceStateTableHandler::from_table_catalog(
+            node.state_table.as_ref().unwrap(),
+            store.clone(),
+        )
+        .await;
+        let backfill_state_table = BackfillStateTableHandler::from_table_catalog(
+            node.state_table.as_ref().unwrap(),
+            store.clone(),
+        )
+        .await;
+        let stream_source_core = StreamSourceCore::new(
+            source_id,
+            source_name,
+            source_column_ids,
+            source_desc_builder,
+            state_table_handler,
+        );
+
+        let exec = KafkaBackfillExecutor::new(
+            params.actor_context.clone(),
+            params.info.clone(),
+            stream_source_core,
+            params.executor_stats.clone(),
+            // (no barrier receiver argument: barriers arrive through `input`)
+            system_params,
+            source_ctrl_opts.clone(),
+            params.env.connector_params(),
+            backfill_state_table,
+        );
+        Ok(KafkaBackfillExecutorWrapper { inner: exec, input }.boxed())
+    }
+}