From 6cc9f8d0886c007f64d541ba26129bd7434446df Mon Sep 17 00:00:00 2001 From: Bugen Zhao Date: Mon, 23 Sep 2024 18:07:07 +0800 Subject: [PATCH] do not expose `vnode_count` term to user, use `max_parallelism` instead Signed-off-by: Bugen Zhao --- src/common/src/session_config/mod.rs | 17 ++++++++++++++--- src/frontend/src/stream_fragmenter/mod.rs | 2 +- src/meta/src/rpc/ddl_controller.rs | 6 +++++- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/common/src/session_config/mod.rs b/src/common/src/session_config/mod.rs index 5509284e80dde..813172ac20a64 100644 --- a/src/common/src/session_config/mod.rs +++ b/src/common/src/session_config/mod.rs @@ -140,8 +140,11 @@ pub struct SessionConfig { #[parameter(default = "UTC", check_hook = check_timezone)] timezone: String, - /// If `STREAMING_PARALLELISM` is non-zero, CREATE MATERIALIZED VIEW/TABLE/INDEX will use it as - /// streaming parallelism. + /// The execution parallelism for streaming queries, including tables, materialized views, indexes, + /// and sinks. Defaults to 0, which means they will be scheduled adaptively based on the cluster size. + /// + /// If a non-zero value is set, streaming queries will be scheduled to use a fixed number of parallelism. + /// Note that the value will be bounded at `STREAMING_MAX_PARALLELISM`. #[serde_as(as = "DisplayFromStr")] #[parameter(default = ConfigNonZeroU64::default())] streaming_parallelism: ConfigNonZeroU64, @@ -300,8 +303,16 @@ pub struct SessionConfig { #[parameter(default = false)] bypass_cluster_limits: bool, + /// The maximum number of parallelism a streaming query can use. Defaults to 256. + /// + /// Compared to `STREAMING_PARALLELISM`, which configures the initial parallelism, this configures + /// the maximum parallelism a streaming query can use in the future, if the cluster size changes or + /// users manually change the parallelism with `ALTER .. SET PARALLELISM`. + /// + /// It's not always a good idea to set this to a very large number, as it may cause performance + /// degradation when performing range scans on the table or the materialized view. #[parameter(default = VirtualNode::COUNT_FOR_COMPAT, check_hook = check_vnode_count)] - vnode_count: usize, + streaming_max_parallelism: usize, } fn check_timezone(val: &str) -> Result<(), String> { diff --git a/src/frontend/src/stream_fragmenter/mod.rs b/src/frontend/src/stream_fragmenter/mod.rs index 35faeb2c6a49a..b671d0792e073 100644 --- a/src/frontend/src/stream_fragmenter/mod.rs +++ b/src/frontend/src/stream_fragmenter/mod.rs @@ -144,7 +144,7 @@ pub fn build_graph(plan_node: PlanRef) -> SchedulerResult max_parallelism; if parallelism_limited { - tracing::warn!("Too many parallelism, use {} instead", max_parallelism); + // TODO(var-vnode): may return error here? + tracing::warn!( + "Too many parallelism, use max parallelism {} instead", + max_parallelism + ); } let parallelism = parallelism.min(max_parallelism);