From ab943edd1bce4d589953eac0a9d68c0c40cb7474 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Thu, 28 Sep 2023 16:43:38 +0800 Subject: [PATCH 1/5] allow udf and scalar fn Signed-off-by: Ruihang Xia --- src/query/src/dist_plan/commutativity.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/query/src/dist_plan/commutativity.rs b/src/query/src/dist_plan/commutativity.rs index 6bfcee5a16d6..4258ef54de1f 100644 --- a/src/query/src/dist_plan/commutativity.rs +++ b/src/query/src/dist_plan/commutativity.rs @@ -140,7 +140,9 @@ impl Categorizer { | Expr::Negative(_) | Expr::Between(_) | Expr::Sort(_) - | Expr::Exists(_) => Commutativity::Commutative, + | Expr::Exists(_) + | Expr::ScalarFunction(_) + | Expr::ScalarUDF(_) => Commutativity::Commutative, Expr::Like(_) | Expr::SimilarTo(_) @@ -150,8 +152,6 @@ impl Categorizer { | Expr::Case(_) | Expr::Cast(_) | Expr::TryCast(_) - | Expr::ScalarFunction(_) - | Expr::ScalarUDF(_) | Expr::AggregateFunction(_) | Expr::WindowFunction(_) | Expr::AggregateUDF(_) From 244e51c6b06b831adf0feef8ba0ebf053a19cdf6 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Sat, 7 Oct 2023 17:12:34 +0800 Subject: [PATCH 2/5] put CountWildcardRule before dist planner Signed-off-by: Ruihang Xia --- src/query/src/query_engine/state.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/query/src/query_engine/state.rs b/src/query/src/query_engine/state.rs index e882a6811e84..2cecca3e179f 100644 --- a/src/query/src/query_engine/state.rs +++ b/src/query/src/query_engine/state.rs @@ -34,7 +34,8 @@ use datafusion::physical_optimizer::PhysicalOptimizerRule; use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner}; use datafusion_expr::LogicalPlan as DfLogicalPlan; -use datafusion_optimizer::analyzer::Analyzer; +use datafusion_optimizer::analyzer::count_wildcard_rule::CountWildcardRule; +use datafusion_optimizer::analyzer::{Analyzer, AnalyzerRule}; use datafusion_optimizer::optimizer::Optimizer; use promql::extension_plan::PromExtensionPlanner; use substrait::extension_serializer::ExtensionSerializer; @@ -88,6 +89,8 @@ impl QueryEngineState { } analyzer.rules.insert(0, Arc::new(TypeConversionRule)); analyzer.rules.insert(0, Arc::new(StringNormalizationRule)); + Self::remove_analyzer_rule(&mut analyzer.rules, CountWildcardRule {}.name()); + analyzer.rules.insert(0, Arc::new(CountWildcardRule {})); let mut optimizer = Optimizer::new(); optimizer.rules.push(Arc::new(OrderHintRule)); @@ -132,6 +135,19 @@ impl QueryEngineState { } } + fn remove_analyzer_rule(rules: &mut Vec>, name: &str) { + let mut index_to_move = None; + for (i, rule) in rules.iter().enumerate() { + if rule.name() == name { + index_to_move = Some(i); + break; + } + } + if let Some(index) = index_to_move { + let _ = rules.remove(index); + } + } + fn remove_physical_optimize_rule( rules: &mut Vec>, name: &str, From d1c981321b5ecbfe329ed4efe4e8dcd9756a9771 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Sun, 8 Oct 2023 16:47:45 +0800 Subject: [PATCH 3/5] bump datafusion to fix first_value/last_value Signed-off-by: Ruihang Xia --- Cargo.lock | 18 +++++++++--------- Cargo.toml | 14 +++++++------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a94bd04889cd..2ee249b1bc27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2476,7 +2476,7 @@ dependencies = [ [[package]] name = "datafusion" version = "27.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" dependencies = [ "ahash 0.8.3", "arrow", @@ -2524,7 +2524,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "27.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" dependencies = [ "arrow", "arrow-array", @@ -2538,7 +2538,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "27.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" dependencies = [ "dashmap", "datafusion-common", @@ -2555,7 +2555,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "27.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" dependencies = [ "ahash 0.8.3", "arrow", @@ -2569,7 +2569,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "27.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" dependencies = [ "arrow", "async-trait", @@ -2586,7 +2586,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "27.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" dependencies = [ "ahash 0.8.3", "arrow", @@ -2621,7 +2621,7 @@ dependencies = [ [[package]] name = "datafusion-row" version = "27.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" dependencies = [ "arrow", "datafusion-common", @@ -2632,7 +2632,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "27.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" dependencies = [ "arrow", "arrow-schema", @@ -2645,7 +2645,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" version = "27.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" dependencies = [ "async-recursion", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 6fdd77e2056e..72f58747ae0f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,13 +68,13 @@ arrow-schema = { version = "43.0", features = ["serde"] } async-stream = "0.3" async-trait = "0.1" chrono = { version = "0.4", features = ["serde"] } -datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" } -datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" } -datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" } -datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" } -datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" } -datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" } -datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" } +datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" } +datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" } +datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" } +datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" } +datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" } +datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" } +datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" } derive_builder = "0.12" futures = "0.3" futures-util = "0.3" From 45a134dffd4e63cd08089ddad99c910af5a36a33 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Sun, 8 Oct 2023 20:49:21 +0800 Subject: [PATCH 4/5] update sqlness result Signed-off-by: Ruihang Xia --- .../cases/distributed/explain/single_partition.result | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tests/cases/distributed/explain/single_partition.result b/tests/cases/distributed/explain/single_partition.result index 0df318ff1b31..ca94fee301dc 100644 --- a/tests/cases/distributed/explain/single_partition.result +++ b/tests/cases/distributed/explain/single_partition.result @@ -12,15 +12,8 @@ EXPLAIN SELECT COUNT(*) FROM single_partition; +-+-+ | plan_type_| plan_| +-+-+ -| logical_plan_| Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]]_| -|_|_Projection:_| -|_|_MergeScan [is_placeholder=false]_| -| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[COUNT(UInt8(1))]_| -|_|_CoalescePartitionsExec_| -|_|_AggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1))]_| -|_|_RepartitionExec: partitioning=REDACTED -|_|_ProjectionExec: expr=[]_| -|_|_MergeScanExec: REDACTED +| logical_plan_| MergeScan [is_placeholder=false]_| +| physical_plan | MergeScanExec: REDACTED |_|_| +-+-+ From 2b84393a660b902f2332801f0a256818d343e53c Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Sun, 8 Oct 2023 21:02:36 +0800 Subject: [PATCH 5/5] use retain instead Signed-off-by: Ruihang Xia --- src/query/src/query_engine/state.rs | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/src/query/src/query_engine/state.rs b/src/query/src/query_engine/state.rs index 2cecca3e179f..753015582c8a 100644 --- a/src/query/src/query_engine/state.rs +++ b/src/query/src/query_engine/state.rs @@ -136,32 +136,14 @@ impl QueryEngineState { } fn remove_analyzer_rule(rules: &mut Vec>, name: &str) { - let mut index_to_move = None; - for (i, rule) in rules.iter().enumerate() { - if rule.name() == name { - index_to_move = Some(i); - break; - } - } - if let Some(index) = index_to_move { - let _ = rules.remove(index); - } + rules.retain(|rule| rule.name() != name); } fn remove_physical_optimize_rule( rules: &mut Vec>, name: &str, ) { - let mut index_to_move = None; - for (i, rule) in rules.iter().enumerate() { - if rule.name() == name { - index_to_move = Some(i); - break; - } - } - if let Some(index) = index_to_move { - let _ = rules.remove(index); - } + rules.retain(|rule| rule.name() != name); } /// Register a udf function