From 28878d8bf745940fc4f49822cdf75e5509555068 Mon Sep 17 00:00:00 2001 From: Dylan Date: Mon, 28 Nov 2022 17:33:58 +0800 Subject: [PATCH] fix(optimizer): fix subquery unnesting with having (#6622) * fix subquery unnesting in having expr * fmt * change is correlated >= back to == Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --- .../planner_test/tests/testdata/subquery.yaml | 51 +++++++++++++++++++ .../testdata/subquery_expr_correlated.yaml | 26 ++++++++-- src/frontend/src/binder/select.rs | 3 +- src/frontend/src/expr/mod.rs | 2 +- .../src/optimizer/max_one_row_visitor.rs | 5 +- 5 files changed, 79 insertions(+), 8 deletions(-) diff --git a/src/frontend/planner_test/tests/testdata/subquery.yaml b/src/frontend/planner_test/tests/testdata/subquery.yaml index cf9167e9683c7..a4fdf50eb7fad 100644 --- a/src/frontend/planner_test/tests/testdata/subquery.yaml +++ b/src/frontend/planner_test/tests/testdata/subquery.yaml @@ -244,3 +244,54 @@ | └─BatchExchange { order: [], dist: HashShard(auction.date_time) } | └─BatchScan { table: auction, columns: [auction.date_time], distribution: SomeShard } └─BatchValues { rows: [[]] } +- sql: | + CREATE TABLE t (v int); + SELECT 1 FROM t AS t_inner WHERE EXISTS ( SELECT 1 HAVING t_inner.v > 1); + logical_plan: | + LogicalProject { exprs: [1:Int32] } + └─LogicalApply { type: LeftSemi, on: true, correlated_id: 1 } + ├─LogicalScan { table: t, columns: [t.v, t._row_id] } + └─LogicalProject { exprs: [1:Int32] } + └─LogicalFilter { predicate: (CorrelatedInputRef { index: 0, correlated_id: 1 } > 1:Int32) } + └─LogicalAgg { aggs: [] } + └─LogicalProject { exprs: [] } + └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } + optimized_logical_plan: | + LogicalProject { exprs: [1:Int32] } + └─LogicalJoin { type: LeftSemi, on: true, output: all } + ├─LogicalScan { table: t, output_columns: [], required_columns: [v], predicate: (t.v > 1:Int32) } + └─LogicalAgg { aggs: [] } + └─LogicalValues { rows: [[]], 
schema: Schema { fields: [] } } +- sql: | + create table a (a1 int, a2 int); + create table b (b1 int, b2 int); + create table c (c1 int, c2 int); + select 1 from a where exists ( select 1 from b having exists ( select a1 from c )); + logical_plan: | + LogicalProject { exprs: [1:Int32] } + └─LogicalApply { type: LeftSemi, on: true, correlated_id: 1 } + ├─LogicalScan { table: a, columns: [a.a1, a.a2, a._row_id] } + └─LogicalProject { exprs: [1:Int32] } + └─LogicalApply { type: LeftSemi, on: true, correlated_id: 2 } + ├─LogicalAgg { aggs: [] } + | └─LogicalProject { exprs: [] } + | └─LogicalScan { table: b, columns: [b.b1, b.b2, b._row_id] } + └─LogicalProject { exprs: [CorrelatedInputRef { index: 0, correlated_id: 1 }] } + └─LogicalScan { table: c, columns: [c.c1, c.c2, c._row_id] } + optimized_logical_plan: | + LogicalProject { exprs: [1:Int32] } + └─LogicalJoin { type: LeftSemi, on: IsNotDistinctFrom(a.a1, a.a1), output: [] } + ├─LogicalScan { table: a, columns: [a.a1] } + └─LogicalJoin { type: LeftSemi, on: IsNotDistinctFrom(a.a1, a.a1), output: all } + ├─LogicalAgg { group_key: [a.a1], aggs: [] } + | └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(a.a1, a.a1), output: [a.a1] } + | ├─LogicalAgg { group_key: [a.a1], aggs: [] } + | | └─LogicalScan { table: a, columns: [a.a1] } + | └─LogicalJoin { type: Inner, on: true, output: all } + | ├─LogicalAgg { group_key: [a.a1], aggs: [] } + | | └─LogicalScan { table: a, columns: [a.a1] } + | └─LogicalScan { table: b, columns: [] } + └─LogicalJoin { type: Inner, on: true, output: all } + ├─LogicalAgg { group_key: [a.a1], aggs: [] } + | └─LogicalScan { table: a, columns: [a.a1] } + └─LogicalScan { table: c, columns: [] } diff --git a/src/frontend/planner_test/tests/testdata/subquery_expr_correlated.yaml b/src/frontend/planner_test/tests/testdata/subquery_expr_correlated.yaml index f1aa359083c3a..d9fbe82c38fdf 100644 --- a/src/frontend/planner_test/tests/testdata/subquery_expr_correlated.yaml +++ 
b/src/frontend/planner_test/tests/testdata/subquery_expr_correlated.yaml @@ -373,9 +373,29 @@ select a1 from c ) ); - planner_error: |- - Feature is not yet implemented: correlated subquery in HAVING or SELECT with agg - Tracking issue: https://github.com/risingwavelabs/risingwave/issues/2275 + optimized_logical_plan: | + LogicalProject { exprs: [1:Int32] } + └─LogicalJoin { type: LeftSemi, on: IsNotDistinctFrom(a.a1, a.a1), output: [] } + ├─LogicalScan { table: a, columns: [a.a1] } + └─LogicalJoin { type: LeftSemi, on: IsNotDistinctFrom(a.a1, a.a1), output: all } + ├─LogicalAgg { group_key: [a.a1], aggs: [] } + | └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(a.a1, a.a1), output: [a.a1] } + | ├─LogicalAgg { group_key: [a.a1], aggs: [] } + | | └─LogicalScan { table: a, columns: [a.a1] } + | └─LogicalJoin { type: Inner, on: true, output: all } + | ├─LogicalAgg { group_key: [a.a1], aggs: [] } + | | └─LogicalScan { table: a, columns: [a.a1] } + | └─LogicalScan { table: b, columns: [] } + └─LogicalJoin { type: Inner, on: true, output: all } + ├─LogicalAgg { group_key: [a.a1], aggs: [] } + | └─LogicalScan { table: a, columns: [a.a1] } + └─LogicalScan { table: c, columns: [] } +- sql: | + create table a (a1 int, a2 int); + create table b (b1 int, b2 int); + create table c (c1 int, c2 int); + select 1 from a where exists (select (select a1 from c ), min(b1) from b ); + optimizer_error: 'internal error: Scalar subquery might produce more than one row.' 
- sql: | create table t1(x int, y int); create table t2(x int, y int); diff --git a/src/frontend/src/binder/select.rs b/src/frontend/src/binder/select.rs index e3ba805889705..dee1dd466011a 100644 --- a/src/frontend/src/binder/select.rs +++ b/src/frontend/src/binder/select.rs @@ -62,8 +62,7 @@ impl BoundSelect { .iter_mut() .chain(self.group_by.iter_mut()) .chain(self.where_clause.iter_mut()) - // TODO: uncomment `having` below after #4850 is fixed - // .chain(self.having.iter_mut()) + .chain(self.having.iter_mut()) } pub fn is_correlated(&self) -> bool { diff --git a/src/frontend/src/expr/mod.rs b/src/frontend/src/expr/mod.rs index 23aa3ebc0f393..407ae2f0aa1a9 100644 --- a/src/frontend/src/expr/mod.rs +++ b/src/frontend/src/expr/mod.rs @@ -284,7 +284,7 @@ impl ExprImpl { &mut self, correlated_input_ref: &CorrelatedInputRef, ) -> bool { - correlated_input_ref.depth() >= self.depth + correlated_input_ref.depth() == self.depth } fn visit_subquery(&mut self, subquery: &Subquery) -> bool { diff --git a/src/frontend/src/optimizer/max_one_row_visitor.rs b/src/frontend/src/optimizer/max_one_row_visitor.rs index 7924c3156fd80..032b6981d259e 100644 --- a/src/frontend/src/optimizer/max_one_row_visitor.rs +++ b/src/frontend/src/optimizer/max_one_row_visitor.rs @@ -16,7 +16,8 @@ use std::collections::HashSet; use crate::optimizer::plan_node::{ LogicalAgg, LogicalApply, LogicalExpand, LogicalFilter, LogicalHopWindow, LogicalLimit, - LogicalProjectSet, LogicalTopN, LogicalUnion, LogicalValues, PlanTreeNodeUnary, + LogicalProjectSet, LogicalTopN, LogicalUnion, LogicalValues, PlanTreeNodeBinary, + PlanTreeNodeUnary, }; use crate::optimizer::plan_visitor::PlanVisitor; @@ -84,6 +85,6 @@ impl PlanVisitor for HasMaxOneRowApply { } fn visit_logical_apply(&mut self, plan: &LogicalApply) -> bool { - plan.max_one_row() + plan.max_one_row() | self.visit(plan.left()) | self.visit(plan.right()) } }