-
Notifications
You must be signed in to change notification settings - Fork 594
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(binder): bind RCTE #15522
feat(binder): bind RCTE #15522
Changes from 12 commits
27b5439
24d763b
51cefe3
20493ab
cfb9920
a5c7d81
f8e3788
76cc79c
8e5b68a
e2292a3
78a2422
ced2f59
6b84174
8ada38b
3175084
b0f1446
65379b0
f81531e
4044d54
da9776c
ebaa6c2
79b7015
249408e
aa6cdb4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,18 +12,22 @@ | |
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
use std::cell::RefCell; | ||
use std::collections::HashMap; | ||
use std::rc::Rc; | ||
|
||
use risingwave_common::bail_not_implemented; | ||
use risingwave_common::catalog::Schema; | ||
use risingwave_common::types::DataType; | ||
use risingwave_common::util::sort_util::{ColumnOrder, OrderType}; | ||
use risingwave_sqlparser::ast::{Cte, Expr, Fetch, OrderByExpr, Query, Value, With}; | ||
use risingwave_sqlparser::ast::{ | ||
Cte, Expr, Fetch, OrderByExpr, Query, SetExpr, SetOperator, Value, With, | ||
}; | ||
use thiserror_ext::AsReport; | ||
|
||
use super::bind_context::BindingCteState; | ||
use super::statement::RewriteExprsRecursive; | ||
use super::BoundValues; | ||
use crate::binder::bind_context::{BindingCte, RecursiveUnion}; | ||
use crate::binder::{Binder, BoundSetExpr}; | ||
use crate::error::{ErrorCode, Result}; | ||
use crate::expr::{CorrelatedId, Depth, ExprImpl, ExprRewriter}; | ||
|
@@ -279,20 +283,125 @@ impl Binder { | |
} | ||
|
||
fn bind_with(&mut self, with: With) -> Result<()> { | ||
if with.recursive { | ||
bail_not_implemented!("recursive cte"); | ||
} else { | ||
for cte_table in with.cte_tables { | ||
let Cte { alias, query, .. } = cte_table; | ||
let table_name = alias.name.real_value(); | ||
let bound_query = self.bind_query(query)?; | ||
let share_id = self.next_share_id(); | ||
self.context | ||
for cte_table in with.cte_tables { | ||
let share_id = self.next_share_id(); | ||
let Cte { alias, query, .. } = cte_table; | ||
let table_name = alias.name.real_value(); | ||
|
||
if with.recursive { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately this Example in PostgreSQL:
|
||
let Query { | ||
with, | ||
body, | ||
order_by, | ||
limit, | ||
offset, | ||
fetch, | ||
} = query; | ||
|
||
/// the input clause should not be supported. | ||
fn should_be_empty<T>(v: Option<T>, clause: &str) -> Result<()> { | ||
if v.is_some() { | ||
return Err(ErrorCode::BindError(format!( | ||
"`{clause}` is not supported in recursive CTE" | ||
)) | ||
.into()); | ||
} | ||
Ok(()) | ||
} | ||
|
||
should_be_empty(order_by.first(), "ORDER BY")?; | ||
should_be_empty(limit, "LIMIT")?; | ||
should_be_empty(offset, "OFFSET")?; | ||
should_be_empty(fetch, "FETCH")?; | ||
|
||
let SetExpr::SetOperation { | ||
op: SetOperator::Union, | ||
all, | ||
left, | ||
right, | ||
} = body.clone() | ||
else { | ||
return Err(ErrorCode::BindError( | ||
"`UNION` is required in recursive CTE".to_string(), | ||
) | ||
.into()); | ||
}; | ||
|
||
if !all { | ||
return Err(ErrorCode::BindError( | ||
"only `UNION ALL` is supported in recursive CTE now".to_string(), | ||
) | ||
.into()); | ||
} | ||
|
||
let entry = self | ||
.context | ||
.cte_to_relation | ||
.insert(table_name, Rc::new((share_id, bound_query, alias))); | ||
.entry(table_name) | ||
.insert_entry(Rc::new(RefCell::new(BindingCte { | ||
share_id, | ||
state: BindingCteState::Init, | ||
alias, | ||
}))) | ||
.get() | ||
.clone(); | ||
|
||
if let Some(with) = with { | ||
self.bind_with(with)?; | ||
} | ||
|
||
// We assume `left` is the base term, otherwise the implementation may be very hard. | ||
// The behavior is the same as PostgreSQL's. | ||
// reference: <https://www.postgresql.org/docs/16/sql-select.html#:~:text=the%20recursive%20self%2Dreference%20must%20appear%20on%20the%20right%2Dhand%20side%20of%20the%20UNION> | ||
let mut base = self.bind_set_expr(*left)?; | ||
|
||
entry.borrow_mut().state = BindingCteState::BaseResolved { | ||
schema: base.schema().clone(), | ||
}; | ||
|
||
// bind the rest of the recursive cte | ||
let mut recursive = self.bind_set_expr(*right)?; | ||
|
||
// todo: add validate check here for *bound* `base` and `recursive` | ||
Self::align_schema(&mut base, &mut recursive, SetOperator::Union)?; | ||
|
||
// please note that even after aligning, the schema of `left` | ||
// may not be the same as `right`; this is because there may | ||
// be case(s) where the `base` term is just a value, and the | ||
// `recursive` term is a select expression / statement. | ||
xzhseh marked this conversation as resolved.
Show resolved
Hide resolved
|
||
let schema = recursive.schema().clone(); | ||
// yet another sanity check | ||
assert_eq!( | ||
schema, | ||
recursive.schema().clone(), | ||
"expect `schema` to be the same as recursive's" | ||
); | ||
|
||
let recursive_union = RecursiveUnion { | ||
all, | ||
base: Box::new(base), | ||
recursive: Box::new(recursive), | ||
schema, | ||
}; | ||
|
||
entry.borrow_mut().state = BindingCteState::Bound { | ||
query: either::Either::Right(recursive_union), | ||
}; | ||
} else { | ||
let bound_query = self.bind_query(query)?; | ||
self.context.cte_to_relation.insert( | ||
table_name, | ||
Rc::new(RefCell::new(BindingCte { | ||
share_id, | ||
state: BindingCteState::Bound { | ||
query: either::Either::Left(bound_query), | ||
}, | ||
alias, | ||
})), | ||
); | ||
} | ||
Ok(()) | ||
} | ||
Ok(()) | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Intuitively, if we have decoupled the declaration (knowing its schema) and definition (binding the recursive query) stage of CTE, we can use the existing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, it's a back reference. If you take a look at the definition of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
// Copyright 2024 RisingWave Labs | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
use crate::binder::statement::RewriteExprsRecursive; | ||
use crate::binder::ShareId; | ||
|
||
/// A CTE reference, currently only used in the back reference of recursive CTE. | ||
/// For the non-recursive one, see [`BoundShare`](super::BoundShare). | ||
#[derive(Debug, Clone)] | ||
pub struct BoundBackCteRef { | ||
#[expect(dead_code)] | ||
pub(crate) share_id: ShareId, | ||
} | ||
|
||
impl RewriteExprsRecursive for BoundBackCteRef { | ||
fn rewrite_exprs_recursive(&mut self, _rewriter: &mut impl crate::expr::ExprRewriter) {} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#16023 (comment)
We shall add the test case to showcase the necessity of this special check. (Btw I am still trying to understand it at this time. The ask for test case is not an endorsement of it being valid 😂 )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Panicked here:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should be fixed now - just update the logic when checking if
merge_context
is applying forrcte
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems this special bypass just suppress something that is actually wrong behind the scene:
Without the update here, the error is saying that
t
is already inBindContext
before bindingt
. This error is TRUE, and adding this bypass does not fix the actual dirty-context problem: before bindingt
, the context is supposed to be empty.As an example, having a dirty initial context makes the following malformed query to pass binder validation (and panic in planner):
I am still tracing down how the context become dirty.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
risingwave/src/frontend/src/binder/query.rs
Lines 378 to 379 in 3f50c60
The nonrecursive branch's
bind_query
pushes a new context upon entry and clears it before returning, while the recursive branch's twobind_set_expr
calls write directly into current context. Maybe (?) this is the reason 🤔The problem would not arise if both sides of RecursiveUnion were considered
Query
rather thanSetExpr
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fix available for review: #16282