From f25a5fca7917509e9f003d2ef8fbc5cbd213eccf Mon Sep 17 00:00:00 2001 From: Jinser Kafka Date: Sat, 21 Oct 2023 02:16:10 +0800 Subject: [PATCH] feat: support extended array slicing with step --- .../impl/src/scalar/array_range_access.rs | 34 +++- src/frontend/src/binder/expr/mod.rs | 9 +- src/frontend/src/binder/expr/value.rs | 9 +- src/sqlparser/src/ast/mod.rs | 16 +- src/sqlparser/src/parser.rs | 182 ++++++++++++++---- 5 files changed, 201 insertions(+), 49 deletions(-) diff --git a/src/expr/impl/src/scalar/array_range_access.rs b/src/expr/impl/src/scalar/array_range_access.rs index 2782a97a7147f..d431ad29327e8 100644 --- a/src/expr/impl/src/scalar/array_range_access.rs +++ b/src/expr/impl/src/scalar/array_range_access.rs @@ -18,18 +18,38 @@ use risingwave_expr::function; /// If the case is `array[1,2,3][:2]`, then start will be 0 set by the frontend /// If the case is `array[1,2,3][1:]`, then end will be `i32::MAX` set by the frontend -#[function("array_range_access(anyarray, int4, int4) -> anyarray")] -pub fn array_range_access(list: ListRef<'_>, start: i32, end: i32) -> Option { +#[function("array_range_access(anyarray, int4, int4, int4) -> anyarray")] +pub fn array_range_access(list: ListRef<'_>, start: i32, end: i32, step: i32) -> Option { let mut data = vec![]; let list_all_values = list.iter(); - let start = std::cmp::max(start, 1) as usize; - let end = std::cmp::min(std::cmp::max(0, end), list_all_values.len() as i32) as usize; - if start > end { + + let mut new_start = std::cmp::max(start, 1) as usize; + let mut new_end = std::cmp::min(std::cmp::max(0, end), list_all_values.len() as i32) as usize; + + let (step, need_rev) = if step < 0 { + if start != 0 && end != i32::MAX { + (new_start, new_end) = (new_end, new_start); + } + (-step, true) + } else { + (step, false) + }; + + if new_start > new_end { return Some(ListValue::new(data)); } - for datumref in list_all_values.take(end).skip(start - 1) { + for datumref in list_all_values + .take(new_end) + .skip(new_start - 1) + .step_by(step as _) + { data.push(datumref.to_owned_datum()); } - Some(ListValue::new(data)) + + Some(ListValue::new(if need_rev { + data.into_iter().rev().collect() + } else { + data + })) } diff --git a/src/frontend/src/binder/expr/mod.rs b/src/frontend/src/binder/expr/mod.rs index 6da590c2d315d..3e98f9ac066d1 100644 --- a/src/frontend/src/binder/expr/mod.rs +++ b/src/frontend/src/binder/expr/mod.rs @@ -122,9 +122,12 @@ impl Binder { Expr::Nested(expr) => self.bind_expr_inner(*expr), Expr::Array(Array { elem: exprs, .. }) => self.bind_array(exprs), Expr::ArrayIndex { obj, index } => self.bind_array_index(*obj, *index), - Expr::ArrayRangeIndex { obj, start, end } => { - self.bind_array_range_index(*obj, start, end) - } + Expr::ArrayRangeIndex { + obj, + start, + end, + step, + } => self.bind_array_range_index(*obj, start, end, step), Expr::Function(f) => self.bind_function(f), Expr::Subquery(q) => self.bind_subquery_expr(*q, SubqueryKind::Scalar), Expr::Exists(q) => self.bind_subquery_expr(*q, SubqueryKind::Existential), diff --git a/src/frontend/src/binder/expr/value.rs b/src/frontend/src/binder/expr/value.rs index b958ae8338a49..4e3ebdbdf45c7 100644 --- a/src/frontend/src/binder/expr/value.rs +++ b/src/frontend/src/binder/expr/value.rs @@ -153,6 +153,7 @@ impl Binder { obj: Expr, start: Option>, end: Option>, + step: Option>, ) -> Result { let obj = self.bind_expr_inner(obj)?; let start = match start { @@ -169,10 +170,16 @@ impl Binder { .bind_expr_inner(*expr)? .cast_implicit(DataType::Int32)?, }; + let step = match step { + None => ExprImpl::literal_int(1), + Some(expr) => self + .bind_expr_inner(*expr)? + .cast_implicit(DataType::Int32)?, + }; match obj.return_type() { DataType::List(return_type) => Ok(FunctionCall::new_unchecked( ExprType::ArrayRangeAccess, - vec![obj, start, end], + vec![obj, start, end, step], DataType::List(return_type), ) .into()), diff --git a/src/sqlparser/src/ast/mod.rs b/src/sqlparser/src/ast/mod.rs index ecae5a9663a88..0c119113fa768 100644 --- a/src/sqlparser/src/ast/mod.rs +++ b/src/sqlparser/src/ast/mod.rs @@ -442,11 +442,12 @@ pub enum Expr { obj: Box, index: Box, }, - /// A slice expression `arr[1:3]` + /// A slice expression `arr[1:3:2]` ArrayRangeIndex { obj: Box, start: Option>, end: Option>, + step: Option>, }, LambdaFunction { args: Vec, @@ -670,7 +671,12 @@ impl fmt::Display for Expr { write!(f, "{}[{}]", obj, index)?; Ok(()) } - Expr::ArrayRangeIndex { obj, start, end } => { + Expr::ArrayRangeIndex { + obj, + start, + end, + step, + } => { let start_str = match start { None => "".to_string(), Some(start) => format!("{}", start), @@ -679,7 +685,11 @@ impl fmt::Display for Expr { None => "".to_string(), Some(end) => format!("{}", end), }; - write!(f, "{}[{}:{}]", obj, start_str, end_str)?; + let step_str = match step { + None => "".to_string(), + Some(step) => format!(":{}", step), + }; + write!(f, "{}[{}:{}{}]", obj, start_str, end_str, step_str)?; Ok(()) } Expr::Array(exprs) => write!(f, "{}", exprs), diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index ee054f7d17031..f95a1f0610d84 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -1540,63 +1540,175 @@ impl Parser { } } - /// We parse both `array[1,9][1]`, `array[1,9][1:2]`, `array[1,9][:2]`, `array[1,9][1:]` and - /// `array[1,9][:]` in this function. - pub fn parse_array_index(&mut self, expr: Expr) -> Result { - let new_expr = match self.peek_token().token { + fn parse_array_index_inner(&mut self, expr: Expr) -> Result { + // [N] + // [:], [N:], [:N], [N:M] + // [::], [N::], [:N:], [::N] + // [N:M:], [N::M], [:N:P], [N:M:P] + match self.peek_token().token { + // [:] + // [:N] + // [:N:M], [:N:] Token::Colon => { - // [:] or [:N] assert!(self.consume_token(&Token::Colon)); - let end = match self.peek_token().token { - Token::RBracket => None, + match self.peek_token().token { + // [:] + Token::RBracket => Ok(Expr::ArrayRangeIndex { + obj: Box::new(expr), + start: None, + end: None, + step: None, + }), + // [:N], [:N:] + // [:N:M] _ => { - let end_index = Box::new(self.parse_expr()?); - Some(end_index) + let end = Some(Box::new(self.parse_expr()?)); + match self.peek_token().token { + // [:N], [:N:] + Token::RBracket | Token::Colon => Ok(Expr::ArrayRangeIndex { + obj: Box::new(expr), + start: None, + end, + step: None, + }), + // [:N:M] + _ => { + let step = Some(Box::new(self.parse_expr()?)); + Ok(Expr::ArrayRangeIndex { + obj: Box::new(expr), + start: None, + end, + step, + }) + } + } } - }; - Expr::ArrayRangeIndex { - obj: Box::new(expr), - start: None, - end, } } + // [::], [::N] + Token::DoubleColon => { + assert!(self.consume_token(&Token::DoubleColon)); + match self.peek_token().token { + // [::] + Token::RBracket => Ok(Expr::ArrayRangeIndex { + obj: Box::new(expr), + start: None, + end: None, + step: None, + }), + // [::N] + _ => { + let step = Some(Box::new(self.parse_expr()?)); + Ok(Expr::ArrayRangeIndex { + obj: Box::new(expr), + start: None, + end: None, + step, + }) + } + } + } + // [N] + // [N:], [N:M] + // [N:M:P], [N:M:], [N::M] + // [N::] _ => { - // [N], [N:], [N:M] let index = Box::new(self.parse_expr()?); match self.peek_token().token { + // [N:], [N:M] + // [N:M:P], [N:M:] Token::Colon => { - // [N:], [N:M] assert!(self.consume_token(&Token::Colon)); + match self.peek_token().token { - Token::RBracket => { - // [N:] - Expr::ArrayRangeIndex { - obj: Box::new(expr), - start: Some(index), - end: None, - } - } + // [N:] + Token::RBracket => Ok(Expr::ArrayRangeIndex { + obj: Box::new(expr), + start: Some(index), + end: None, + step: None, + }), + + // [N:M] + // [N:M:P], [N:M:] _ => { - // [N:M] let end = Some(Box::new(self.parse_expr()?)); - Expr::ArrayRangeIndex { - obj: Box::new(expr), - start: Some(index), - end, + match self.peek_token().token { + // [N:M:P], [N:M:] + Token::Colon => { + assert!(self.consume_token(&Token::Colon)); + match self.peek_token().token { + // [N:M:] + Token::RBracket => Ok(Expr::ArrayRangeIndex { + obj: Box::new(expr), + start: Some(index), + end, + step: None, + }), + // [N:M:P] + _ => { + let step = Some(Box::new(self.parse_expr()?)); + Ok(Expr::ArrayRangeIndex { + obj: Box::new(expr), + start: Some(index), + end, + step, + }) + } + } + } + // [N:M] + _ => Ok(Expr::ArrayRangeIndex { + obj: Box::new(expr), + start: Some(index), + end, + step: None, + }), } } } } - _ => { - // [N] - Expr::ArrayIndex { - obj: Box::new(expr), - index, + // [N::], [N::M] + Token::DoubleColon => { + assert!(self.consume_token(&Token::DoubleColon)); + match self.peek_token().token { + // [N::] + Token::RBracket => Ok(Expr::ArrayRangeIndex { + obj: Box::new(expr), + start: Some(index), + end: None, + step: None, + }), + // [N::M] + _ => { + let step = Some(Box::new(self.parse_expr()?)); + Ok(Expr::ArrayRangeIndex { + obj: Box::new(expr), + start: Some(index), + end: None, + step, + }) + } } } + // [N] + _ => Ok(Expr::ArrayIndex { + obj: Box::new(expr), + index, + }), } } - }; + } + } + + /// We parse both array index and slice in this function. + /// [N] + /// [:], [N:], [:N], [N:M] + /// [::], [N::], [:N:], [::N] + /// [N:M:], [N::M], [:N:P], [N:M:P] + pub fn parse_array_index(&mut self, expr: Expr) -> Result { + let new_expr = self.parse_array_index_inner(expr)?; + self.expect_token(&Token::RBracket)?; // recursively checking for more indices if self.consume_token(&Token::LBracket) {