From 85b235619b1be33eae1aa42b6c412fac979bb17e Mon Sep 17 00:00:00 2001 From: Michael Xu Date: Tue, 5 Sep 2023 12:52:19 -0400 Subject: [PATCH 1/5] feat(expr): add `array_max` --- e2e_test/batch/functions/array_max.slt.part | 47 ++++++++++++++++++++ proto/expr.proto | 1 + src/expr/src/vector_op/array_max.rs | 38 ++++++++++++++++ src/expr/src/vector_op/mod.rs | 1 + src/frontend/src/binder/expr/function.rs | 1 + src/frontend/src/expr/pure.rs | 1 + src/frontend/src/expr/type_inference/func.rs | 12 +++++ 7 files changed, 101 insertions(+) create mode 100644 e2e_test/batch/functions/array_max.slt.part create mode 100644 src/expr/src/vector_op/array_max.rs diff --git a/e2e_test/batch/functions/array_max.slt.part b/e2e_test/batch/functions/array_max.slt.part new file mode 100644 index 000000000000..d8b6ad571bed --- /dev/null +++ b/e2e_test/batch/functions/array_max.slt.part @@ -0,0 +1,47 @@ +query I +select array_max(array[1, 2, 3]); +---- +3 + +query I +select array_max(array[2, 3, 5, 2, 4]); +---- +5 + +query I +select array_max(array[114514, 114513]); +---- +114514 + +query I +select array_max(array['a', 'b', 'c', 'a']); +---- +c + +query I +select array_max(array['💩', '🤔️', '🥵', '🥳', '💩']); +---- +🥵 + +query I +select array_max(array['😅🤔😅️', '114🥵514', '🤣🥳', '🥵💩💩🥵']); +---- +🥵💩💩🥵 + +query error invalid digit found in string +select array_max(array['a', 'b', 'c', 114514]); + +query error invalid digit found in string +select array_max(array[114514, 'a', 'b', 'c']); + +# i32::MIN & i32::MIN - 1 & i32::MAX +query I +select array_max(array[-2147483648, 2147483647, -2147483649]); +---- +2147483647 + +# i64::MIN & i64::MIN - 1 & i64::MAX +query I +select array_max(array[-9223372036854775808, 9223372036854775807, -9223372036854775809]); +---- +9223372036854775807 \ No newline at end of file diff --git a/proto/expr.proto b/proto/expr.proto index 1b3aeff6480f..dfca3d91d5a4 100644 --- a/proto/expr.proto +++ b/proto/expr.proto @@ -197,6 +197,7 @@ message ExprNode { ARRAY_REPLACE = 
543; ARRAY_DIMS = 544; ARRAY_TRANSFORM = 545; + ARRAY_MAX = 547; // Int256 functions HEX_TO_INT256 = 560; diff --git a/src/expr/src/vector_op/array_max.rs b/src/expr/src/vector_op/array_max.rs new file mode 100644 index 000000000000..7702189c39e7 --- /dev/null +++ b/src/expr/src/vector_op/array_max.rs @@ -0,0 +1,38 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use risingwave_common::array::*; +use risingwave_common::types::{DefaultOrdered, Scalar, ToOwnedDatum}; +use risingwave_expr_macro::function; + +use crate::Result; + +#[function("array_max(list) -> *int")] +#[function("array_max(list) -> *float")] +#[function("array_max(list) -> decimal")] +#[function("array_max(list) -> serial")] +#[function("array_max(list) -> int256")] +#[function("array_max(list) -> date")] +#[function("array_max(list) -> time")] +#[function("array_max(list) -> timestamp")] +#[function("array_max(list) -> timestamptz")] +#[function("array_max(list) -> varchar")] +#[function("array_max(list) -> bytea")] +pub fn array_max(list: ListRef<'_>) -> Result> { + let min_value = list.iter().flatten().map(DefaultOrdered).max(); + match min_value.map(|v| v.0).to_owned_datum() { + Some(s) => Ok(Some(s.try_into()?)), + None => Ok(None), + } +} diff --git a/src/expr/src/vector_op/mod.rs b/src/expr/src/vector_op/mod.rs index 4bc147cf3cae..de0ca4d5d67f 100644 --- a/src/expr/src/vector_op/mod.rs +++ b/src/expr/src/vector_op/mod.rs @@ -13,6 +13,7 @@ // 
limitations under the License. pub mod arithmetic_op; +pub mod array_max; pub mod array_access; pub mod array_distinct; pub mod array_length; diff --git a/src/frontend/src/binder/expr/function.rs b/src/frontend/src/binder/expr/function.rs index c505aaa18d2b..36149c430ec3 100644 --- a/src/frontend/src/binder/expr/function.rs +++ b/src/frontend/src/binder/expr/function.rs @@ -792,6 +792,7 @@ impl Binder { ("cardinality", raw_call(ExprType::Cardinality)), ("array_remove", raw_call(ExprType::ArrayRemove)), ("array_replace", raw_call(ExprType::ArrayReplace)), + ("array_max", raw_call(ExprType::ArrayMax)), ("array_position", raw_call(ExprType::ArrayPosition)), ("array_positions", raw_call(ExprType::ArrayPositions)), ("trim_array", raw_call(ExprType::TrimArray)), diff --git a/src/frontend/src/expr/pure.rs b/src/frontend/src/expr/pure.rs index 4316223ec07a..2054d372ed2f 100644 --- a/src/frontend/src/expr/pure.rs +++ b/src/frontend/src/expr/pure.rs @@ -153,6 +153,7 @@ impl ExprVisitor for ImpureAnalyzer { | expr_node::Type::Row | expr_node::Type::ArrayToString | expr_node::Type::ArrayCat + | expr_node::Type::ArrayMax | expr_node::Type::ArrayAppend | expr_node::Type::ArrayPrepend | expr_node::Type::FormatType diff --git a/src/frontend/src/expr/type_inference/func.rs b/src/frontend/src/expr/type_inference/func.rs index 8cccde3b251d..68dbbb9afd29 100644 --- a/src/frontend/src/expr/type_inference/func.rs +++ b/src/frontend/src/expr/type_inference/func.rs @@ -607,6 +607,18 @@ fn infer_type_for_special( } Ok(Some(DataType::Varchar)) } + ExprType::ArrayMax => { + ensure_arity!("array_max", | inputs | == 1); + inputs[0].ensure_array_type()?; + + // FIXME: This will be replaced by `as_list` later + let return_type = match inputs[0].return_type() { + DataType::List(t) => Some(*t), + _ => None, + }; + + Ok(return_type) + } ExprType::StringToArray => { ensure_arity!("string_to_array", 2 <= | inputs | <= 3); From 5c0b80a6d73dd75db6d2d145c06701a26b2ee77d Mon Sep 17 00:00:00 2001 From: 
Michael Xu Date: Tue, 5 Sep 2023 15:03:25 -0400 Subject: [PATCH 2/5] add expansion for test_func_sig_map --- src/expr/src/sig/func.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/expr/src/sig/func.rs b/src/expr/src/sig/func.rs index d84a065ad095..84eb37cd0828 100644 --- a/src/expr/src/sig/func.rs +++ b/src/expr/src/sig/func.rs @@ -199,6 +199,9 @@ mod tests { ArrayAccess: [ "array_access(list, int32) -> boolean/int16/int32/int64/int256/float32/float64/decimal/serial/date/time/timestamp/timestamptz/interval/varchar/bytea/jsonb/list/struct", ], + ArrayMax: [ + "array_max(list) -> bytea/varchar/timestamptz/timestamp/time/date/int256/serial/decimal/float32/float64/int16/int32/int64", + ], } "#]]; expected.assert_debug_eq(&duplicated); From 1a5ee6b6cab7f9f3697ef551e88e9743d217a5c7 Mon Sep 17 00:00:00 2001 From: Michael Xu Date: Wed, 6 Sep 2023 16:01:05 -0400 Subject: [PATCH 3/5] refactor & add more e2e tests for array_max --- e2e_test/batch/functions/array_max.slt.part | 35 ++++++++++++++--- src/expr/src/vector_op/array_min.rs | 38 ------------------- .../{array_max.rs => array_min_max.rs} | 21 ++++++++++ src/expr/src/vector_op/mod.rs | 3 +- src/frontend/src/expr/type_inference/func.rs | 8 +--- 5 files changed, 53 insertions(+), 52 deletions(-) delete mode 100644 src/expr/src/vector_op/array_min.rs rename src/expr/src/vector_op/{array_max.rs => array_min_max.rs} (63%) diff --git a/e2e_test/batch/functions/array_max.slt.part b/e2e_test/batch/functions/array_max.slt.part index d8b6ad571bed..54a0e8cb45d1 100644 --- a/e2e_test/batch/functions/array_max.slt.part +++ b/e2e_test/batch/functions/array_max.slt.part @@ -19,14 +19,14 @@ select array_max(array['a', 'b', 'c', 'a']); c query I -select array_max(array['💩', '🤔️', '🥵', '🥳', '💩']); +select array_max(array['e💩a', 'f🤔️b', 'c🥵c', 'd🥳d', 'e💩e']); ---- -🥵 +f🤔️b query I -select array_max(array['😅🤔😅️', '114🥵514', '🤣🥳', '🥵💩💩🥵']); +select array_max(array['2c😅🤔😅️c2', '114🥵514', '30🤣🥳03', '5🥵💩💩🥵5']); ---- -🥵💩💩🥵 +5🥵💩💩🥵5 
query error invalid digit found in string select array_max(array['a', 'b', 'c', 114514]); @@ -44,4 +44,29 @@ select array_max(array[-2147483648, 2147483647, -2147483649]); query I select array_max(array[-9223372036854775808, 9223372036854775807, -9223372036854775809]); ---- -9223372036854775807 \ No newline at end of file +9223372036854775807 + +query I +select array_max(array['a', '', 'c']); +---- +c + +query I +select array_max(array[3.14, 1.14, 1.14514]); +---- +3.14 + +query I +select array_max(array[3.1415926, 191.14, 114514, 1313.1414]); +---- +114514 + +query I +select array_max(array[1e-4, 1.14514e5, 1.14514e-5]); +---- +114514 + +query I +select array_max(array[date'2002-10-30', date'2023-09-06', date'2017-06-18']); +---- +2023-09-06 \ No newline at end of file diff --git a/src/expr/src/vector_op/array_min.rs b/src/expr/src/vector_op/array_min.rs deleted file mode 100644 index 5e8a6d10e89e..000000000000 --- a/src/expr/src/vector_op/array_min.rs +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2023 RisingWave Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use risingwave_common::array::*; -use risingwave_common::types::{DefaultOrdered, Scalar, ToOwnedDatum}; -use risingwave_expr_macro::function; - -use crate::Result; - -#[function("array_min(list) -> *int")] -#[function("array_min(list) -> *float")] -#[function("array_min(list) -> decimal")] -#[function("array_min(list) -> serial")] -#[function("array_min(list) -> int256")] -#[function("array_min(list) -> date")] -#[function("array_min(list) -> time")] -#[function("array_min(list) -> timestamp")] -#[function("array_min(list) -> timestamptz")] -#[function("array_min(list) -> varchar")] -#[function("array_min(list) -> bytea")] -pub fn array_min(list: ListRef<'_>) -> Result> { - let min_value = list.iter().flatten().map(DefaultOrdered).min(); - match min_value.map(|v| v.0).to_owned_datum() { - Some(s) => Ok(Some(s.try_into()?)), - None => Ok(None), - } -} diff --git a/src/expr/src/vector_op/array_max.rs b/src/expr/src/vector_op/array_min_max.rs similarity index 63% rename from src/expr/src/vector_op/array_max.rs rename to src/expr/src/vector_op/array_min_max.rs index 7702189c39e7..286e8a7e7201 100644 --- a/src/expr/src/vector_op/array_max.rs +++ b/src/expr/src/vector_op/array_min_max.rs @@ -18,6 +18,27 @@ use risingwave_expr_macro::function; use crate::Result; +/// FIXME: the macro should support `#[function("array_min(list) -> any")]`. +/// That way we could avoid this manual macro expansion. +#[function("array_min(list) -> *int")] +#[function("array_min(list) -> *float")] +#[function("array_min(list) -> decimal")] +#[function("array_min(list) -> serial")] +#[function("array_min(list) -> int256")] +#[function("array_min(list) -> date")] +#[function("array_min(list) -> time")] +#[function("array_min(list) -> timestamp")] +#[function("array_min(list) -> timestamptz")] +#[function("array_min(list) -> varchar")] +#[function("array_min(list) -> bytea")] +pub fn array_min(list: ListRef<'_>) -> Result> { + let min_value = list.iter().flatten().map(DefaultOrdered).min(); + match 
min_value.map(|v| v.0).to_owned_datum() { + Some(s) => Ok(Some(s.try_into()?)), + None => Ok(None), + } +} + #[function("array_max(list) -> *int")] #[function("array_max(list) -> *float")] #[function("array_max(list) -> decimal")] diff --git a/src/expr/src/vector_op/mod.rs b/src/expr/src/vector_op/mod.rs index 3368e3b8b232..f4aaa375f8f7 100644 --- a/src/expr/src/vector_op/mod.rs +++ b/src/expr/src/vector_op/mod.rs @@ -13,11 +13,10 @@ // limitations under the License. pub mod arithmetic_op; -pub mod array_max; pub mod array_access; pub mod array_distinct; pub mod array_length; -pub mod array_min; +pub mod array_min_max; pub mod array_positions; pub mod array_range_access; pub mod array_remove; diff --git a/src/frontend/src/expr/type_inference/func.rs b/src/frontend/src/expr/type_inference/func.rs index 0d2754734730..1febc46788af 100644 --- a/src/frontend/src/expr/type_inference/func.rs +++ b/src/frontend/src/expr/type_inference/func.rs @@ -617,13 +617,7 @@ fn infer_type_for_special( ensure_arity!("array_max", | inputs | == 1); inputs[0].ensure_array_type()?; - // FIXME: This will be replaced by `as_list` later - let return_type = match inputs[0].return_type() { - DataType::List(t) => Some(*t), - _ => None, - }; - - Ok(return_type) + Ok(Some(inputs[0].return_type().as_list().clone())) } ExprType::StringToArray => { ensure_arity!("string_to_array", 2 <= | inputs | <= 3); From 9ffd065a7680f73fc061408558a516c547489922 Mon Sep 17 00:00:00 2001 From: Michael Xu Date: Wed, 6 Sep 2023 19:31:08 -0400 Subject: [PATCH 4/5] tiny fix --- src/expr/src/sig/func.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/expr/src/sig/func.rs b/src/expr/src/sig/func.rs index d0e5dd255198..5dca4da2f448 100644 --- a/src/expr/src/sig/func.rs +++ b/src/expr/src/sig/func.rs @@ -199,11 +199,12 @@ mod tests { ArrayAccess: [ "array_access(list, int32) -> 
boolean/int16/int32/int64/int256/float32/float64/decimal/serial/date/time/timestamp/timestamptz/interval/varchar/bytea/jsonb/list/struct", ], - ArrayMax: [ - "array_max(list) -> bytea/varchar/timestamptz/timestamp/time/date/int256/serial/decimal/float32/float64/int16/int32/int64", ArrayMin: [ "array_min(list) -> bytea/varchar/timestamptz/timestamp/time/date/int256/serial/decimal/float32/float64/int16/int32/int64", ], + ArrayMax: [ + "array_max(list) -> bytea/varchar/timestamptz/timestamp/time/date/int256/serial/decimal/float32/float64/int16/int32/int64", + ], } "#]]; expected.assert_debug_eq(&duplicated); From 913bbd8689a5565af5c3ff6059e3630d17a6d36e Mon Sep 17 00:00:00 2001 From: Runji Wang Date: Thu, 7 Sep 2023 15:58:49 +0800 Subject: [PATCH 5/5] Update src/expr/src/vector_op/array_min_max.rs --- src/expr/src/vector_op/array_min_max.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/expr/src/vector_op/array_min_max.rs b/src/expr/src/vector_op/array_min_max.rs index 286e8a7e7201..1ff1a4086f2c 100644 --- a/src/expr/src/vector_op/array_min_max.rs +++ b/src/expr/src/vector_op/array_min_max.rs @@ -51,8 +51,8 @@ pub fn array_min(list: ListRef<'_>) -> Result> { #[function("array_max(list) -> varchar")] #[function("array_max(list) -> bytea")] pub fn array_max(list: ListRef<'_>) -> Result> { - let min_value = list.iter().flatten().map(DefaultOrdered).max(); - match min_value.map(|v| v.0).to_owned_datum() { + let max_value = list.iter().flatten().map(DefaultOrdered).max(); + match max_value.map(|v| v.0).to_owned_datum() { Some(s) => Ok(Some(s.try_into()?)), None => Ok(None), }