From 9c98d269e04313fe98e5a7a3843243ede5dd31a0 Mon Sep 17 00:00:00 2001 From: Xu Date: Wed, 6 Sep 2023 14:31:10 -0400 Subject: [PATCH] feat(expr): add `array_min` (#12071) Signed-off-by: Runji Wang Co-authored-by: Runji Wang --- e2e_test/batch/functions/array_min.slt.part | 93 ++++++++++++++++++++ proto/expr.proto | 1 + src/common/src/types/mod.rs | 12 +++ src/expr/src/sig/func.rs | 3 + src/expr/src/vector_op/array_access.rs | 3 +- src/expr/src/vector_op/array_distinct.rs | 2 +- src/expr/src/vector_op/array_length.rs | 2 +- src/expr/src/vector_op/array_min.rs | 38 ++++++++ src/expr/src/vector_op/mod.rs | 1 + src/frontend/src/binder/expr/function.rs | 1 + src/frontend/src/expr/pure.rs | 1 + src/frontend/src/expr/type_inference/func.rs | 6 ++ 12 files changed, 160 insertions(+), 3 deletions(-) create mode 100644 e2e_test/batch/functions/array_min.slt.part create mode 100644 src/expr/src/vector_op/array_min.rs diff --git a/e2e_test/batch/functions/array_min.slt.part b/e2e_test/batch/functions/array_min.slt.part new file mode 100644 index 000000000000..0a252465c58d --- /dev/null +++ b/e2e_test/batch/functions/array_min.slt.part @@ -0,0 +1,93 @@ +query I +select array_min(array[1, 2, 3]); +---- +1 + +query I +select array_min(array[2, 3, 5, 2, 4]); +---- +2 + +query I +select array_min(array[114514, 123456]); +---- +114514 + +query I +select array_min(array['a', 'b', 'c', 'a']); +---- +a + +query I +select array_min(array['e💩a', 'f🤔️e', 'c🥵c', 'g🥳g', 'e💩e']); +---- +c🥵c + +query I +select array_min(array['901😅🤔😅️109', '114🥵514', '3🤣🥳3', '5🥵💩💩🥵5']); +---- +114🥵514 + +query error invalid digit found in string +select array_min(array['a', 'b', 'c', 114514]); + +query error invalid digit found in string +select array_min(array[114514, 'a', 'b', 'c']); + +# i32::MIN & i32::MIN - 1 & i32::MAX +query I +select array_min(array[-2147483648, 2147483647, -2147483649]); +---- +-2147483649 + +# i64::MIN & i64::MIN - 1 & i64::MAX +query I +select array_min(array[-9223372036854775808, 9223372036854775807, -9223372036854775809]); +---- +-9223372036854775809 + +query I +select array_min(array['a', '', 'c']); +---- +(empty) + +query I +select array_min(array[3.14, 1.14, 1.14514]); +---- +1.14 + +query I +select array_min(array[3.1415926, 191.14, 114514, 1313.1414]); +---- +3.1415926 + +query I +select array_min(array[1e-4, 1.14514e5, 1.14514e-5]); +---- +0.0000114514 + +query I +select array_min(array[date'2002-10-30', date'2023-09-06', date'2017-06-18']); +---- +2002-10-30 + +query I +select array_min( + array[ + '2002-10-30 00:00:00'::timestamp, + '2023-09-06 13:10:00'::timestamp, + '2017-06-18 12:00:00'::timestamp + ] +); +---- +2002-10-30 00:00:00 + +query I +select array_min(array['\xDE'::bytea, '\xDF'::bytea, '\xDC'::bytea]); +---- +\xdc + +query I +select array_min(array[NULL, 'a', 'b']); +---- +a \ No newline at end of file diff --git a/proto/expr.proto b/proto/expr.proto index 1b3aeff6480f..2dd60bea4f23 100644 --- a/proto/expr.proto +++ b/proto/expr.proto @@ -197,6 +197,7 @@ message ExprNode { ARRAY_REPLACE = 543; ARRAY_DIMS = 544; ARRAY_TRANSFORM = 545; + ARRAY_MIN = 546; // Int256 functions HEX_TO_INT256 = 560; diff --git a/src/common/src/types/mod.rs b/src/common/src/types/mod.rs index 7737d76cd48f..d0e155f78454 100644 --- a/src/common/src/types/mod.rs +++ b/src/common/src/types/mod.rs @@ -375,6 +375,18 @@ impl DataType { } } + /// Returns the inner type of a list type. + /// + /// # Panics + /// + /// Panics if the type is not a list type. + pub fn as_list(&self) -> &DataType { + match self { + DataType::List(t) => t, + _ => panic!("expect list type"), + } + } + /// WARNING: Currently this should only be used in `WatermarkFilterExecutor`. Please be careful /// if you want to use this. pub fn min_value(&self) -> ScalarImpl { diff --git a/src/expr/src/sig/func.rs b/src/expr/src/sig/func.rs index d84a065ad095..8e943a6ffa71 100644 --- a/src/expr/src/sig/func.rs +++ b/src/expr/src/sig/func.rs @@ -199,6 +199,9 @@ mod tests { ArrayAccess: [ "array_access(list, int32) -> boolean/int16/int32/int64/int256/float32/float64/decimal/serial/date/time/timestamp/timestamptz/interval/varchar/bytea/jsonb/list/struct", ], + ArrayMin: [ + "array_min(list) -> bytea/varchar/timestamptz/timestamp/time/date/int256/serial/decimal/float32/float64/int16/int32/int64", + ], } "#]]; expected.assert_debug_eq(&duplicated); diff --git a/src/expr/src/vector_op/array_access.rs b/src/expr/src/vector_op/array_access.rs index 57fe3f29feb3..40c4568c7d46 100644 --- a/src/expr/src/vector_op/array_access.rs +++ b/src/expr/src/vector_op/array_access.rs @@ -24,7 +24,8 @@ pub fn array_access(list: ListRef<'_>, index: i32) -> Result *int")] +#[function("array_min(list) -> *float")] +#[function("array_min(list) -> decimal")] +#[function("array_min(list) -> serial")] +#[function("array_min(list) -> int256")] +#[function("array_min(list) -> date")] +#[function("array_min(list) -> time")] +#[function("array_min(list) -> timestamp")] +#[function("array_min(list) -> timestamptz")] +#[function("array_min(list) -> varchar")] +#[function("array_min(list) -> bytea")] +pub fn array_min(list: ListRef<'_>) -> Result> { + let min_value = list.iter().flatten().map(DefaultOrdered).min(); + match min_value.map(|v| v.0).to_owned_datum() { + Some(s) => Ok(Some(s.try_into()?)), + None => Ok(None), + } +} diff --git a/src/expr/src/vector_op/mod.rs b/src/expr/src/vector_op/mod.rs index 4bc147cf3cae..61aa194f974c 100644 --- a/src/expr/src/vector_op/mod.rs +++ b/src/expr/src/vector_op/mod.rs @@ -16,6 +16,7 @@ pub mod arithmetic_op; pub mod array_access; pub mod array_distinct; pub mod array_length; +pub mod array_min; pub mod array_positions; pub mod array_range_access; pub mod array_remove; diff --git a/src/frontend/src/binder/expr/function.rs b/src/frontend/src/binder/expr/function.rs index c505aaa18d2b..54a502ecdea8 100644 --- a/src/frontend/src/binder/expr/function.rs +++ b/src/frontend/src/binder/expr/function.rs @@ -788,6 +788,7 @@ impl Binder { ("array_prepend", raw_call(ExprType::ArrayPrepend)), ("array_to_string", raw_call(ExprType::ArrayToString)), ("array_distinct", raw_call(ExprType::ArrayDistinct)), + ("array_min", raw_call(ExprType::ArrayMin)), ("array_length", raw_call(ExprType::ArrayLength)), ("cardinality", raw_call(ExprType::Cardinality)), ("array_remove", raw_call(ExprType::ArrayRemove)), diff --git a/src/frontend/src/expr/pure.rs b/src/frontend/src/expr/pure.rs index 4316223ec07a..894d4667a80a 100644 --- a/src/frontend/src/expr/pure.rs +++ b/src/frontend/src/expr/pure.rs @@ -157,6 +157,7 @@ impl ExprVisitor for ImpureAnalyzer { | expr_node::Type::ArrayPrepend | expr_node::Type::FormatType | expr_node::Type::ArrayDistinct + | expr_node::Type::ArrayMin | expr_node::Type::ArrayDims | expr_node::Type::ArrayLength | expr_node::Type::Cardinality diff --git a/src/frontend/src/expr/type_inference/func.rs b/src/frontend/src/expr/type_inference/func.rs index 8cccde3b251d..02c9abaf72b9 100644 --- a/src/frontend/src/expr/type_inference/func.rs +++ b/src/frontend/src/expr/type_inference/func.rs @@ -595,6 +595,12 @@ fn infer_type_for_special( Ok(Some(inputs[0].return_type())) } + ExprType::ArrayMin => { + ensure_arity!("array_min", | inputs | == 1); + inputs[0].ensure_array_type()?; + + Ok(Some(inputs[0].return_type().as_list().clone())) + } ExprType::ArrayDims => { ensure_arity!("array_dims", | inputs | == 1); inputs[0].ensure_array_type()?;