From 4fcb67f1920f9428a34756d5cbdbec9dac5f4746 Mon Sep 17 00:00:00 2001 From: Zihao Xu Date: Sun, 4 Feb 2024 11:26:11 -0500 Subject: [PATCH] refactor(test): refactor sql udf test (#14941) --- e2e_test/udf/sql_udf.slt | 443 +++++++++--------- .../src/handler/create_sql_function.rs | 2 +- 2 files changed, 228 insertions(+), 217 deletions(-) diff --git a/e2e_test/udf/sql_udf.slt b/e2e_test/udf/sql_udf.slt index 758ec43ca53f..f2e3b2bfd528 100644 --- a/e2e_test/udf/sql_udf.slt +++ b/e2e_test/udf/sql_udf.slt @@ -1,325 +1,346 @@ statement ok SET RW_IMPLICIT_FLUSH TO true; -# Create an anonymous function with double dollar as clause +############################################################# +# Basic tests for sql udf with [unnamed / named] parameters # +############################################################# + +# Create a sql udf function with unnamed parameters with double dollar as clause statement ok create function add(INT, INT) returns int language sql as $$select $1 + $2$$; -# Create an anonymous function with single quote as clause +query I +select add(1, -1); +---- +0 + +# Create a sql udf function with unnamed parameters with single quote as clause statement ok create function sub(INT, INT) returns int language sql as 'select $1 - $2'; -# Create an anonymous function that calls other pre-defined sql udfs +query I +select sub(1, 1); +---- +0 + +# Create a sql udf function with unnamed parameters that calls other pre-defined sql udfs statement ok create function add_sub_binding() returns int language sql as 'select add(1, 1) + sub(2, 2)'; -# Create a named sql udf +query I +select add_sub_binding(); +---- +2 + +# Use them all together +query III +select add(1, -1), sub(1, 1), add_sub_binding(); +---- +0 0 2 + +# Create a sql udf with named parameters with single quote as clause statement ok create function add_named(a INT, b INT) returns int language sql as 'select a + b'; -# Create another named sql udf +query I +select add_named(1, -1); +---- +0 + +# 
Create another sql udf with named parameters with double dollar as clause statement ok -create function sub_named(a INT, b INT) returns int language sql as 'select a - b'; +create function sub_named(a INT, b INT) returns int language sql as $$select a - b$$; + +query I +select sub_named(1, 1); +---- +0 -# Mixed parameter with named / anonymous parameters +# Mixed with named / unnamed parameters statement ok create function add_sub_mix(INT, a INT, INT) returns int language sql as 'select $1 - a + $3'; -# Mixed parameter with calling inner sql udfs -# statement ok -# create function add_sub_mix_wrapper(INT, a INT, INT) returns int language sql as 'select add($1, a) + a + sub(a, $3)'; +query I +select add_sub_mix(1, 2, 3); +---- +2 -# Named sql udf with corner case +# Call sql udf with unnamed parameters inside sql udf with named parameters statement ok -create function corner_case(INT, a INT, INT) returns varchar language sql as $$select '$1 + a + $3'$$; +create function add_named_wrapper(a INT, b INT) returns int language sql as 'select add(a, b)'; -# Named sql udf with invalid parameter in body definition -# Will be rejected at creation time -statement error failed to find named parameter aa -create function unknown_parameter(a INT) returns int language sql as 'select a + aa + a'; +query I +select add_named_wrapper(1, -1); +---- +0 -# Call anonymous sql udf inside named sql udf +# Create a sql udf with unnamed parameters with return expression statement ok -create function add_named_wrapper(a INT, b INT) returns int language sql as 'select add(a, b)'; +create function add_return(INT, INT) returns int language sql return $1 + $2; + +query I +select add_return(1, 1); +---- +2 -# Create an anonymous function that calls built-in functions -# Note that double dollar signs should be used otherwise the parsing will fail, as illutrates below statement ok -create function call_regexp_replace() returns varchar language sql as $$select 
regexp_replace('💩💩💩💩💩foo🤔️bar亲爱的😭baz这不是爱情❤️‍🔥', 'baz(...)', '这是🥵', 'ic')$$; +create function add_return_binding() returns int language sql return add_return(1, 1) + add_return(1, 1); -statement error Expected end of statement, found: 💩 -create function call_regexp_replace() returns varchar language sql as 'select regexp_replace('💩💩💩💩💩foo🤔️bar亲爱的😭baz这不是爱情❤️‍🔥', 'baz(...)', '这是🥵', 'ic')'; +query I +select add_return_binding(); +---- +4 -# Create an anonymous function with return expression statement ok -create function add_return(INT, INT) returns int language sql return $1 + $2; +create function print(INT) returns int language sql as 'select $1'; + +query T +select print(114514); +---- +114514 +# Multiple type interleaving sql udf statement ok -create function add_return_binding() returns int language sql return add_return(1, 1) + add_return(1, 1); +create function add_sub(INT, FLOAT, INT) returns float language sql as $$select -$1 + $2 - $3$$; -# Recursive definition can NOT be accepted at present due to semantic check -statement error failed to conduct semantic check, please see if you are calling non-existence functions -create function recursive(INT, INT) returns int language sql as 'select recursive($1, $2) + recursive($1, $2)'; +query I +select add_sub(1, 5.1415926, 1); +---- +3.1415926 -# Complex but error-prone definition, recursive & normal sql udfs interleaving -statement error failed to conduct semantic check, please see if you are calling non-existence functions -create function recursive_non_recursive(INT, INT) returns int language sql as 'select recursive($1, $2) + sub($1, $2)'; +query III +select add(1, -1), sub(1, 1), add_sub(1, 5.1415926, 1); +---- +0 0 3.1415926 -# Recursive corner case +# Complex types interleaving statement ok -create function foo(INT) returns varchar language sql as $$select 'foo(INT)'$$; +create function add_sub_types(INT, BIGINT, FLOAT, DECIMAL, REAL) returns double language sql as 'select $1 + $2 - $3 + $4 + $5'; + +query I 
+select add_sub_types(1, 1919810114514, 3.1415926, 1.123123, 101010.191919); +---- +1919810215523.1734494 + +statement ok +create function add_sub_return(INT, FLOAT, INT) returns float language sql return -$1 + $2 - $3; + +query I +select add_sub_return(1, 5.1415926, 1); +---- +3.1415926 # Create a wrapper function for `add` & `sub` statement ok create function add_sub_wrapper(INT, INT) returns int language sql as 'select add($1, $2) + sub($1, $2) + 114512'; -# Create a valid recursive function -# Please note we do NOT support actual running the recursive sql udf at present -statement error failed to conduct semantic check, please see if you are calling non-existence functions -create function fib(INT) returns int - language sql as 'select case - when $1 = 0 then 0 - when $1 = 1 then 1 - when $1 = 2 then 1 - when $1 = 3 then 2 - else fib($1 - 1) + fib($1 - 2) - end;'; - -# The execution will eventually exceed the pre-defined max stack depth -# statement error function fib calling stack depth limit exceeded -# select fib(100); +query I +select add_sub_wrapper(1, 1); +---- +114514 -# Currently create a materialized view with a recursive sql udf will be rejected -# statement error function fib calling stack depth limit exceeded -# create materialized view foo_mv as select fib(100); +########################################################## +# Basic sql udfs integrated with the use of mock tables # +# P.S. 
This is also a simulation of real world use cases # +########################################################## statement ok -create function regexp_replace_wrapper(varchar) returns varchar language sql as $$select regexp_replace($1, 'baz(...)', '这是🥵', 'ic')$$; +create table t1 (c1 INT, c2 INT); statement ok -create function print(INT) returns int language sql as 'select $1'; +create table t2 (c1 INT, c2 FLOAT, c3 INT); -# Adjust the input value of the calling function (i.e., `print` here) with the actual input parameter +# Special table for named sql udf statement ok -create function print_add_one(INT) returns int language sql as 'select print($1 + 1)'; +create table t3 (a INT, b INT); statement ok -create function print_add_two(INT) returns int language sql as 'select print($1 + $1)'; - -# Calling a non-existence function -statement error failed to conduct semantic check, please see if you are calling non-existence functions -create function non_exist(INT) returns int language sql as 'select yo(114514)'; - -# Try to create an anonymous sql udf whose return type mismatches with the sql body definition -statement error return type mismatch detected -create function type_mismatch(INT) returns varchar language sql as 'select $1 + 114514 + $1'; +insert into t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); -# A valid example statement ok -create function type_match(INT) returns varchar language sql as $$select '$1 + 114514 + $1'$$; +insert into t2 values (1, 3.14, 2), (2, 4.44, 5), (20, 10.30, 02); -query T -select type_match(114514); ----- -$1 + 114514 + $1 +statement ok +insert into t3 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); -# Call the defined anonymous sql udfs query I -select add(1, -1); +select c1, c2, add_return(c1, c2) from t1 order by c1 asc; ---- -0 +1 1 2 +2 2 4 +3 3 6 +4 4 8 +5 5 10 -query I -select sub(1, 1); +query III +select sub(c1, c2), c1, c2, add(c1, c2) from t1 order by c1 asc; ---- -0 +0 1 1 2 +0 2 2 4 +0 3 3 6 +0 4 4 8 +0 5 5 10 -# Call 
the defined named sql udfs -query I -select add_named(1, -1); +query IIIIII +select c1, c2, c3, add(c1, c3), sub(c1, c3), add_sub(c1, c2, c3) from t2 order by c1 asc; ---- -0 +1 3.14 2 3 -1 0.14000000000000012 +2 4.44 5 7 -3 -2.5599999999999996 +20 10.3 2 22 18 -11.7 -query I -select sub_named(1, 1); +query IIIIII +select c1, c2, c3, add(c1, c3), sub(c1, c3), add_sub_return(c1, c2, c3) from t2 order by c1 asc; ---- -0 +1 3.14 2 3 -1 0.14000000000000012 +2 4.44 5 7 -3 -2.5599999999999996 +20 10.3 2 22 18 -11.7 query I -select add_sub_mix(1, 2, 3); +select add_named(a, b) from t3 order by a asc; ---- 2 +4 +6 +8 +10 -query T -select corner_case(1, 2, 3); ----- -$1 + a + $3 - -query I -select add_named_wrapper(1, -1); ----- -0 +################################ +# Corner & Special cases tests # +################################ -query I -select add_sub_binding(); ----- -2 +# Mixed parameter with calling inner sql udfs +# statement ok +# create function add_sub_mix_wrapper(INT, a INT, INT) returns int language sql as 'select add($1, a) + a + sub(a, $3)'; -query III -select add(1, -1), sub(1, 1), add_sub_binding(); ----- -0 0 2 +# Named sql udf with corner case +statement ok +create function corner_case(INT, a INT, INT) returns varchar language sql as $$select '$1 + a + $3'$$; -query I -select add_return(1, 1); +query T +select corner_case(1, 2, 3); ---- -2 +$1 + a + $3 -query I -select add_return_binding(); ----- -4 +# Create a sql udf with unnamed parameters that calls built-in functions +# Note that double dollar signs should be used otherwise the parsing will fail, as illustrated below +statement ok +create function call_regexp_replace() returns varchar language sql as $$select regexp_replace('💩💩💩💩💩foo🤔️bar亲爱的😭baz这不是爱情❤️‍🔥', 'baz(...)', '这是🥵', 'ic')$$; query T select call_regexp_replace(); ---- 💩💩💩💩💩foo🤔️bar亲爱的😭这是🥵爱情❤️‍🔥 +statement ok +create function regexp_replace_wrapper(varchar) returns varchar language sql as $$select regexp_replace($1, 'baz(...)', '这是🥵', 'ic')$$; 
+ query T select regexp_replace_wrapper('💩💩💩💩💩foo🤔️bar亲爱的😭baz这不是爱情❤️‍🔥'); ---- 💩💩💩💩💩foo🤔️bar亲爱的😭这是🥵爱情❤️‍🔥 +# Recursive corner case (i.e., valid definition should not be rejected) +statement ok +create function foo(INT) returns varchar language sql as $$select 'foo(INT)'$$; + query T select foo(114514); ---- foo(INT) -# Rejected deep calling stack -# statement error function recursive calling stack depth limit exceeded -# select recursive(1, 1); - -# Same as above -# statement error function recursive calling stack depth limit exceeded -# select recursive_non_recursive(1, 1); +# Adjust the input value of the calling function (i.e., `print` here) with the actual input parameter +statement ok +create function print_add_one(INT) returns int language sql as 'select print($1 + 1)'; -query I -select add_sub_wrapper(1, 1); ----- -114514 +statement ok +create function print_add_two(INT) returns int language sql as 'select print($1 + $1)'; query III select print_add_one(1), print_add_one(114513), print_add_two(2); ---- 2 114514 4 -# Create a mock table for anonymous sql udf +# Note: this is the valid counterpart of the rejected `type_mismatch` definition in the test section below statement ok -create table t1 (c1 INT, c2 INT); +create function type_match(INT) returns varchar language sql as $$select '$1 + 114514 + $1'$$; -# Create a mock table for named sql udf -statement ok -create table t3 (a INT, b INT); +query T +select type_match(114514); +---- +$1 + 114514 + $1 -# Insert some data into the mock table -statement ok -insert into t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); +################################################################################# +# Invalid definition (and maybe not yet supported features 🤪) / use case tests # +################################################################################# -statement ok -insert into t3 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); +# Named sql udf with invalid parameter in body definition +# Will be rejected at creation time +statement 
error failed to find named parameter aa +create function unknown_parameter(a INT) returns int language sql as 'select a + aa + a'; -query I -select add_named(a, b) from t3 order by a asc; ----- -2 -4 -6 -8 -10 +statement error Expected end of statement, found: 💩 +create function call_regexp_replace() returns varchar language sql as 'select regexp_replace('💩💩💩💩💩foo🤔️bar亲爱的😭baz这不是爱情❤️‍🔥', 'baz(...)', '这是🥵', 'ic')'; -query III -select sub(c1, c2), c1, c2, add(c1, c2) from t1 order by c1 asc; ----- -0 1 1 2 -0 2 2 4 -0 3 3 6 -0 4 4 8 -0 5 5 10 +# Recursive definition can NOT be accepted at present due to semantic check +statement error failed to conduct semantic check, please see if you are calling non-existent functions +create function recursive(INT, INT) returns int language sql as 'select recursive($1, $2) + recursive($1, $2)'; -query I -select c1, c2, add_return(c1, c2) from t1 order by c1 asc; ----- -1 1 2 -2 2 4 -3 3 6 -4 4 8 -5 5 10 +# Complex but error-prone definition, recursive & normal sql udfs interleaving +statement error failed to conduct semantic check, please see if you are calling non-existent functions +create function recursive_non_recursive(INT, INT) returns int language sql as 'select recursive($1, $2) + sub($1, $2)'; -# Recursive sql udf with normal table +# Create a valid recursive function +# Please note we do NOT support actual running the recursive sql udf at present +statement error failed to conduct semantic check, please see if you are calling non-existent functions +create function fib(INT) returns int + language sql as 'select case + when $1 = 0 then 0 + when $1 = 1 then 1 + when $1 = 2 then 1 + when $1 = 3 then 2 + else fib($1 - 1) + fib($1 - 2) + end;'; + +# The execution will eventually exceed the pre-defined max stack depth # statement error function fib calling stack depth limit exceeded -# select fib(c1) from t1; +# select fib(100); -# Recursive sql udf with materialized view +# Currently create a materialized view with a recursive 
sql udf will be rejected # statement error function fib calling stack depth limit exceeded -# create materialized view bar_mv as select fib(c1) from t1; +# create materialized view foo_mv as select fib(100); + +# Calling a non-existent function +statement error failed to conduct semantic check, please see if you are calling non-existent functions +create function non_exist(INT) returns int language sql as 'select yo(114514)'; + +# Try to create an sql udf with unnamed parameters whose return type mismatches with the sql body definition +statement error return type mismatch detected +create function type_mismatch(INT) returns varchar language sql as 'select $1 + 114514 + $1'; # Invalid function body syntax statement error Expected an expression:, found: EOF at the end create function add_error(INT, INT) returns int language sql as $$select $1 + $2 +$$; -# Multiple type interleaving sql udf -statement ok -create function add_sub(INT, FLOAT, INT) returns float language sql as $$select -$1 + $2 - $3$$; - -# Complex types interleaving -statement ok -create function add_sub_types(INT, BIGINT, FLOAT, DECIMAL, REAL) returns double language sql as 'select $1 + $2 - $3 + $4 + $5'; - -statement ok -create function add_sub_return(INT, FLOAT, INT) returns float language sql return -$1 + $2 - $3; - -query I -select add_sub(1, 5.1415926, 1); ----- -3.1415926 - -query I -select add_sub_return(1, 5.1415926, 1); ----- -3.1415926 - -query III -select add(1, -1), sub(1, 1), add_sub(1, 5.1415926, 1); ----- -0 0 3.1415926 - -query I -select add_sub_types(1, 1919810114514, 3.1415926, 1.123123, 101010.191919); ----- -1919810215523.1734494 +# Rejected deep calling stack +# statement error function recursive calling stack depth limit exceeded +# select recursive(1, 1); -# Create another mock table -statement ok -create table t2 (c1 INT, c2 FLOAT, c3 INT); +# Same as above +# statement error function recursive calling stack depth limit exceeded +# select recursive_non_recursive(1, 1); 
-statement ok -insert into t2 values (1, 3.14, 2), (2, 4.44, 5), (20, 10.30, 02); +# Recursive sql udf with normal table +# statement error function fib calling stack depth limit exceeded +# select fib(c1) from t1; -query IIIIII -select c1, c2, c3, add(c1, c3), sub(c1, c3), add_sub(c1, c2, c3) from t2 order by c1 asc; ----- -1 3.14 2 3 -1 0.14000000000000012 -2 4.44 5 7 -3 -2.5599999999999996 -20 10.3 2 22 18 -11.7 +# Recursive sql udf with materialized view +# statement error function fib calling stack depth limit exceeded +# create materialized view bar_mv as select fib(c1) from t1; -query IIIIII -select c1, c2, c3, add(c1, c3), sub(c1, c3), add_sub_return(c1, c2, c3) from t2 order by c1 asc; ----- -1 3.14 2 3 -1 0.14000000000000012 -2 4.44 5 7 -3 -2.5599999999999996 -20 10.3 2 22 18 -11.7 +################################################### +# Clean up the functions / mock tables at the end # +################################################### -# Drop the functions statement ok drop function add; @@ -347,21 +368,12 @@ drop function call_regexp_replace; statement ok drop function add_sub_wrapper; -# statement ok -# drop function recursive; - statement ok drop function foo; -# statement ok -# drop function recursive_non_recursive; - statement ok drop function add_sub_types; -# statement ok -# drop function fib; - statement ok drop function print; @@ -392,7 +404,6 @@ drop function add_named_wrapper; statement ok drop function type_match; -# Drop the mock table statement ok drop table t1; diff --git a/src/frontend/src/handler/create_sql_function.rs b/src/frontend/src/handler/create_sql_function.rs index 4eaa78f82533..de24027723bb 100644 --- a/src/frontend/src/handler/create_sql_function.rs +++ b/src/frontend/src/handler/create_sql_function.rs @@ -204,7 +204,7 @@ pub async fn handle_create_sql_function( } } Err(e) => return Err(ErrorCode::InvalidInputSyntax(format!( - "failed to conduct semantic check, please see if you are calling non-existence functions: {}", + 
"failed to conduct semantic check, please see if you are calling non-existent functions: {}", e.as_report() )) .into()),