Merge branch 'main' into yiming/no-read-version-in-reset

risingwavelabs · Jan 31, 2024 · 1eb9145 · 1eb9145
2 parents dcc337d + 1f57fdd
commit 1eb9145
Show file tree

Hide file tree

Showing 28 changed files with 487 additions and 219 deletions.
diff --git a/ci/scripts/run-backfill-tests.sh b/ci/scripts/run-backfill-tests.sh
@@ -265,6 +265,20 @@ test_backfill_snapshot_with_limited_storage_throughput() {
   kill_cluster
 }
 
+# Test case where we do backfill with PK of 10 columns to measure performance impact.
+test_backfill_snapshot_with_wider_rows() {
+  echo "--- e2e, test_backfill_snapshot_with_wider_rows, $RUNTIME_CLUSTER_PROFILE"
+  cargo make ci-start $RUNTIME_CLUSTER_PROFILE
+  sqllogictest -p 4566 -d dev 'e2e_test/backfill/runtime/create_wide_table.slt'
+  sqllogictest -p 4566 -d dev 'e2e_test/backfill/runtime/insert_wide_snapshot.slt'
+  sqllogictest -p 4566 -d dev 'e2e_test/backfill/runtime/create_arrangement_backfill_mv.slt'
+  sqllogictest -p 4566 -d dev 'e2e_test/backfill/runtime/create_no_shuffle_mv.slt'
+  sqllogictest -p 4566 -d dev 'e2e_test/backfill/runtime/validate_rows_no_shuffle.slt'
+  sqllogictest -p 4566 -d dev 'e2e_test/backfill/runtime/validate_rows_arrangement.slt'
+
+  kill_cluster
+}
+
 main() {
   set -euo pipefail
   test_snapshot_and_upstream_read
@@ -282,6 +296,7 @@ main() {
 
     # Backfill will happen in sequence here.
     test_backfill_snapshot_runtime
+    test_backfill_snapshot_with_wider_rows
     test_backfill_snapshot_with_limited_storage_throughput
 
     # No upstream only tests, because if there's no snapshot,

diff --git a/e2e_test/backfill/runtime/create_wide_table.slt b/e2e_test/backfill/runtime/create_wide_table.slt
@@ -0,0 +1,20 @@
+statement ok
+CREATE TABLE t (
+    v1 int,
+    v2 varchar,
+    v3 bigint,
+    v4 int,
+    v5 varchar,
+    v6 bigint,
+    v7 int,
+    v8 varchar,
+    v9 bigint,
+    v10 int,
+    v11 varchar,
+    v12 bigint,
+    v13 int,
+    v14 varchar,
+    v15 bigint,
+    primary key (v1, v2, v3, v4, v5,
+                 v8, v9, v10, v11, v12)
+);
diff --git a/e2e_test/backfill/runtime/insert_wide_snapshot.slt b/e2e_test/backfill/runtime/insert_wide_snapshot.slt
@@ -0,0 +1,22 @@
+# 15 columns wide
+statement ok
+INSERT INTO t select
+  generate_series,
+  'jakbj2khbe2',
+  22222222222,
+  generate_series,
+  'jakbj2khbe2',
+  22222222222,
+  generate_series,
+  'jakbj2khbe2',
+  22222222222,
+  generate_series,
+  'jakbj2khbe2',
+  22222222222,
+  generate_series,
+  'jakbj2khbe2',
+  22222222222
+  from generate_series(2000001, 4000000);
+
+statement ok
+flush;
diff --git a/e2e_test/udf/sql_udf.slt b/e2e_test/udf/sql_udf.slt
@@ -13,6 +13,35 @@ create function sub(INT, INT) returns int language sql as 'select $1 - $2';
 statement ok
 create function add_sub_binding() returns int language sql as 'select add(1, 1) + sub(2, 2)';
 
+# Create a named sql udf
+statement ok
+create function add_named(a INT, b INT) returns int language sql as 'select a + b';
+
+# Create another named sql udf
+statement ok
+create function sub_named(a INT, b INT) returns int language sql as 'select a - b';
+
+# Mixed parameter with named / anonymous parameters
+statement ok
+create function add_sub_mix(INT, a INT, INT) returns int language sql as 'select $1 - a + $3';
+
+# Mixed parameter with calling inner sql udfs
+# statement ok
+# create function add_sub_mix_wrapper(INT, a INT, INT) returns int language sql as 'select add($1, a) + a + sub(a, $3)';
+
+# Named sql udf with corner case
+statement ok
+create function corner_case(INT, a INT, INT) returns varchar language sql as $$select '$1 + a + $3'$$;
+
+# Named sql udf with invalid parameter in body definition
+# Will be rejected at creation time
+statement error failed to find named parameter aa
+create function unknown_parameter(a INT) returns int language sql as 'select a + aa + a';
+
+# Call anonymous sql udf inside named sql udf
+statement ok
+create function add_named_wrapper(a INT, b INT) returns int language sql as 'select add(a, b)';
+
 # Create an anonymous function that calls built-in functions
 # Note that double dollar signs should be used otherwise the parsing will fail, as illutrates below
 statement ok
@@ -81,7 +110,20 @@ create function print_add_two(INT) returns int language sql as 'select print($1
 statement error failed to conduct semantic check, please see if you are calling non-existence functions
 create function non_exist(INT) returns int language sql as 'select yo(114514)';
 
-# Call the defined sql udf
+# Try to create an anonymous sql udf whose return type mismatches with the sql body definition
+statement error return type mismatch detected
+create function type_mismatch(INT) returns varchar language sql as 'select $1 + 114514 + $1';
+
+# A valid example
+statement ok
+create function type_match(INT) returns varchar language sql as $$select '$1 + 114514 + $1'$$;
+
+query T
+select type_match(114514);
+----
+$1 + 114514 + $1
+
+# Call the defined anonymous sql udfs
 query I
 select add(1, -1);
 ----
@@ -92,6 +134,32 @@ select sub(1, 1);
 ----
 0
 
+# Call the defined named sql udfs
+query I
+select add_named(1, -1);
+----
+0
+
+query I
+select sub_named(1, 1);
+----
+0
+
+query I
+select add_sub_mix(1, 2, 3);
+----
+2
+
+query T
+select corner_case(1, 2, 3);
+----
+$1 + a + $3
+
+query I
+select add_named_wrapper(1, -1);
+----
+0
+
 query I
 select add_sub_binding();
 ----
@@ -145,14 +213,30 @@ select print_add_one(1), print_add_one(114513), print_add_two(2);
 ----
 2 114514 4
 
-# Create a mock table
+# Create a mock table for anonymous sql udf
 statement ok
 create table t1 (c1 INT, c2 INT);
 
+# Create a mock table for named sql udf
+statement ok
+create table t3 (a INT, b INT);
+
 # Insert some data into the mock table
 statement ok
 insert into t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
 
+statement ok
+insert into t3 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);
+
+query I
+select add_named(a, b) from t3 order by a asc;
+----
+2
+4
+6
+8
+10
+
 query III
 select sub(c1, c2), c1, c2, add(c1, c2) from t1 order by c1 asc;
 ----
@@ -189,7 +273,7 @@ create function add_sub(INT, FLOAT, INT) returns float language sql as $$select
 
 # Complex types interleaving
 statement ok
-create function add_sub_types(INT, BIGINT, FLOAT, DECIMAL, REAL) returns real language sql as 'select $1 + $2 - $3 + $4 + $5';
+create function add_sub_types(INT, BIGINT, FLOAT, DECIMAL, REAL) returns double language sql as 'select $1 + $2 - $3 + $4 + $5';
 
 statement ok
 create function add_sub_return(INT, FLOAT, INT) returns float language sql return -$1 + $2 - $3;
@@ -290,9 +374,30 @@ drop function print_add_two;
 statement ok
 drop function regexp_replace_wrapper;
 
+statement ok
+drop function corner_case;
+
+statement ok
+drop function add_named;
+
+statement ok
+drop function sub_named;
+
+statement ok
+drop function add_sub_mix;
+
+statement ok
+drop function add_named_wrapper;
+
+statement ok
+drop function type_match;
+
 # Drop the mock table
 statement ok
 drop table t1;
 
 statement ok
 drop table t2;
+
+statement ok
+drop table t3;
diff --git a/src/frontend/src/binder/expr/column.rs b/src/frontend/src/binder/expr/column.rs
@@ -52,6 +52,8 @@ impl Binder {
                 // The reason that we directly return error here,
                 // is because during a valid sql udf binding,
                 // there will not exist any column identifiers
+                // And invalid cases should already be caught
+                // during semantic check phase
                 return Err(ErrorCode::BindError(format!(
                     "failed to find named parameter {column_name}"
                 ))

diff --git a/src/frontend/src/binder/mod.rs b/src/frontend/src/binder/mod.rs
@@ -219,26 +219,29 @@ impl UdfContext {
         Ok(expr)
     }
 
-    /// TODO: add name related logic
-    /// NOTE: need to think of a way to prevent naming conflict
-    /// e.g., when existing column names conflict with parameter names in sql udf
     pub fn create_udf_context(
         args: &[FunctionArg],
-        _catalog: &Arc<FunctionCatalog>,
+        catalog: &Arc<FunctionCatalog>,
     ) -> Result<HashMap<String, AstExpr>> {
         let mut ret: HashMap<String, AstExpr> = HashMap::new();
         for (i, current_arg) in args.iter().enumerate() {
-            if let FunctionArg::Unnamed(arg) = current_arg {
-                let FunctionArgExpr::Expr(e) = arg else {
-                    return Err(ErrorCode::InvalidInputSyntax("invalid syntax".to_string()).into());
-                };
-                // if catalog.arg_names.is_some() {
-                //      todo!()
-                // }
-                ret.insert(format!("${}", i + 1), e.clone());
-                continue;
+            match current_arg {
+                FunctionArg::Unnamed(arg) => {
+                    let FunctionArgExpr::Expr(e) = arg else {
+                        return Err(
+                            ErrorCode::InvalidInputSyntax("invalid syntax".to_string()).into()
+                        );
+                    };
+                    if catalog.arg_names[i].is_empty() {
+                        ret.insert(format!("${}", i + 1), e.clone());
+                    } else {
+                        // The index mapping here is accurate
+                        // So that we could directly use the index
+                        ret.insert(catalog.arg_names[i].clone(), e.clone());
+                    }
+                }
+                _ => return Err(ErrorCode::InvalidInputSyntax("invalid syntax".to_string()).into()),
             }
-            return Err(ErrorCode::InvalidInputSyntax("invalid syntax".to_string()).into());
         }
         Ok(ret)
     }

diff --git a/src/frontend/src/handler/create_sql_function.rs b/src/frontend/src/handler/create_sql_function.rs
@@ -29,18 +29,28 @@ use thiserror_ext::AsReport;
 use super::*;
 use crate::binder::UdfContext;
 use crate::catalog::CatalogError;
-use crate::expr::{ExprImpl, Literal};
+use crate::expr::{Expr, ExprImpl, Literal};
 use crate::{bind_data_type, Binder};
 
 /// Create a mock `udf_context`, which is used for semantic check
-fn create_mock_udf_context(arg_types: Vec<DataType>) -> HashMap<String, ExprImpl> {
-    (1..=arg_types.len())
+fn create_mock_udf_context(
+    arg_types: Vec<DataType>,
+    arg_names: Vec<String>,
+) -> HashMap<String, ExprImpl> {
+    let mut ret: HashMap<String, ExprImpl> = (1..=arg_types.len())
         .map(|i| {
             let mock_expr =
                 ExprImpl::Literal(Box::new(Literal::new(None, arg_types[i - 1].clone())));
-            (format!("${i}"), mock_expr.clone())
+            (format!("${i}"), mock_expr)
         })
-        .collect()
+        .collect();
+
+    for (i, arg_name) in arg_names.into_iter().enumerate() {
+        let mock_expr = ExprImpl::Literal(Box::new(Literal::new(None, arg_types[i].clone())));
+        ret.insert(arg_name, mock_expr);
+    }
+
+    ret
 }
 
 pub async fn handle_create_sql_function(
@@ -173,15 +183,31 @@ pub async fn handle_create_sql_function(
 
         binder
             .udf_context_mut()
-            .update_context(create_mock_udf_context(arg_types.clone()));
+            .update_context(create_mock_udf_context(
+                arg_types.clone(),
+                arg_names.clone(),
+            ));
+
+        binder.set_udf_binding_flag();
 
         if let Ok(expr) = UdfContext::extract_udf_expression(ast) {
-            if let Err(e) = binder.bind_expr(expr) {
-                return Err(ErrorCode::InvalidInputSyntax(format!(
+            match binder.bind_expr(expr) {
+                Ok(expr) => {
+                    // Check if the return type mismatches
+                    if expr.return_type() != return_type {
+                        return Err(ErrorCode::InvalidInputSyntax(format!(
+                            "\nreturn type mismatch detected\nexpected: [{}]\nactual: [{}]\nplease adjust your function definition accordingly",
+                            return_type,
+                            expr.return_type()
+                        ))
+                        .into());
+                    }
+                }
+                Err(e) => return Err(ErrorCode::InvalidInputSyntax(format!(
                     "failed to conduct semantic check, please see if you are calling non-existence functions: {}",
                     e.as_report()
                 ))
-                .into());
+                .into()),
             }
         } else {
             return Err(ErrorCode::InvalidInputSyntax(
@@ -191,6 +217,8 @@ pub async fn handle_create_sql_function(
             )
             .into());
         }
+
+        binder.unset_udf_binding_flag();
     }
 
     // Create the actual function, will be stored in function catalog