Skip to content

Commit

Permalink
Merge branch 'main' into yiming/no-read-version-in-reset
Browse files Browse the repository at this point in the history
  • Loading branch information
wenym1 authored Jan 31, 2024
2 parents dcc337d + 1f57fdd commit 1eb9145
Show file tree
Hide file tree
Showing 28 changed files with 487 additions and 219 deletions.
15 changes: 15 additions & 0 deletions ci/scripts/run-backfill-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,20 @@ test_backfill_snapshot_with_limited_storage_throughput() {
kill_cluster
}

# Test case where we do backfill with PK of 10 columns to measure performance impact.
test_backfill_snapshot_with_wider_rows() {
echo "--- e2e, test_backfill_snapshot_with_wider_rows, $RUNTIME_CLUSTER_PROFILE"
cargo make ci-start $RUNTIME_CLUSTER_PROFILE
sqllogictest -p 4566 -d dev 'e2e_test/backfill/runtime/create_wide_table.slt'
sqllogictest -p 4566 -d dev 'e2e_test/backfill/runtime/insert_wide_snapshot.slt'
sqllogictest -p 4566 -d dev 'e2e_test/backfill/runtime/create_arrangement_backfill_mv.slt'
sqllogictest -p 4566 -d dev 'e2e_test/backfill/runtime/create_no_shuffle_mv.slt'
sqllogictest -p 4566 -d dev 'e2e_test/backfill/runtime/validate_rows_no_shuffle.slt'
sqllogictest -p 4566 -d dev 'e2e_test/backfill/runtime/validate_rows_arrangement.slt'

kill_cluster
}

main() {
set -euo pipefail
test_snapshot_and_upstream_read
Expand All @@ -282,6 +296,7 @@ main() {

# Backfill will happen in sequence here.
test_backfill_snapshot_runtime
test_backfill_snapshot_with_wider_rows
test_backfill_snapshot_with_limited_storage_throughput

# No upstream only tests, because if there's no snapshot,
Expand Down
20 changes: 20 additions & 0 deletions e2e_test/backfill/runtime/create_wide_table.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
statement ok
CREATE TABLE t (
v1 int,
v2 varchar,
v3 bigint,
v4 int,
v5 varchar,
v6 bigint,
v7 int,
v8 varchar,
v9 bigint,
v10 int,
v11 varchar,
v12 bigint,
v13 int,
v14 varchar,
v15 bigint,
primary key (v1, v2, v3, v4, v5,
v8, v9, v10, v11, v12)
);
22 changes: 22 additions & 0 deletions e2e_test/backfill/runtime/insert_wide_snapshot.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# 15 columns wide
statement ok
INSERT INTO t select
generate_series,
'jakbj2khbe2',
22222222222,
generate_series,
'jakbj2khbe2',
22222222222,
generate_series,
'jakbj2khbe2',
22222222222,
generate_series,
'jakbj2khbe2',
22222222222,
generate_series,
'jakbj2khbe2',
22222222222
from generate_series(2000001, 4000000);

statement ok
flush;
111 changes: 108 additions & 3 deletions e2e_test/udf/sql_udf.slt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,35 @@ create function sub(INT, INT) returns int language sql as 'select $1 - $2';
statement ok
create function add_sub_binding() returns int language sql as 'select add(1, 1) + sub(2, 2)';

# Create a named sql udf
statement ok
create function add_named(a INT, b INT) returns int language sql as 'select a + b';

# Create another named sql udf
statement ok
create function sub_named(a INT, b INT) returns int language sql as 'select a - b';

# Mixed parameter with named / anonymous parameters
statement ok
create function add_sub_mix(INT, a INT, INT) returns int language sql as 'select $1 - a + $3';

# Mixed parameter with calling inner sql udfs
# statement ok
# create function add_sub_mix_wrapper(INT, a INT, INT) returns int language sql as 'select add($1, a) + a + sub(a, $3)';

# Named sql udf with corner case
statement ok
create function corner_case(INT, a INT, INT) returns varchar language sql as $$select '$1 + a + $3'$$;

# Named sql udf with invalid parameter in body definition
# Will be rejected at creation time
statement error failed to find named parameter aa
create function unknown_parameter(a INT) returns int language sql as 'select a + aa + a';

# Call anonymous sql udf inside named sql udf
statement ok
create function add_named_wrapper(a INT, b INT) returns int language sql as 'select add(a, b)';

# Create an anonymous function that calls built-in functions
# Note that double dollar signs should be used otherwise the parsing will fail, as illutrates below
statement ok
Expand Down Expand Up @@ -81,7 +110,20 @@ create function print_add_two(INT) returns int language sql as 'select print($1
statement error failed to conduct semantic check, please see if you are calling non-existence functions
create function non_exist(INT) returns int language sql as 'select yo(114514)';

# Call the defined sql udf
# Try to create an anonymous sql udf whose return type mismatches with the sql body definition
statement error return type mismatch detected
create function type_mismatch(INT) returns varchar language sql as 'select $1 + 114514 + $1';

# A valid example
statement ok
create function type_match(INT) returns varchar language sql as $$select '$1 + 114514 + $1'$$;

query T
select type_match(114514);
----
$1 + 114514 + $1

# Call the defined anonymous sql udfs
query I
select add(1, -1);
----
Expand All @@ -92,6 +134,32 @@ select sub(1, 1);
----
0

# Call the defined named sql udfs
query I
select add_named(1, -1);
----
0

query I
select sub_named(1, 1);
----
0

query I
select add_sub_mix(1, 2, 3);
----
2

query T
select corner_case(1, 2, 3);
----
$1 + a + $3

query I
select add_named_wrapper(1, -1);
----
0

query I
select add_sub_binding();
----
Expand Down Expand Up @@ -145,14 +213,30 @@ select print_add_one(1), print_add_one(114513), print_add_two(2);
----
2 114514 4

# Create a mock table
# Create a mock table for anonymous sql udf
statement ok
create table t1 (c1 INT, c2 INT);

# Create a mock table for named sql udf
statement ok
create table t3 (a INT, b INT);

# Insert some data into the mock table
statement ok
insert into t1 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);

statement ok
insert into t3 values (1, 1), (2, 2), (3, 3), (4, 4), (5, 5);

query I
select add_named(a, b) from t3 order by a asc;
----
2
4
6
8
10

query III
select sub(c1, c2), c1, c2, add(c1, c2) from t1 order by c1 asc;
----
Expand Down Expand Up @@ -189,7 +273,7 @@ create function add_sub(INT, FLOAT, INT) returns float language sql as $$select

# Complex types interleaving
statement ok
create function add_sub_types(INT, BIGINT, FLOAT, DECIMAL, REAL) returns real language sql as 'select $1 + $2 - $3 + $4 + $5';
create function add_sub_types(INT, BIGINT, FLOAT, DECIMAL, REAL) returns double language sql as 'select $1 + $2 - $3 + $4 + $5';

statement ok
create function add_sub_return(INT, FLOAT, INT) returns float language sql return -$1 + $2 - $3;
Expand Down Expand Up @@ -290,9 +374,30 @@ drop function print_add_two;
statement ok
drop function regexp_replace_wrapper;

statement ok
drop function corner_case;

statement ok
drop function add_named;

statement ok
drop function sub_named;

statement ok
drop function add_sub_mix;

statement ok
drop function add_named_wrapper;

statement ok
drop function type_match;

# Drop the mock table
statement ok
drop table t1;

statement ok
drop table t2;

statement ok
drop table t3;
2 changes: 2 additions & 0 deletions src/frontend/src/binder/expr/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ impl Binder {
// The reason that we directly return error here,
// is because during a valid sql udf binding,
// there will not exist any column identifiers
// And invalid cases should already be caught
// during semantic check phase
return Err(ErrorCode::BindError(format!(
"failed to find named parameter {column_name}"
))
Expand Down
31 changes: 17 additions & 14 deletions src/frontend/src/binder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,26 +219,29 @@ impl UdfContext {
Ok(expr)
}

/// TODO: add name related logic
/// NOTE: need to think of a way to prevent naming conflict
/// e.g., when existing column names conflict with parameter names in sql udf
pub fn create_udf_context(
args: &[FunctionArg],
_catalog: &Arc<FunctionCatalog>,
catalog: &Arc<FunctionCatalog>,
) -> Result<HashMap<String, AstExpr>> {
let mut ret: HashMap<String, AstExpr> = HashMap::new();
for (i, current_arg) in args.iter().enumerate() {
if let FunctionArg::Unnamed(arg) = current_arg {
let FunctionArgExpr::Expr(e) = arg else {
return Err(ErrorCode::InvalidInputSyntax("invalid syntax".to_string()).into());
};
// if catalog.arg_names.is_some() {
// todo!()
// }
ret.insert(format!("${}", i + 1), e.clone());
continue;
match current_arg {
FunctionArg::Unnamed(arg) => {
let FunctionArgExpr::Expr(e) = arg else {
return Err(
ErrorCode::InvalidInputSyntax("invalid syntax".to_string()).into()
);
};
if catalog.arg_names[i].is_empty() {
ret.insert(format!("${}", i + 1), e.clone());
} else {
// The index mapping here is accurate
// So that we could directly use the index
ret.insert(catalog.arg_names[i].clone(), e.clone());
}
}
_ => return Err(ErrorCode::InvalidInputSyntax("invalid syntax".to_string()).into()),
}
return Err(ErrorCode::InvalidInputSyntax("invalid syntax".to_string()).into());
}
Ok(ret)
}
Expand Down
46 changes: 37 additions & 9 deletions src/frontend/src/handler/create_sql_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,28 @@ use thiserror_ext::AsReport;
use super::*;
use crate::binder::UdfContext;
use crate::catalog::CatalogError;
use crate::expr::{ExprImpl, Literal};
use crate::expr::{Expr, ExprImpl, Literal};
use crate::{bind_data_type, Binder};

/// Create a mock `udf_context`, which is used for semantic check
fn create_mock_udf_context(arg_types: Vec<DataType>) -> HashMap<String, ExprImpl> {
(1..=arg_types.len())
fn create_mock_udf_context(
arg_types: Vec<DataType>,
arg_names: Vec<String>,
) -> HashMap<String, ExprImpl> {
let mut ret: HashMap<String, ExprImpl> = (1..=arg_types.len())
.map(|i| {
let mock_expr =
ExprImpl::Literal(Box::new(Literal::new(None, arg_types[i - 1].clone())));
(format!("${i}"), mock_expr.clone())
(format!("${i}"), mock_expr)
})
.collect()
.collect();

for (i, arg_name) in arg_names.into_iter().enumerate() {
let mock_expr = ExprImpl::Literal(Box::new(Literal::new(None, arg_types[i].clone())));
ret.insert(arg_name, mock_expr);
}

ret
}

pub async fn handle_create_sql_function(
Expand Down Expand Up @@ -173,15 +183,31 @@ pub async fn handle_create_sql_function(

binder
.udf_context_mut()
.update_context(create_mock_udf_context(arg_types.clone()));
.update_context(create_mock_udf_context(
arg_types.clone(),
arg_names.clone(),
));

binder.set_udf_binding_flag();

if let Ok(expr) = UdfContext::extract_udf_expression(ast) {
if let Err(e) = binder.bind_expr(expr) {
return Err(ErrorCode::InvalidInputSyntax(format!(
match binder.bind_expr(expr) {
Ok(expr) => {
// Check if the return type mismatches
if expr.return_type() != return_type {
return Err(ErrorCode::InvalidInputSyntax(format!(
"\nreturn type mismatch detected\nexpected: [{}]\nactual: [{}]\nplease adjust your function definition accordingly",
return_type,
expr.return_type()
))
.into());
}
}
Err(e) => return Err(ErrorCode::InvalidInputSyntax(format!(
"failed to conduct semantic check, please see if you are calling non-existence functions: {}",
e.as_report()
))
.into());
.into()),
}
} else {
return Err(ErrorCode::InvalidInputSyntax(
Expand All @@ -191,6 +217,8 @@ pub async fn handle_create_sql_function(
)
.into());
}

binder.unset_udf_binding_flag();
}

// Create the actual function, will be stored in function catalog
Expand Down
Loading

0 comments on commit 1eb9145

Please sign in to comment.