From 5992c1feba2c74935069ba80b93ab38eebf6970e Mon Sep 17 00:00:00 2001 From: xxchan Date: Mon, 25 Sep 2023 17:24:13 +0800 Subject: [PATCH] add duckdb cte tests & fix batch topn bug --- Makefile.toml | 3 +- e2e_test/batch/duckdb/all.slt.part | 1 + .../cte/insert_cte_bug_3417.test.slt.part | 18 +++ .../duckdb/cte/test_bug_922.test.slt.part | 8 ++ .../batch/duckdb/cte/test_cte.test.slt.part | 135 ++++++++++++++++++ .../duckdb/cte/test_cte_in_cte.test.slt.part | 53 +++++++ .../cte/test_cte_overflow.test.slt.part | 20 +++ .../duckdb/cte/test_issue_5673.test.slt.part | 48 +++++++ .../tests/testdata/input/limit.yaml | 5 + .../tests/testdata/output/limit.yaml | 18 ++- .../src/optimizer/plan_node/batch_limit.rs | 2 +- 11 files changed, 304 insertions(+), 7 deletions(-) create mode 100644 e2e_test/batch/duckdb/cte/insert_cte_bug_3417.test.slt.part create mode 100644 e2e_test/batch/duckdb/cte/test_bug_922.test.slt.part create mode 100644 e2e_test/batch/duckdb/cte/test_cte.test.slt.part create mode 100644 e2e_test/batch/duckdb/cte/test_cte_in_cte.test.slt.part create mode 100644 e2e_test/batch/duckdb/cte/test_cte_overflow.test.slt.part create mode 100644 e2e_test/batch/duckdb/cte/test_issue_5673.test.slt.part diff --git a/Makefile.toml b/Makefile.toml index 3f10deb424465..0bad643f27285 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -1292,11 +1292,12 @@ echo "All processes has exited." 
[tasks.slt] category = "RiseDev - SQLLogicTest" -install_crate = { version = "0.17.0", crate_name = "sqllogictest-bin", binary = "sqllogictest", test_arg = [ +install_crate = { version = "0.17.1", crate_name = "sqllogictest-bin", binary = "sqllogictest", test_arg = [ "--help", ], install_command = "binstall" } command = "sqllogictest" args = ["${@}"] +env = { SLT_PORT = "4566", SLT_DB = "dev" } description = "🌟 Run SQLLogicTest" [tasks.slt-streaming] diff --git a/e2e_test/batch/duckdb/all.slt.part b/e2e_test/batch/duckdb/all.slt.part index 81ed190749455..68adce73dea24 100644 --- a/e2e_test/batch/duckdb/all.slt.part +++ b/e2e_test/batch/duckdb/all.slt.part @@ -6,3 +6,4 @@ include ./conjunction/*.slt.part include ./conjunction/*/*.slt.part include ./limit/*.slt.part include ./select/*.slt.part +include ./cte/*.slt.part diff --git a/e2e_test/batch/duckdb/cte/insert_cte_bug_3417.test.slt.part b/e2e_test/batch/duckdb/cte/insert_cte_bug_3417.test.slt.part new file mode 100644 index 0000000000000..d4a6a955caf69 --- /dev/null +++ b/e2e_test/batch/duckdb/cte/insert_cte_bug_3417.test.slt.part @@ -0,0 +1,18 @@ +# name: test/sql/cte/insert_cte_bug_3417.test +# description: Test for a crash reported in issue #3417 +# group: [cte] + +statement ok +CREATE TABLE table1 (id INTEGER, a INTEGER); + +statement ok +CREATE TABLE table2 (table1_id INTEGER); + +statement error +INSERT INTO table2 WITH cte AS (INSERT INTO table1 SELECT 1, 2 RETURNING id) SELECT id FROM cte; + +statement ok +DROP TABLE table1; + +statement ok +DROP TABLE table2; diff --git a/e2e_test/batch/duckdb/cte/test_bug_922.test.slt.part b/e2e_test/batch/duckdb/cte/test_bug_922.test.slt.part new file mode 100644 index 0000000000000..371fc7b5b8ef0 --- /dev/null +++ b/e2e_test/batch/duckdb/cte/test_bug_922.test.slt.part @@ -0,0 +1,8 @@ +# name: test/sql/cte/test_bug_922.test +# description: Test for a crash reported in issue #922 +# group: [cte] + +query I +WITH my_list(value) AS (VALUES (1), (2), (3)) + SELECT * FROM 
my_list LIMIT 0 OFFSET 1 +---- diff --git a/e2e_test/batch/duckdb/cte/test_cte.test.slt.part b/e2e_test/batch/duckdb/cte/test_cte.test.slt.part new file mode 100644 index 0000000000000..dc264f19dd40b --- /dev/null +++ b/e2e_test/batch/duckdb/cte/test_cte.test.slt.part @@ -0,0 +1,135 @@ +# name: test/sql/cte/test_cte.test +# description: Test Common Table Expressions (CTE) +# group: [cte] + +statement ok +SET RW_IMPLICIT_FLUSH TO TRUE; + +statement ok +create table a(i integer); + +statement ok +insert into a values (42); + +query I +with cte1 as (Select i as j from a) select * from cte1; +---- +42 + +# FIXME: this should succeed +query error failed to bind expression: x +with cte1 as (Select i as j from a) select x from cte1 t1(x); + +query I +with cte1(xxx) as (Select i as j from a) select xxx from cte1; +---- +42 + +query I +with cte1(xxx) as (Select i as j from a) select x from cte1 t1(x); +---- +42 + +query II +with cte1 as (Select i as j from a), cte2 as (select ref.j as k from cte1 as ref), cte3 as (select ref2.j+1 as i from cte1 as ref2) select * from cte2 , cte3; +---- +42 43 + +query I rowsort +with cte1 as (select i as j from a), cte2 as (select ref.j as k from cte1 as ref), cte3 as (select ref2.j+1 as i from cte1 as ref2) select * from cte2 union all select * FROM cte3; +---- +42 +43 + + +# FIXME: this should be an error +# duplicate CTE alias +query I +with cte1 as (select 42), cte1 as (select 43) select * FROM cte1; +---- +43 + +# reference to CTE before its actually defined +# duckdb is ok +# postgres: query failed: db error: ERROR: relation "cte1" does not exist +# DETAIL: There is a WITH item named "cte1", but it cannot be referenced from this part of the query. +# HINT: Use WITH RECURSIVE, or re-order the WITH items to remove forward references. 
+query error table or source not found: cte1 +with cte3 as (select ref2.j as i from cte1 as ref2), cte1 as (Select i as j from a), cte2 as (select ref.j+1 as k from cte1 as ref) select * from cte2 union all select * FROM cte3; + + +# multiple uses of same CTE +query II +with cte1 as (Select i as j from a) select * from cte1 cte11, cte1 cte12; +---- +42 42 + +# refer to CTE in subquery +query I +with cte1 as (Select i as j from a) select * from cte1 where j = (select max(j) from cte1 as cte2); +---- +42 + +# multi-column name alias +query II +with cte1(x, y) as (select 42 a, 84 b) select zzz, y from cte1 t1(zzz); +---- +42 84 + +# use a CTE in a view definition +statement ok +create view va AS (with cte as (Select i as j from a) select * from cte); + +query I +select * from va +---- +42 + +# nested CTE views that re-use CTE aliases +query I +with cte AS (SELECT * FROM va) SELECT * FROM cte; +---- +42 + +# multiple ctes in a view definition +statement ok +create view vb AS (with cte1 as (Select i as j from a), cte2 as (select ref.j+1 as k from cte1 as ref) select * from cte2); + +query I +select * from vb +---- +43 + +# cte in set operation node +query I +SELECT 1 UNION ALL (WITH cte AS (SELECT 42) SELECT * FROM cte); +---- +1 +42 + +# # cte in recursive cte +# query I +# WITH RECURSIVE cte(d) AS ( +# SELECT 1 +# UNION ALL +# (WITH c(d) AS (SELECT * FROM cte) +# SELECT d + 1 +# FROM c +# WHERE FALSE +# ) +# ) +# SELECT max(d) FROM cte; +# ---- +# 1 + +# FIXME: this should succeed +# test CTE with nested aliases in where clause +query error failed to bind expression: alias1 +with cte (a) as ( + select 1 +) +select + a as alias1, + alias1 as alias2 +from cte +where alias2 > 0; diff --git a/e2e_test/batch/duckdb/cte/test_cte_in_cte.test.slt.part b/e2e_test/batch/duckdb/cte/test_cte_in_cte.test.slt.part new file mode 100644 index 0000000000000..76e3257d0eb53 --- /dev/null +++ b/e2e_test/batch/duckdb/cte/test_cte_in_cte.test.slt.part @@ -0,0 +1,53 @@ +# name: 
test/sql/cte/test_cte_in_cte.test +# description: Test Nested Common Table Expressions (CTE) +# group: [cte] + +statement ok +SET RW_IMPLICIT_FLUSH TO TRUE; + +statement ok +create table a(i integer); + +statement ok +insert into a values (42); + +query I +with cte1 as (Select i as j from a) select * from cte1; +---- +42 + +# FIXME: this should succeed +query error failed to bind expression: x +with cte1 as (with b as (Select i as j from a) Select j from b) select x from cte1 t1(x); + +query I +with cte1(xxx) as (with ncte(yyy) as (Select i as j from a) Select yyy from ncte) select xxx from cte1; +---- +42 + +query II +with cte1 as (with b as (Select i as j from a) select j from b), cte2 as (with c as (select ref.j+1 as k from cte1 as ref) select k from c) select * from cte1 , cte2; +---- +42 43 + +# refer to CTE in subquery tableref +query I +with cte1 as (Select i as j from a) select * from (with cte2 as (select max(j) as j from cte1) select * from cte2) f +---- +42 + +# refer to CTE in subquery expression +query I +with cte1 as (Select i as j from a) select * from cte1 where j = (with cte2 as (select max(j) as j from cte1) select j from cte2); +---- +42 + +# refer to same-named CTE in a subquery expression +query I +with cte as (Select i as j from a) select * from cte where j = (with cte as (select max(j) as j from cte) select j from cte); +---- +42 + +# self-refer to non-existent cte +statement error +with cte as (select * from cte) select * from cte diff --git a/e2e_test/batch/duckdb/cte/test_cte_overflow.test.slt.part b/e2e_test/batch/duckdb/cte/test_cte_overflow.test.slt.part new file mode 100644 index 0000000000000..915614472ec5a --- /dev/null +++ b/e2e_test/batch/duckdb/cte/test_cte_overflow.test.slt.part @@ -0,0 +1,20 @@ +# name: test/sql/cte/test_cte_overflow.test +# description: Ensure no stack overflow for CTE names that match existing tables +# group: [cte] + +statement ok +SET RW_IMPLICIT_FLUSH TO TRUE; + +statement ok +create table a (id integer) + 
+statement ok +insert into a values (1729) + +statement ok +create view va as (with v as (select * from a) select * from v) + +query I +with a as (select * from va) select * from a +---- +1729 diff --git a/e2e_test/batch/duckdb/cte/test_issue_5673.test.slt.part b/e2e_test/batch/duckdb/cte/test_issue_5673.test.slt.part new file mode 100644 index 0000000000000..a157867b5529b --- /dev/null +++ b/e2e_test/batch/duckdb/cte/test_issue_5673.test.slt.part @@ -0,0 +1,48 @@ +# name: test/sql/cte/test_issue_5673.test +# description: Issue #5673 and #4987: CTE and Table name are name shadowing +# group: [cte] + +statement ok +SET RW_IMPLICIT_FLUSH TO TRUE; + +statement ok +create table orders(ordered_at int); + +statement ok +create table stg_orders(ordered_at int); + +statement ok +insert into orders values (1); + +statement ok +insert into stg_orders values (1); + +# Note: postgres succeeds. +# duckdb returns Binder Error: Circular reference to CTE "orders", There are two possible solutions. +query I +with +orders as ( + select * from stg_orders + where ordered_at >= (select max(ordered_at) from orders) +), +some_more_logic as ( + select * + from orders +) +select * from some_more_logic; +---- +1 + +query I +with +orders as ( + select * from public.stg_orders + where ordered_at >= (select max(ordered_at) from public.orders) +), +some_more_logic as ( + select * + from orders +) +select * from some_more_logic; +---- +1 diff --git a/src/frontend/planner_test/tests/testdata/input/limit.yaml b/src/frontend/planner_test/tests/testdata/input/limit.yaml index 31344d6b4dc18..0d15a5de6cd17 100644 --- a/src/frontend/planner_test/tests/testdata/input/limit.yaml +++ b/src/frontend/planner_test/tests/testdata/input/limit.yaml @@ -53,6 +53,11 @@ expected_outputs: - batch_plan - stream_plan +- sql: | + select 1 c limit 1 offset 2 + expected_outputs: + - batch_plan + - stream_plan - sql: | select 1 c order by 1 limit 1 expected_outputs: diff --git 
a/src/frontend/planner_test/tests/testdata/output/limit.yaml b/src/frontend/planner_test/tests/testdata/output/limit.yaml index 81a832e8f48a2..3668754609094 100644 --- a/src/frontend/planner_test/tests/testdata/output/limit.yaml +++ b/src/frontend/planner_test/tests/testdata/output/limit.yaml @@ -83,6 +83,15 @@ StreamMaterialize { columns: [c, _row_id(hidden)], stream_key: [], pk_columns: [], pk_conflict: NoCheck } └─StreamTopN [append_only] { order: [1:Int32 ASC], limit: 1, offset: 0 } └─StreamValues { rows: [[1:Int32, 0:Int64]] } +- sql: | + select 1 c limit 1 offset 2 + batch_plan: |- + BatchLimit { limit: 1, offset: 2 } + └─BatchValues { rows: [[1:Int32]] } + stream_plan: |- + StreamMaterialize { columns: [c, _row_id(hidden)], stream_key: [], pk_columns: [], pk_conflict: NoCheck } + └─StreamTopN [append_only] { order: [1:Int32 ASC], limit: 1, offset: 2 } + └─StreamValues { rows: [[1:Int32, 0:Int64]] } - sql: | select 1 c order by 1 limit 1 batch_plan: |- @@ -114,11 +123,10 @@ create table t (a int); select count(*) from t limit 1; batch_plan: |- - BatchLimit { limit: 1, offset: 0 } - └─BatchSimpleAgg { aggs: [sum0(count)] } - └─BatchExchange { order: [], dist: Single } - └─BatchSimpleAgg { aggs: [count] } - └─BatchScan { table: t, columns: [], distribution: SomeShard } + BatchExchange { order: [], dist: Single } + └─BatchLimit { limit: 1, offset: 0 } + └─BatchSimpleAgg { aggs: [count] } + └─BatchScan { table: t, columns: [], distribution: Single } stream_plan: |- StreamMaterialize { columns: [count], stream_key: [], pk_columns: [], pk_conflict: NoCheck } └─StreamTopN { order: [sum0(count) ASC], limit: 1, offset: 0 } diff --git a/src/frontend/src/optimizer/plan_node/batch_limit.rs b/src/frontend/src/optimizer/plan_node/batch_limit.rs index 5fe37b1713ecc..aee8d2a23f704 100644 --- a/src/frontend/src/optimizer/plan_node/batch_limit.rs +++ b/src/frontend/src/optimizer/plan_node/batch_limit.rs @@ -52,7 +52,7 @@ impl BatchLimit { 
single_dist.enforce_if_not_satisfies(batch_partial_limit.into(), &any_order)? } else { // The input's distribution is singleton, so use one phase limit is enough. - return Ok(batch_partial_limit.into()); + return Ok(self.clone().into()); }; let batch_global_limit = self.clone_with_input(ensure_single_dist);