risingwavelabs · yuhao-su · Aug 7, 2024 · Jul 22, 2024 · Aug 1, 2024 · Aug 1, 2024
diff --git a/e2e_test/batch/basic/union.slt.part b/e2e_test/batch/basic/union.slt.part
@@ -2,33 +2,33 @@ statement ok
 SET RW_IMPLICIT_FLUSH TO true;
 
 statement ok
-create table t1 (v1 int, v2 bigint);
+create table t1 (v1 int, v2 bigint, v4 int);
 
 statement ok
-create table t2 (v1 int, v3 int);
+create table t2 (v1 int, v3 int, v4 int);
 
 statement ok
-insert into t1 values(1, 2);
+insert into t1 values(1, 2, 3);
 
 statement ok
-insert into t2 values(1, 2);
+insert into t2 values(1, 2, 3);
 
-query II
+query III
 select * from t1 union select * from t2
 ----
-1 2
+1 2 3
 
-query II
+query III
 select * from t1 union all select * from t2
 ----
-1 2
-1 2
+1 2 3
+1 2 3
 
-query II
+query III
 select * from t1 union all select * from t2 order by v1
 ----
-1 2
-1 2
+1 2 3
+1 2 3
 
 statement error
 select * from t1 union all select * from t2 order by v1 + 1
@@ -69,9 +69,43 @@ NULL
 statement error
 select null union all select null select union 1
 
+query II
+select * from t1 union all corresponding select * from t2 order by v1
+----
+1 3
+1 3
+
+query II
+select * from t1 union corresponding select v4, v3 as v1 from t2 order by v1
+----
+1 3
+2 3
+
+query II
+select * from t1 union all corresponding by (v4, v1) select * from t2
+----
+3 1
+3 1
+
+query I
+select * from t1 union corresponding by (v4) select * from t2
+----
+3
+
+statement error Invalid input syntax: Every <column name> in the <corresponding column list> shall be a <column name> of both left and right side. Missing column: `vxx`
+select * from t1 union corresponding by (vxx) select * from t2
+
+statement ok
+create table txx (vxx int);
+
+statement error Invalid input syntax: At least one column of the left side shall have a <column name> that is the <column name> of some column of the right side
+select * from t1 union corresponding select * from txx
 
 statement ok
 drop table t1;
 
 statement ok
 drop table t2;
+
+statement ok
+drop table txx;
diff --git a/e2e_test/streaming/union.slt b/e2e_test/streaming/union.slt
@@ -2,17 +2,23 @@ statement ok
 SET RW_IMPLICIT_FLUSH TO true;
 
 statement ok
-create table t1 (v1 int, v2 int);
+create table t1 (v1 int, v2 int, v4 int);
 
 statement ok
-create table t2 (v1 int, v3 int);
+create table t2 (v1 int, v3 int, v4 int);
 
 statement ok
 create materialized view v as select * from t1 union all select * from t2;
 
 statement ok
 create materialized view v2 as select * from t1 union select * from t2;
 
+statement ok
+create materialized view v3 as select * from t1 union all corresponding select * from t2;
+
+statement ok
+create materialized view v4 as select * from t1 union corresponding by (v4, v1) select * from t2;
+
 query II
 select * from v;
 ----
@@ -22,64 +28,121 @@ select * from v2;
 ----
 
 statement ok
-insert into t1 values(1, 2);
+insert into t1 values(1, 2, 3);
 
-query II
+query III
 select * from v;
 ----
-1 2
+1 2 3
 
-query II
+query III
 select * from v2;
 ----
-1 2
+1 2 3
+
+query II
+select * from v3;
+----
+1 3
+
+query II
+select * from v4;
+----
+3 1
 
 statement ok
-insert into t2 values(1, 2);
+insert into t2 values(1, 2, 3);
 
 
-query II
+query III
 select * from v;
 ----
-1 2
-1 2
+1 2 3
+1 2 3
 
-query II
+query III
 select * from v2;
 ----
-1 2
+1 2 3
+
+query II
+select * from v3;
+----
+1 3
+1 3
+
+query II
+select * from v4;
+----
+3 1
 
 statement ok
 delete from t1 where v1 = 1;
 
-query II
+query III
 select * from v;
 ----
-1 2
+1 2 3
 
-query II
+query III
 select * from v2;
 ----
-1 2
+1 2 3
+
+query II
+select * from v3;
+----
+1 3
+
+query II
+select * from v4;
+----
+3 1
 
 statement ok
 delete from t2 where v1 = 1;
 
-query II
+query III
 select * from v;
 ----
 
-query II
+query III
 select * from v2;
 ----
 
+query II
+select * from v3;
+----
+
+query II
+select * from v4;
+----
+
 
 statement ok
 drop materialized view v;
 
 statement ok
 drop materialized view v2;
 
+statement ok
+drop materialized view v3;
+
+statement ok
+drop materialized view v4;
+
+statement error Invalid input syntax: Every <column name> in the <corresponding column list> shall be a <column name> of both left and right side. Missing column: `vxx`
+create materialized view v5 as select * from t1 union corresponding by (vxx, v1) select * from t2
+
+statement ok
+create table txx (vxx int);
+
+statement error Invalid input syntax: At least one column of the left side shall have a <column name> that is the <column name> of some column of the right side
+create materialized view v5 as select * from t1 union corresponding select * from txx
+
+statement ok
+drop table txx;
+
 statement ok
 drop table t1;
 

diff --git a/src/frontend/planner_test/tests/testdata/input/union.yaml b/src/frontend/planner_test/tests/testdata/input/union.yaml
@@ -95,3 +95,23 @@
     select * from t1 union all select * from t2 union all select * from t3 union all select * from t4 union all select * from t5;
   expected_outputs:
     - stream_dist_plan
+
+- name: test corresponding union
+  sql: |
+    create table t1 (a int, b numeric, c bigint);
+    create table t2 (a int, b numeric, y bigint);
+    create table t3 (x int, b numeric, c bigint);
+    select * from t1 union corresponding select * from t2 union all corresponding by (b) select * from t3;
+  expected_outputs:
+    - batch_plan
+    - stream_plan
+    - stream_dist_plan
+
+- name: test corresponding union error
+  sql: |
+    create table t1 (a int, b numeric, c bigint);
+    create table t2 (a int, b numeric, y bigint);
+    create table t3 (x int, b numeric, c bigint);
+    select * from t1 union corresponding select * from t2 union all corresponding by (c) select * from t3;
+  expected_outputs:
+    - binder_error
diff --git a/src/frontend/planner_test/tests/testdata/output/union.yaml b/src/frontend/planner_test/tests/testdata/output/union.yaml
@@ -639,3 +639,100 @@
     ├── distribution key: [ 0, 1, 3 ]
     └── read pk prefix len hint: 3
 
+- name: test corresponding union
+  sql: |
+    create table t1 (a int, b numeric, c bigint);
+    create table t2 (a int, b numeric, y bigint);
+    create table t3 (x int, b numeric, c bigint);
+    select * from t1 union corresponding select * from t2 union all corresponding by (b) select * from t3;
+  batch_plan: |-
+    BatchUnion { all: true }
+    ├─BatchExchange { order: [], dist: Single }
+    │ └─BatchProject { exprs: [t1.b] }
+    │   └─BatchHashAgg { group_key: [t1.a, t1.b], aggs: [] }
+    │     └─BatchExchange { order: [], dist: HashShard(t1.a, t1.b) }
+    │       └─BatchUnion { all: true }
+    │         ├─BatchExchange { order: [], dist: Single }
+    │         │ └─BatchScan { table: t1, columns: [t1.a, t1.b], distribution: SomeShard }
+    │         └─BatchExchange { order: [], dist: Single }
+    │           └─BatchScan { table: t2, columns: [t2.a, t2.b], distribution: SomeShard }
+    └─BatchExchange { order: [], dist: Single }
+      └─BatchScan { table: t3, columns: [t3.b], distribution: SomeShard }
+  stream_plan: |-
+    StreamMaterialize { columns: [b, t1.a(hidden), t1.b(hidden), null:Serial(hidden), $src(hidden)], stream_key: [t1.a, t1.b, null:Serial, $src], pk_columns: [t1.a, t1.b, null:Serial, $src], pk_conflict: NoCheck }
+    └─StreamUnion { all: true }
+      ├─StreamExchange { dist: HashShard(t1.a, t1.b, null:Serial, 0:Int32) }
+      │ └─StreamProject { exprs: [t1.b, t1.a, t1.b, null:Serial, 0:Int32], noop_update_hint: true }
+      │   └─StreamHashAgg { group_key: [t1.a, t1.b], aggs: [count] }
+      │     └─StreamExchange { dist: HashShard(t1.a, t1.b) }
+      │       └─StreamUnion { all: true }
+      │         ├─StreamExchange { dist: HashShard(t1._row_id, 0:Int32) }
+      │         │ └─StreamProject { exprs: [t1.a, t1.b, t1._row_id, 0:Int32] }
+      │         │   └─StreamTableScan { table: t1, columns: [t1.a, t1.b, t1._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t1._row_id], pk: [_row_id], dist: UpstreamHashShard(t1._row_id) }
+      │         └─StreamExchange { dist: HashShard(t2._row_id, 1:Int32) }
+      │           └─StreamProject { exprs: [t2.a, t2.b, t2._row_id, 1:Int32] }
+      │             └─StreamTableScan { table: t2, columns: [t2.a, t2.b, t2._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t2._row_id], pk: [_row_id], dist: UpstreamHashShard(t2._row_id) }
+      └─StreamExchange { dist: HashShard(null:Int32, null:Decimal, t3._row_id, 1:Int32) }
+        └─StreamProject { exprs: [t3.b, null:Int32, null:Decimal, t3._row_id, 1:Int32] }
+          └─StreamTableScan { table: t3, columns: [t3.b, t3._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t3._row_id], pk: [_row_id], dist: UpstreamHashShard(t3._row_id) }
+  stream_dist_plan: |+
+    Fragment 0
+    StreamMaterialize { columns: [b, t1.a(hidden), t1.b(hidden), null:Serial(hidden), $src(hidden)], stream_key: [t1.a, t1.b, null:Serial, $src], pk_columns: [t1.a, t1.b, null:Serial, $src], pk_conflict: NoCheck }
+    ├── tables: [ Materialize: 4294967294 ]
+    └── StreamUnion { all: true }
+        ├── StreamExchange Hash([1, 2, 3, 4]) from 1
+        └── StreamExchange Hash([1, 2, 3, 4]) from 5
+
+    Fragment 1
+    StreamProject { exprs: [t1.b, t1.a, t1.b, null:Serial, 0:Int32], noop_update_hint: true }
+    └── StreamHashAgg { group_key: [t1.a, t1.b], aggs: [count] } { tables: [ HashAggState: 0 ] }
+        └── StreamExchange Hash([0, 1]) from 2
+
+    Fragment 2
+    StreamUnion { all: true }
+    ├── StreamExchange Hash([2, 3]) from 3
+    └── StreamExchange Hash([2, 3]) from 4
+
+    Fragment 3
+    StreamProject { exprs: [t1.a, t1.b, t1._row_id, 0:Int32] }
+    └── StreamTableScan { table: t1, columns: [t1.a, t1.b, t1._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t1._row_id], pk: [_row_id], dist: UpstreamHashShard(t1._row_id) }
+        ├── tables: [ StreamScan: 1 ]
+        ├── Upstream
+        └── BatchPlanNode
+
+    Fragment 4
+    StreamProject { exprs: [t2.a, t2.b, t2._row_id, 1:Int32] }
+    └── StreamTableScan { table: t2, columns: [t2.a, t2.b, t2._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t2._row_id], pk: [_row_id], dist: UpstreamHashShard(t2._row_id) }
+        ├── tables: [ StreamScan: 2 ]
+        ├── Upstream
+        └── BatchPlanNode
+
+    Fragment 5
+    StreamProject { exprs: [t3.b, null:Int32, null:Decimal, t3._row_id, 1:Int32] }
+    └── StreamTableScan { table: t3, columns: [t3.b, t3._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t3._row_id], pk: [_row_id], dist: UpstreamHashShard(t3._row_id) }
+        ├── tables: [ StreamScan: 3 ]
+        ├── Upstream
+        └── BatchPlanNode
+
+    Table 0 { columns: [ t1_a, t1_b, count ], primary key: [ $0 ASC, $1 ASC ], value indices: [ 2 ], distribution key: [ 0, 1 ], read pk prefix len hint: 2 }
+
+    Table 1 { columns: [ vnode, _row_id, backfill_finished, row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
+
+    Table 2 { columns: [ vnode, _row_id, backfill_finished, row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
+
+    Table 3 { columns: [ vnode, _row_id, backfill_finished, row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
+
+    Table 4294967294
+    ├── columns: [ b, t1.a, t1.b, null:Serial, $src ]
+    ├── primary key: [ $1 ASC, $2 ASC, $3 ASC, $4 ASC ]
+    ├── value indices: [ 0, 1, 2, 3, 4 ]
+    ├── distribution key: [ 1, 2, 3, 4 ]
+    └── read pk prefix len hint: 4
+
+- name: test corresponding union error
+  sql: |
+    create table t1 (a int, b numeric, c bigint);
+    create table t2 (a int, b numeric, y bigint);
+    create table t3 (x int, b numeric, c bigint);
+    select * from t1 union corresponding select * from t2 union all corresponding by (c) select * from t3;
+  binder_error: 'Invalid input syntax: Every <column name> in the <corresponding column list> shall be a <column name> of both left and right side. Missing column: `c`'