Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(over window): fix error in using aggregate function result as win… #12551

Merged
merged 8 commits into from
Oct 10, 2023
82 changes: 82 additions & 0 deletions e2e_test/batch/aggregate/with_over_window.slt.part
Copy link
Member

@stdrc stdrc Oct 7, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please test some cases like the following, where `partition by` and `order by` are not the same as `group by`?

select
    a, b,
    sum( sum(c) ) over (partition by a order by b)
from t
group by a, b;

Or, if we want to go further:

select
    a, b,
    sum( sum(c) ) over (partition by a, avg(d) order by max(e), b)
from t
group by a, b;

Also, please add a streaming version of these tests. You may refer to how OverWindow is e2e-tested.

Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Fixture for the aggregate + over-window queries in this file.
# The three rows with (a, b) = (5, 68) fall into the same group, so the
# `group by` in the queries below actually merges rows.
statement ok
create table t (a int, b int, c int, d int, e int);

# Columns are named explicitly so the fixture does not depend on the
# positional column order of `t`.
statement ok
insert into t (a, b, c, d, e) values
(1, 23, 84, 11, 87),
(2, 34, 29, 22, 98),
(3, 45, 43, 33, 10),
(4, 56, 83, 44, 26),
(5, 68, 20, 55, 12),
(5, 68, 90, 66, 34),
(5, 68, 11, 77, 32);

# Aggregate result used as the argument of a window function, with the window
# keyed on the group-by column. The trailing `order by a` pins the output row
# order so the expected rows below compare deterministically.
query II
select
a,
sum((sum(b))) over (partition by a order by a)
from t
group by a
order by a;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better to add `order by a` to ensure a consistent result.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it is for consistent results, should an `order by` clause be added to every query?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. It should be added to every query in this file.

----
1 23
2 34
3 45
4 56
5 204

# row_number() over a grouped input: each partition holds exactly one group,
# so every row is numbered 1. `order by a` makes the output order deterministic.
query II
select
a,
row_number() over (partition by a order by a)
from t
group by a
order by a;
----
1 1
2 1
3 1
4 1
5 1

# Same as the ascending case, but with a descending window order.
# `order by a` makes the output order deterministic.
query II
select
a,
row_number() over (partition by a order by a desc)
from t
group by a
order by a;
----
1 1
2 1
3 1
4 1
5 1

# Window partition/order keys (a / b) differ from the full group-by key (a, b).
# `order by a, b` makes the output order deterministic.
query III
select
a,
b,
sum(sum(c)) over (partition by a order by b)
from t
group by a, b
order by a, b;
----
1 23 84
2 34 29
3 45 43
4 56 83
5 68 121

# Aggregate calls appear inside the window's `partition by` and `order by`
# as well as in its argument. `order by a, b` makes the output order
# deterministic.
query III
select
a,
b,
sum(sum(c)) over (partition by a, avg(d) order by max(e), b)
from t
group by a, b
order by a, b;
----
1 23 84
2 34 29
3 45 43
4 56 83
5 68 121

# Remove the fixture table created at the top of this file.
statement ok
drop table t;
12 changes: 11 additions & 1 deletion e2e_test/over_window/generated/batch/create.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,16 @@ select
, row_number() over (partition by p1 order by p2 desc, id) as out11
from t;

# over + agg: window functions evaluated on the output of a `group by`,
# with aggregate calls nested in the window's argument, `partition by`
# and `order by`.
statement ok
create view v_e as
select
p1, p2
, row_number() over (partition by p1 order by p2) as out12
, sum(sum(v2)) over (partition by p1, avg(time) order by max(v1), p2) as out13
from t
group by p1, p2;

statement ok
create view v_a_b as
select
Expand Down Expand Up @@ -103,4 +113,4 @@ select
, first_value(v1) over (partition by p1, p2 order by time, id rows 3 preceding) as out3
, lag(v1 + 2, 0 + 1) over (partition by p1 - 1 order by id) as out4
, min(v1 * 2) over (partition by p1, p2 order by time + 1, id rows between current row and unbounded following) as out5
from t;
from t;
3 changes: 3 additions & 0 deletions e2e_test/over_window/generated/batch/drop.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ drop view v_c;
statement ok
drop view v_d;

# Drop the over + agg view.
statement ok
drop view v_e;

statement ok
drop view v_a_b;

Expand Down
28 changes: 28 additions & 0 deletions e2e_test/over_window/generated/batch/mod.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ select * from v_d order by id;
100003 100 208 2 723 807 3 1
100004 103 200 2 702 808 1 1

# p2 is required as a tie-breaker: two rows share p1 = 100, so ordering by p1
# alone leaves their relative order unspecified.
query iiii
select * from v_e order by p1, p2;
----
100 200 1 1611
100 208 2 807
103 200 1 808

include ./cross_check.slt.part

statement ok
Expand Down Expand Up @@ -88,6 +95,14 @@ select * from v_d order by id;
100005 100 200 3 717 810 4 4
100006 105 204 5 703 828 1 1

# v_e has one row per (p1, p2) group; ordering by both keys makes the output
# order deterministic even though two rows share p1 = 100.
query iiii
select * from v_e order by p1, p2;
----
100 200 1 2421
100 208 2 3228
103 200 1 808
105 204 1 828

include ./cross_check.slt.part

statement ok
Expand Down Expand Up @@ -139,6 +154,13 @@ select * from v_d order by id;
100005 100 200 1 717 810 2 4
100006 105 204 5 703 828 1 1

# v_e exposes four columns (p1, p2, out12, out13), so the type string is
# `iiii`. Ordering by (p1, p2) matches the other v_e checks.
query iiii
select * from v_e order by p1, p2;
----
100 200 1 3228
103 200 1 808
105 204 1 828

query iiiiiiiiii
select * from v_expr order by id;
----
Expand Down Expand Up @@ -182,6 +204,12 @@ select * from v_d order by id;
100005 100 200 1 717 810 2 2
100006 105 204 5 703 828 1 1

# Order by the full group key (p1, p2) so the check stays deterministic even
# if several groups share the same p1.
query iiii
select * from v_e order by p1, p2;
----
100 200 1 1615
105 204 1 828

query iiiiiiiiii
select * from v_expr order by id;
----
Expand Down
12 changes: 11 additions & 1 deletion e2e_test/over_window/generated/streaming/create.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,16 @@ select
, row_number() over (partition by p1 order by p2 desc, id) as out11
from t;

# over + agg: window functions evaluated on the output of a `group by`,
# with aggregate calls nested in the window's argument, `partition by`
# and `order by`.
statement ok
create materialized view v_e as
select
p1, p2
, row_number() over (partition by p1 order by p2) as out12
, sum(sum(v2)) over (partition by p1, avg(time) order by max(v1), p2) as out13
from t
group by p1, p2;

statement ok
create materialized view v_a_b as
select
Expand Down Expand Up @@ -103,4 +113,4 @@ select
, first_value(v1) over (partition by p1, p2 order by time, id rows 3 preceding) as out3
, lag(v1 + 2, 0 + 1) over (partition by p1 - 1 order by id) as out4
, min(v1 * 2) over (partition by p1, p2 order by time + 1, id rows between current row and unbounded following) as out5
from t;
from t;
3 changes: 3 additions & 0 deletions e2e_test/over_window/generated/streaming/drop.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ drop materialized view v_c;
statement ok
drop materialized view v_d;

# Drop the over + agg materialized view.
statement ok
drop materialized view v_e;

statement ok
drop materialized view v_a_b;

Expand Down
28 changes: 28 additions & 0 deletions e2e_test/over_window/generated/streaming/mod.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ select * from v_d order by id;
100003 100 208 2 723 807 3 1
100004 103 200 2 702 808 1 1

# p2 is required as a tie-breaker: two rows share p1 = 100, so ordering by p1
# alone leaves their relative order unspecified.
query iiii
select * from v_e order by p1, p2;
----
100 200 1 1611
100 208 2 807
103 200 1 808

include ./cross_check.slt.part

statement ok
Expand Down Expand Up @@ -88,6 +95,14 @@ select * from v_d order by id;
100005 100 200 3 717 810 4 4
100006 105 204 5 703 828 1 1

# v_e has one row per (p1, p2) group; ordering by both keys makes the output
# order deterministic even though two rows share p1 = 100.
query iiii
select * from v_e order by p1, p2;
----
100 200 1 2421
100 208 2 3228
103 200 1 808
105 204 1 828

include ./cross_check.slt.part

statement ok
Expand Down Expand Up @@ -139,6 +154,13 @@ select * from v_d order by id;
100005 100 200 1 717 810 2 4
100006 105 204 5 703 828 1 1

# v_e exposes four columns (p1, p2, out12, out13), so the type string is
# `iiii`. Ordering by (p1, p2) matches the other v_e checks.
query iiii
select * from v_e order by p1, p2;
----
100 200 1 3228
103 200 1 808
105 204 1 828

query iiiiiiiiii
select * from v_expr order by id;
----
Expand Down Expand Up @@ -182,6 +204,12 @@ select * from v_d order by id;
100005 100 200 1 717 810 2 2
100006 105 204 5 703 828 1 1

# Order by the full group key (p1, p2) so the check stays deterministic even
# if several groups share the same p1.
query iiii
select * from v_e order by p1, p2;
----
100 200 1 1615
105 204 1 828

query iiiiiiiiii
select * from v_expr order by id;
----
Expand Down
12 changes: 11 additions & 1 deletion e2e_test/over_window/templates/create.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,16 @@ select
, row_number() over (partition by p1 order by p2 desc, id) as out11
from t;

# over + agg: window functions evaluated on the output of a `group by`,
# with aggregate calls nested in the window's argument, `partition by`
# and `order by`.
statement ok
create $view_type v_e as
select
p1, p2
, row_number() over (partition by p1 order by p2) as out12
, sum(sum(v2)) over (partition by p1, avg(time) order by max(v1), p2) as out13
from t
group by p1, p2;

statement ok
create $view_type v_a_b as
select
Expand Down Expand Up @@ -101,4 +111,4 @@ select
, first_value(v1) over (partition by p1, p2 order by time, id rows 3 preceding) as out3
, lag(v1 + 2, 0 + 1) over (partition by p1 - 1 order by id) as out4
, min(v1 * 2) over (partition by p1, p2 order by time + 1, id rows between current row and unbounded following) as out5
from t;
from t;
3 changes: 3 additions & 0 deletions e2e_test/over_window/templates/drop.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ drop $view_type v_c;
statement ok
drop $view_type v_d;

# Drop the over + agg view.
statement ok
drop $view_type v_e;

statement ok
drop $view_type v_a_b;

Expand Down
28 changes: 28 additions & 0 deletions e2e_test/over_window/templates/mod.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ select * from v_d order by id;
100003 100 208 2 723 807 3 1
100004 103 200 2 702 808 1 1

# p2 is required as a tie-breaker: two rows share p1 = 100, so ordering by p1
# alone leaves their relative order unspecified.
query iiii
select * from v_e order by p1, p2;
----
100 200 1 1611
100 208 2 807
103 200 1 808

include ./cross_check.slt.part

statement ok
Expand Down Expand Up @@ -86,6 +93,14 @@ select * from v_d order by id;
100005 100 200 3 717 810 4 4
100006 105 204 5 703 828 1 1

# v_e has one row per (p1, p2) group; ordering by both keys makes the output
# order deterministic even though two rows share p1 = 100.
query iiii
select * from v_e order by p1, p2;
----
100 200 1 2421
100 208 2 3228
103 200 1 808
105 204 1 828

include ./cross_check.slt.part

statement ok
Expand Down Expand Up @@ -137,6 +152,13 @@ select * from v_d order by id;
100005 100 200 1 717 810 2 4
100006 105 204 5 703 828 1 1

# v_e exposes four columns (p1, p2, out12, out13), so the type string is
# `iiii`. Ordering by (p1, p2) matches the other v_e checks.
query iiii
select * from v_e order by p1, p2;
----
100 200 1 3228
103 200 1 808
105 204 1 828

query iiiiiiiiii
select * from v_expr order by id;
----
Expand Down Expand Up @@ -180,6 +202,12 @@ select * from v_d order by id;
100005 100 200 1 717 810 2 2
100006 105 204 5 703 828 1 1

# Order by the full group key (p1, p2) so the check stays deterministic even
# if several groups share the same p1.
query iiii
select * from v_e order by p1, p2;
----
100 200 1 1615
105 204 1 828

query iiiiiiiiii
select * from v_expr order by id;
----
Expand Down
29 changes: 29 additions & 0 deletions src/frontend/planner_test/tests/testdata/input/agg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -929,3 +929,32 @@
expected_outputs:
- batch_plan
- stream_plan

# Aggregate result used as a window function argument; window keyed on the group-by column.
- sql: |
CREATE TABLE t (a int, b int);
SELECT a, sum((sum(b))) OVER (PARTITION BY a ORDER BY a) FROM t GROUP BY a;
expected_outputs:
- batch_plan
- stream_plan
# row_number() over a grouped input with a descending window order.
- sql: |
CREATE TABLE t (a int, b int);
SELECT a, row_number() OVER (PARTITION BY a ORDER BY a DESC) FROM t GROUP BY a;
expected_outputs:
- batch_plan
- stream_plan
# Window partition/order keys (a / b) differ from the full group-by key (a, b).
- sql: |
CREATE TABLE t (a int, b int, c int);
SELECT a, b, sum(sum(c)) OVER (PARTITION BY a ORDER BY b)
FROM t
GROUP BY a, b;
expected_outputs:
- batch_plan
- stream_plan
# Aggregate calls inside the window's PARTITION BY and ORDER BY clauses.
- sql: |
CREATE TABLE t (a int, b int, c int, d int, e int);
SELECT a, b, sum(sum(c)) OVER (PARTITION BY a, avg(d) ORDER BY max(e), b)
FROM t
GROUP BY a, b;
expected_outputs:
- batch_plan
- stream_plan
Loading
Loading