Skip to content

Commit

Permalink
fix(optimizer): correctly derive cardinality for group top-n (#18561)
Browse files Browse the repository at this point in the history
Signed-off-by: Bugen Zhao <[email protected]>
  • Loading branch information
BugenZhao authored Sep 18, 2024
1 parent 24336eb commit f19dec5
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 1 deletion.
25 changes: 25 additions & 0 deletions src/frontend/planner_test/tests/testdata/input/topn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,28 @@
SELECT * FROM t1_mv ORDER BY a DESC LIMIT 50 OFFSET 50;
expected_outputs:
- batch_plan
- sql: |
WITH c1(k, v) AS (
VALUES
(1, 'foo'),
(2, 'bar')
),
c2 AS (
SELECT
*,
row_number() over (
PARTITION by k
ORDER BY 1
) AS rn
FROM
c1
)
SELECT
count(*)
FROM
c2
WHERE
rn <= 1;
expected_outputs:
- logical_plan
- optimized_logical_plan_for_batch
38 changes: 38 additions & 0 deletions src/frontend/planner_test/tests/testdata/output/topn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,41 @@
└─BatchExchange { order: [], dist: Single }
└─BatchLimit { limit: 100, offset: 0 }
└─BatchScan { table: t1_mv, columns: [t1_mv.pk, t1_mv.a, t1_mv.b, t1_mv.c, t1_mv.d], limit: 100, distribution: SomeShard }
- sql: |
WITH c1(k, v) AS (
VALUES
(1, 'foo'),
(2, 'bar')
),
c2 AS (
SELECT
*,
row_number() over (
PARTITION by k
ORDER BY 1
) AS rn
FROM
c1
)
SELECT
count(*)
FROM
c2
WHERE
rn <= 1;
logical_plan: |-
LogicalProject { exprs: [count] }
└─LogicalAgg { aggs: [count] }
└─LogicalProject { exprs: [] }
└─LogicalFilter { predicate: (row_number <= 1:Int32) }
└─LogicalShare { id: 5 }
└─LogicalProject { exprs: [*VALUES*_0.column_0, *VALUES*_0.column_1, row_number] }
└─LogicalOverWindow { window_functions: [row_number() OVER(PARTITION BY *VALUES*_0.column_0 ORDER BY 1:Int32 ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] }
└─LogicalProject { exprs: [*VALUES*_0.column_0, *VALUES*_0.column_1, 1:Int32] }
└─LogicalShare { id: 1 }
└─LogicalValues { rows: [[1:Int32, 'foo':Varchar], [2:Int32, 'bar':Varchar]], schema: Schema { fields: [*VALUES*_0.column_0:Int32, *VALUES*_0.column_1:Varchar] } }
optimized_logical_plan_for_batch: |-
LogicalAgg { aggs: [count] }
└─LogicalTopN { order: [1:Int32 ASC], limit: 1, offset: 0, group_key: [*VALUES*_0.column_0] }
└─LogicalProject { exprs: [*VALUES*_0.column_0, 1:Int32] }
└─LogicalValues { rows: [[1:Int32], [2:Int32]], schema: Schema { fields: [*VALUES*_0.column_0:Int32] } }
12 changes: 11 additions & 1 deletion src/frontend/src/optimizer/plan_visitor/cardinality_visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,22 @@ impl PlanVisitor for CardinalityVisitor {
/// Derives the output cardinality of a `LogicalTopN` node.
///
/// The input plan is visited first to obtain its cardinality. A bound for the
/// rows produced under the limit is then computed from `plan.limit_attr()`.
/// When `plan.group_key()` is empty that bound is returned directly; otherwise
/// it is multiplied by an estimate of the number of groups and finally capped
/// by the input cardinality.
///
/// # Panics
///
/// Panics if the limit is `TopNLimit::WithTies` and `plan.offset()` is not 0.
fn visit_logical_top_n(&mut self, plan: &plan_node::LogicalTopN) -> Cardinality {
let input = self.visit(plan.input());

// NOTE(review): this excerpt is a diff view. The bare `match` line directly
// below appears to be the single removed line of this hunk, and the
// `let each_group = match ...` line after it is its replacement — confirm
// against the actual file before treating this text as compilable code.
match plan.limit_attr() {
let each_group = match plan.limit_attr() {
// Plain limit: subtract the offset from the input, then bound by `limit`.
TopNLimit::Simple(limit) => input.sub(plan.offset() as usize).min(limit as usize),
TopNLimit::WithTies(limit) => {
assert_eq!(plan.offset(), 0, "ties with offset is not supported yet");
// NOTE(review): the open-ended range `limit..` presumably leaves the upper
// bound unrestricted because tied rows may exceed `limit` — confirm against
// the `Cardinality::min` implementation.
input.min((limit as usize)..)
}
};

if plan.group_key().is_empty() {
// No group key: the limit applies to the input as a whole.
each_group
} else {
// NOTE(review): `input.min(1..)` is used as the estimate of how many groups
// exist; presumably "no more groups than input rows, and possibly as few as
// one (or zero for empty input)" — confirm against `Cardinality::min`.
let group_number = input.min(1..);
each_group
.mul(group_number)
// the output cardinality will never be more than the input, thus `.min(input)`
.min(input)
}
}

Expand Down

0 comments on commit f19dec5

Please sign in to comment.