Skip to content

Commit

Permalink
[multistage] don't prune project after agg during relBuilder (apache#12058)
Browse files Browse the repository at this point in the history



---------

Co-authored-by: Rong Rong <[email protected]>
  • Loading branch information
walterddr and Rong Rong authored Nov 29, 2023
1 parent 37270f7 commit c57117a
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ public QueryEnvironment(TypeFactory typeFactory, CalciteSchema rootSchema, Worke
// SUB-QUERY Threshold is useless as we are encoding all IN clause in-line anyway
.withInSubQueryThreshold(Integer.MAX_VALUE)
.addRelBuilderConfigTransform(c -> c.withPushJoinCondition(true))
.addRelBuilderConfigTransform(c -> c.withAggregateUnique(true)))
.addRelBuilderConfigTransform(c -> c.withAggregateUnique(true))
.addRelBuilderConfigTransform(c -> c.withPruneInputOfAggregate(false)))
.build();
_optProgram = getOptProgram();
_traitProgram = getTraitProgram();
Expand Down
76 changes: 75 additions & 1 deletion pinot-query-planner/src/test/resources/queries/JoinPlans.json
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,81 @@
]
},
{
"description": "nexted reused tmp table SEMI JOINs",
"description": "multiple SEMI JOINs on same column multiple conditions and multiple columns",
"sql": "EXPLAIN PLAN FOR WITH tmp1 AS ( SELECT * FROM a WHERE col2 NOT IN ('foo', 'bar') ) SELECT * FROM a WHERE col2 IN (SELECT col1 FROM tmp1) AND col2 IN (SELECT col1 FROM b WHERE col3 > 100) AND col3 IN (SELECT col3 from b WHERE col3 < 100)",
"output": [
"Execution Plan",
"\nLogicalJoin(condition=[=($2, $7)], joinType=[semi])",
"\n LogicalJoin(condition=[=($1, $7)], joinType=[semi])",
"\n LogicalJoin(condition=[=($1, $7)], joinType=[semi])",
"\n LogicalTableScan(table=[[a]])",
"\n PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
"\n LogicalProject(col1=[$0])",
"\n LogicalFilter(condition=[AND(<>($1, 'bar'), <>($1, 'foo'))])",
"\n LogicalTableScan(table=[[a]])",
"\n PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
"\n LogicalProject(col1=[$0])",
"\n LogicalFilter(condition=[>($2, 100)])",
"\n LogicalTableScan(table=[[b]])",
"\n PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
"\n LogicalProject(col3=[$2])",
"\n LogicalFilter(condition=[<($2, 100)])",
"\n LogicalTableScan(table=[[b]])",
"\n"
]
},
{
"description": "multiple SEMI JOINs with Agg",
"sql": "EXPLAIN PLAN FOR WITH tmp1 AS ( SELECT * FROM a WHERE col2 NOT IN ('foo', 'bar') ) SELECT COUNT(*) FROM a WHERE col2 IN (SELECT col1 FROM tmp1) AND col3 IN (SELECT col3 from b WHERE col3 < 100)",
"output": [
"Execution Plan",
"\nLogicalAggregate(group=[{}], agg#0=[COUNT($0)])",
"\n PinotLogicalExchange(distribution=[hash])",
"\n LogicalAggregate(group=[{}], agg#0=[COUNT()])",
"\n LogicalJoin(condition=[=($0, $1)], joinType=[semi])",
"\n LogicalProject(col3=[$1])",
"\n LogicalJoin(condition=[=($0, $2)], joinType=[semi])",
"\n LogicalProject(col2=[$1], col3=[$2])",
"\n LogicalTableScan(table=[[a]])",
"\n PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
"\n LogicalProject(col1=[$0])",
"\n LogicalFilter(condition=[AND(<>($1, 'bar'), <>($1, 'foo'))])",
"\n LogicalTableScan(table=[[a]])",
"\n PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
"\n LogicalProject(col3=[$2])",
"\n LogicalFilter(condition=[<($2, 100)])",
"\n LogicalTableScan(table=[[b]])",
"\n"
]
},
{
"description": "multiple SEMI JOINs with group-by and having clause",
"sql": "EXPLAIN PLAN FOR WITH tmp1 AS ( SELECT * FROM a WHERE col2 NOT IN ('foo', 'bar') ) SELECT col1, SUM(col3) FROM a WHERE col2 IN (SELECT col1 FROM tmp1) AND col3 IN (SELECT col3 from b WHERE col3 < 100) GROUP BY col1 HAVING COUNT(*) > 10",
"output": [
"Execution Plan",
"\nLogicalProject(col1=[$0], EXPR$1=[$1])",
"\n LogicalFilter(condition=[>($2, 10)])",
"\n LogicalAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($2)])",
"\n PinotLogicalExchange(distribution=[hash[0]])",
"\n LogicalAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT()])",
"\n LogicalJoin(condition=[=($1, $2)], joinType=[semi])",
"\n LogicalProject(col1=[$0], col3=[$2])",
"\n LogicalJoin(condition=[=($1, $3)], joinType=[semi])",
"\n LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
"\n LogicalTableScan(table=[[a]])",
"\n PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
"\n LogicalProject(col1=[$0])",
"\n LogicalFilter(condition=[AND(<>($1, 'bar'), <>($1, 'foo'))])",
"\n LogicalTableScan(table=[[a]])",
"\n PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
"\n LogicalProject(col3=[$2])",
"\n LogicalFilter(condition=[<($2, 100)])",
"\n LogicalTableScan(table=[[b]])",
"\n"
]
},
{
"description": "reused tmp table in SEMI JOINs",
"sql": "EXPLAIN PLAN FOR WITH tmp1 AS ( SELECT * FROM a WHERE col2 NOT IN ('foo', 'bar') ), tmp2 AS ( SELECT * FROM b WHERE col1 IN (SELECT col1 FROM tmp1) AND col3 < 100 ) SELECT * FROM tmp2 WHERE col3 IN (SELECT col3 from tmp1)",
"output": [
"Execution Plan",
Expand Down
21 changes: 21 additions & 0 deletions pinot-query-runtime/src/test/resources/queries/WithStatements.json
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,27 @@
["b", "bob", 196883]
]
},
{
"description": "multi 'with' table and semi-joins on same column multiple conditions and other column condition",
"sql": "WITH t1 AS ( SELECT * FROM {tbl1} WHERE intCol > 1 ) SELECT boolCol, strCol1, strCol2, intCol FROM {tbl2} WHERE strCol1 IN (SELECT strCol FROM t1) AND strCol2 IN (SELECT strCol2 FROM {tbl2} WHERE intCol > 1000) AND strCol1 IN (SELECT strCol1 FROM {tbl2} WHERE boolCol)",
"outputs": [
[true, "b", "bob", 196883]
]
},
{
"description": "multi 'with' table and semi-joins and group-by with having",
"sql": "WITH t1 AS ( SELECT * FROM {tbl1} WHERE intCol > 1 ) SELECT strCol1, SUM(doubleCol) FROM {tbl2} WHERE strCol1 IN (SELECT strCol FROM t1) AND intCol IN (SELECT intCol FROM {tbl1} WHERE intCol < 100) GROUP BY strCol1 HAVING COUNT(*) < 10",
"outputs": [
["a", 275.12]
]
},
{
"description": "multi 'with' table and semi-joins and agg",
"sql": "WITH t1 AS ( SELECT * FROM {tbl1} WHERE intCol > 1 ) SELECT COUNT(*) FROM {tbl2} WHERE strCol1 IN (SELECT strCol FROM t1) AND intCol IN (SELECT intCol FROM {tbl1} WHERE intCol < 100)",
"outputs": [
[2]
]
},
{
"description": "nested 'with' on agg table: (with a as ( ... ), select ... ",
"sql": "WITH agg1 AS (SELECT strCol1, strCol2, sum(intCol) AS sumVal FROM {tbl2} GROUP BY strCol1, strCol2) SELECT strCol1, avg(sumVal) AS avgVal FROM agg1 GROUP BY strCol1",
Expand Down

0 comments on commit c57117a

Please sign in to comment.