From c43186341d3c5cdfd81b1ccfdf3186fd20a88827 Mon Sep 17 00:00:00 2001 From: yikaifei Date: Tue, 29 Oct 2024 14:22:14 +0800 Subject: [PATCH] Velox backend support merge two aggregate to one complete mode aggregate --- .../backendsapi/velox/VeloxRuleApi.scala | 1 + .../HashAggregateExecTransformer.scala | 14 +- .../tpch-approved-plan/v1-bhj/spark33/15.txt | 74 ++-- .../tpch-approved-plan/v1-bhj/spark34/15.txt | 74 ++-- .../tpch-approved-plan/v1/spark32/13.txt | 178 ++++---- .../tpch-approved-plan/v1/spark32/14.txt | 110 +++-- .../tpch-approved-plan/v1/spark32/17.txt | 170 ++++---- .../tpch-approved-plan/v1/spark32/18.txt | 284 +++++++------ .../tpch-approved-plan/v1/spark32/19.txt | 106 +++-- .../tpch-approved-plan/v1/spark32/20.txt | 390 +++++++++--------- .../tpch-approved-plan/v1/spark32/3.txt | 180 ++++---- .../tpch-approved-plan/v1/spark33/11.txt | 152 ++++--- .../tpch-approved-plan/v1/spark33/13.txt | 178 ++++---- .../tpch-approved-plan/v1/spark33/14.txt | 110 +++-- .../tpch-approved-plan/v1/spark33/15.txt | 74 ++-- .../tpch-approved-plan/v1/spark33/17.txt | 170 ++++---- .../tpch-approved-plan/v1/spark33/18.txt | 284 +++++++------ .../tpch-approved-plan/v1/spark33/19.txt | 106 +++-- .../tpch-approved-plan/v1/spark33/20.txt | 382 +++++++++-------- .../tpch-approved-plan/v1/spark33/3.txt | 180 ++++---- .../tpch-approved-plan/v1/spark34/11.txt | 152 ++++--- .../tpch-approved-plan/v1/spark34/13.txt | 178 ++++---- .../tpch-approved-plan/v1/spark34/14.txt | 110 +++-- .../tpch-approved-plan/v1/spark34/15.txt | 74 ++-- .../tpch-approved-plan/v1/spark34/17.txt | 170 ++++---- .../tpch-approved-plan/v1/spark34/18.txt | 284 +++++++------ .../tpch-approved-plan/v1/spark34/19.txt | 106 +++-- .../tpch-approved-plan/v1/spark34/20.txt | 382 +++++++++-------- .../tpch-approved-plan/v1/spark34/3.txt | 180 ++++---- .../VeloxAggregateFunctionsSuite.scala | 1 + docs/Configuration.md | 1 + .../MergeTwoPhasesHashBaseAggregate.scala | 5 +- .../GlutenReplaceHashWithSortAggSuite.scala | 7 +- .../GlutenReplaceHashWithSortAggSuite.scala | 7 +- .../GlutenReplaceHashWithSortAggSuite.scala | 6 + ...MergeTwoPhasesHashBaseAggregateSuite.scala | 194 +++++++++ .../org/apache/gluten/GlutenConfig.scala | 9 + 37 files changed, 2545 insertions(+), 2538 deletions(-) rename {backends-clickhouse/src/main/scala/org/apache/gluten/extension => gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar}/MergeTwoPhasesHashBaseAggregate.scala (96%) create mode 100644 gluten-ut/test/src/test/scala/org/apache/gluten/execution/VeloxMergeTwoPhasesHashBaseAggregateSuite.scala diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala index 3554bc5c9c018..ea20e0fa00944 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala @@ -54,6 +54,7 @@ private object VeloxRuleApi { injector.injectTransform(_ => PushDownInputFileExpression.PreOffload) injector.injectTransform(c => FallbackOnANSIMode.apply(c.session)) injector.injectTransform(c => FallbackMultiCodegens.apply(c.session)) + injector.injectTransform(c => MergeTwoPhasesHashBaseAggregate(c.session)) injector.injectTransform(_ => RewriteSubqueryBroadcast()) injector.injectTransform(c => BloomFilterMightContainJointRewriteRule.apply(c.session)) injector.injectTransform(c => ArrowScanReplaceRule.apply(c.session)) diff --git a/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala b/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala index 61af545e411d8..9a6ba3a220f1a 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala @@ -214,7 +214,7 @@ abstract class HashAggregateExecTransformer( VeloxIntermediateData.getIntermediateTypeNode(aggregateFunction) ) aggregateNodeList.add(aggFunctionNode) - case Final => + case Final | Complete => val aggFunctionNode = ExpressionBuilder.makeAggregateFunction( VeloxAggregateFunctionsBuilder.create(args, aggregateFunction, aggregateMode), childrenNodeList, @@ -242,7 +242,7 @@ abstract class HashAggregateExecTransformer( aggregateFunction.inputAggBufferAttributes.head.nullable) ) aggregateNodeList.add(partialNode) - case Final => + case Final | Complete => val aggFunctionNode = ExpressionBuilder.makeAggregateFunction( VeloxAggregateFunctionsBuilder.create(args, aggregateFunction, aggregateMode), childrenNodeList, @@ -275,7 +275,7 @@ abstract class HashAggregateExecTransformer( expression.mode match { case Partial | PartialMerge => typeNodeList.add(VeloxIntermediateData.getIntermediateTypeNode(aggregateFunction)) - case Final => + case Final | Complete => typeNodeList.add( ConverterUtils .getTypeNode(aggregateFunction.dataType, aggregateFunction.nullable)) @@ -356,7 +356,7 @@ abstract class HashAggregateExecTransformer( // The process of handling the inconsistency in column types and order between // Spark and Velox is exactly the opposite of applyExtractStruct. aggregateExpression.mode match { - case PartialMerge | Final => + case PartialMerge | Final | Complete => val newInputAttributes = new ArrayBuffer[Attribute]() val childNodes = new JArrayList[ExpressionNode]() val (sparkOrders, sparkTypes) = @@ -467,7 +467,7 @@ abstract class HashAggregateExecTransformer( // by previous projection. childrenNodes.add(ExpressionBuilder.makeSelection(colIdx)) colIdx += 1 - case Partial => + case Partial | Complete => aggFunc.children.foreach { _ => childrenNodes.add(ExpressionBuilder.makeSelection(colIdx)) @@ -600,7 +600,7 @@ abstract class HashAggregateExecTransformer( } val aggregateFunc = aggExpr.aggregateFunction val childrenNodes = aggExpr.mode match { - case Partial => + case Partial | Complete => aggregateFunc.children.toList.map( expr => { ExpressionConverter @@ -784,7 +784,7 @@ case class HashAggregateExecPullOutHelper( expr.mode match { case Partial | PartialMerge => expr.aggregateFunction.aggBufferAttributes - case Final => + case Final | Complete => Seq(aggregateAttributes(index)) case other => throw new GlutenNotSupportException(s"Unsupported aggregate mode: $other.") diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/15.txt index 74246ef672258..52a87f43535d4 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/15.txt @@ -237,31 +237,30 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 20 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (68) +- == Final Plan == - VeloxColumnarToRow (60) - +- ^ RegularHashAggregateExecTransformer (58) - +- ^ RegularHashAggregateExecTransformer (57) - +- ^ ProjectExecTransformer (56) - +- ^ RegularHashAggregateExecTransformer (55) - +- ^ InputIteratorTransformer (54) - +- ShuffleQueryStage (52), Statistics(X) - +- ColumnarExchange (51) - +- VeloxResizeBatches (50) - +- ^ ProjectExecTransformer (48) - +- ^ FlushableHashAggregateExecTransformer (47) - +- ^ ProjectExecTransformer (46) - +- ^ FilterExecTransformer (45) - +- ^ ScanTransformer parquet (44) + VeloxColumnarToRow (59) + +- ^ RegularHashAggregateExecTransformer (57) + +- ^ ProjectExecTransformer (56) + +- ^ RegularHashAggregateExecTransformer (55) + +- ^ InputIteratorTransformer (54) + +- ShuffleQueryStage (52), Statistics(X) + +- ColumnarExchange (51) + +- VeloxResizeBatches (50) + +- ^ ProjectExecTransformer (48) + +- ^ FlushableHashAggregateExecTransformer (47) + +- ^ ProjectExecTransformer (46) + +- ^ FilterExecTransformer (45) + +- ^ ScanTransformer parquet (44) +- == Initial Plan == - HashAggregate (68) - +- HashAggregate (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- Project (63) - +- Filter (62) - +- Scan parquet (61) + HashAggregate (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- Filter (61) + +- Scan parquet (60) (44) ScanTransformer parquet @@ -326,71 +325,64 @@ Input [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedpri (57) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] -Functions [1]: [partial_max(total_revenue#X)] -Aggregate Attributes [1]: [max#X] -Results [1]: [max#X] - -(58) RegularHashAggregateExecTransformer -Input [1]: [max#X] -Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(59) WholeStageCodegenTransformer (X) +(58) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(60) VeloxColumnarToRow +(59) VeloxColumnarToRow Input [1]: [max(total_revenue)#X] -(61) Scan parquet +(60) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(62) Filter +(61) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(63) Project +(62) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(64) HashAggregate +(63) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(65) Exchange +(64) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(65) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(67) HashAggregate +(66) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(68) HashAggregate +(67) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(69) AdaptiveSparkPlan +(68) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/15.txt index a65fc78a2f032..48b06986ee1b4 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/15.txt @@ -239,31 +239,30 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 20 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (68) +- == Final Plan == - VeloxColumnarToRow (60) - +- ^ RegularHashAggregateExecTransformer (58) - +- ^ RegularHashAggregateExecTransformer (57) - +- ^ ProjectExecTransformer (56) - +- ^ RegularHashAggregateExecTransformer (55) - +- ^ InputIteratorTransformer (54) - +- ShuffleQueryStage (52), Statistics(X) - +- ColumnarExchange (51) - +- VeloxResizeBatches (50) - +- ^ ProjectExecTransformer (48) - +- ^ FlushableHashAggregateExecTransformer (47) - +- ^ ProjectExecTransformer (46) - +- ^ FilterExecTransformer (45) - +- ^ ScanTransformer parquet (44) + VeloxColumnarToRow (59) + +- ^ RegularHashAggregateExecTransformer (57) + +- ^ ProjectExecTransformer (56) + +- ^ RegularHashAggregateExecTransformer (55) + +- ^ InputIteratorTransformer (54) + +- ShuffleQueryStage (52), Statistics(X) + +- ColumnarExchange (51) + +- VeloxResizeBatches (50) + +- ^ ProjectExecTransformer (48) + +- ^ FlushableHashAggregateExecTransformer (47) + +- ^ ProjectExecTransformer (46) + +- ^ FilterExecTransformer (45) + +- ^ ScanTransformer parquet (44) +- == Initial Plan == - HashAggregate (68) - +- HashAggregate (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- Project (63) - +- Filter (62) - +- Scan parquet (61) + HashAggregate (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- Filter (61) + +- Scan parquet (60) (44) ScanTransformer parquet @@ -328,71 +327,64 @@ Input [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] (57) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] -Functions [1]: [partial_max(total_revenue#X)] -Aggregate Attributes [1]: [max#X] -Results [1]: [max#X] - -(58) RegularHashAggregateExecTransformer -Input [1]: [max#X] -Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(59) WholeStageCodegenTransformer (X) +(58) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(60) VeloxColumnarToRow +(59) VeloxColumnarToRow Input [1]: [max(total_revenue)#X] -(61) Scan parquet +(60) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(62) Filter +(61) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(63) Project +(62) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(64) HashAggregate +(63) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(65) Exchange +(64) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(65) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(67) HashAggregate +(66) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(68) HashAggregate +(67) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(69) AdaptiveSparkPlan +(68) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt index 91444cca05282..c8ddb54370094 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt @@ -1,55 +1,54 @@ == Physical Plan == -AdaptiveSparkPlan (58) +AdaptiveSparkPlan (57) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ SortExecTransformer (38) - +- ^ InputIteratorTransformer (37) - +- ShuffleQueryStage (35) - +- ColumnarExchange (34) - +- VeloxResizeBatches (33) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ ProjectExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ RegularHashAggregateExecTransformer (20) - +- ^ ProjectExecTransformer (19) - +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) - :- ^ InputIteratorTransformer (8) - : +- ShuffleQueryStage (6) - : +- ColumnarExchange (5) - : +- VeloxResizeBatches (4) - : +- ^ ProjectExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (17) - +- ShuffleQueryStage (15) - +- ColumnarExchange (14) - +- VeloxResizeBatches (13) - +- ^ ProjectExecTransformer (11) - +- ^ FilterExecTransformer (10) - +- ^ ScanTransformer parquet (9) + VeloxColumnarToRow (39) + +- ^ SortExecTransformer (37) + +- ^ InputIteratorTransformer (36) + +- ShuffleQueryStage (34) + +- ColumnarExchange (33) + +- VeloxResizeBatches (32) + +- ^ RegularHashAggregateExecTransformer (30) + +- ^ InputIteratorTransformer (29) + +- ShuffleQueryStage (27) + +- ColumnarExchange (26) + +- VeloxResizeBatches (25) + +- ^ ProjectExecTransformer (23) + +- ^ FlushableHashAggregateExecTransformer (22) + +- ^ ProjectExecTransformer (21) + +- ^ RegularHashAggregateExecTransformer (20) + +- ^ ProjectExecTransformer (19) + +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) + :- ^ InputIteratorTransformer (8) + : +- ShuffleQueryStage (6) + : +- ColumnarExchange (5) + : +- VeloxResizeBatches (4) + : +- ^ ProjectExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (17) + +- ShuffleQueryStage (15) + +- ColumnarExchange (14) + +- VeloxResizeBatches (13) + +- ^ ProjectExecTransformer (11) + +- ^ FilterExecTransformer (10) + +- ^ ScanTransformer parquet (9) +- == Initial Plan == - Sort (57) - +- Exchange (56) - +- HashAggregate (55) - +- Exchange (54) - +- HashAggregate (53) - +- HashAggregate (52) - +- HashAggregate (51) - +- Project (50) - +- SortMergeJoin LeftOuter (49) - :- Sort (43) - : +- Exchange (42) - : +- Scan parquet (41) - +- Sort (48) - +- Exchange (47) - +- Project (46) - +- Filter (45) - +- Scan parquet (44) + Sort (56) + +- Exchange (55) + +- HashAggregate (54) + +- Exchange (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin LeftOuter (48) + :- Sort (42) + : +- Exchange (41) + : +- Scan parquet (40) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) ScanTransformer parquet @@ -133,180 +132,173 @@ Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] (20) RegularHashAggregateExecTransformer Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] -Functions [1]: [partial_count(o_orderkey#X)] -Aggregate Attributes [1]: [count#X] -Results [2]: [c_custkey#X, count#X] - -(21) RegularHashAggregateExecTransformer -Input [2]: [c_custkey#X, count#X] -Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [2]: [c_custkey#X, count(o_orderkey#X)#X] -(22) ProjectExecTransformer +(21) ProjectExecTransformer Output [1]: [count(o_orderkey#X)#X AS c_count#X] Input [2]: [c_custkey#X, count(o_orderkey#X)#X] -(23) FlushableHashAggregateExecTransformer +(22) FlushableHashAggregateExecTransformer Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(24) ProjectExecTransformer +(23) ProjectExecTransformer Output [3]: [hash(c_count#X, 42) AS hash_partition_key#X, c_count#X, count#X] Input [2]: [c_count#X, count#X] -(25) WholeStageCodegenTransformer (X) +(24) WholeStageCodegenTransformer (X) Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: false -(26) VeloxResizeBatches +(25) VeloxResizeBatches Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: X, X -(27) ColumnarExchange +(26) ColumnarExchange Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [c_count#X, count#X], [plan_id=X], [shuffle_writer_type=hash] -(28) ShuffleQueryStage +(27) ShuffleQueryStage Output [2]: [c_count#X, count#X] Arguments: X -(29) InputAdapter +(28) InputAdapter Input [2]: [c_count#X, count#X] -(30) InputIteratorTransformer +(29) InputIteratorTransformer Input [2]: [c_count#X, count#X] -(31) RegularHashAggregateExecTransformer +(30) RegularHashAggregateExecTransformer Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(32) WholeStageCodegenTransformer (X) +(31) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(33) VeloxResizeBatches +(32) VeloxResizeBatches Input [2]: [c_count#X, custdist#X] Arguments: X, X -(34) ColumnarExchange +(33) ColumnarExchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(35) ShuffleQueryStage +(34) ShuffleQueryStage Output [2]: [c_count#X, custdist#X] Arguments: X -(36) InputAdapter +(35) InputAdapter Input [2]: [c_count#X, custdist#X] -(37) InputIteratorTransformer +(36) InputIteratorTransformer Input [2]: [c_count#X, custdist#X] -(38) SortExecTransformer +(37) SortExecTransformer Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [2]: [c_count#X, custdist#X] -(41) Scan parquet +(40) Scan parquet Output [1]: [c_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(41) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Sort +(42) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(44) Scan parquet +(43) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(46) Project +(45) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(47) Exchange +(46) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [2]: [o_orderkey#X, o_custkey#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(49) SortMergeJoin +(48) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(50) Project +(49) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(51) HashAggregate +(50) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(52) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(53) HashAggregate +(52) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(54) Exchange +(53) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) HashAggregate +(54) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(56) Exchange +(55) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(57) Sort +(56) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(58) AdaptiveSparkPlan +(57) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt index 33c0dfb71e1ef..40a891738143a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt @@ -1,40 +1,39 @@ == Physical Plan == -AdaptiveSparkPlan (39) +AdaptiveSparkPlan (38) +- == Final Plan == - VeloxColumnarToRow (25) - +- ^ ProjectExecTransformer (23) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ ScanTransformer parquet (10) + VeloxColumnarToRow (24) + +- ^ ProjectExecTransformer (22) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ ScanTransformer parquet (10) +- == Initial Plan == - HashAggregate (38) - +- HashAggregate (37) - +- Project (36) - +- SortMergeJoin Inner (35) - :- Sort (30) - : +- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Sort (34) - +- Exchange (33) - +- Filter (32) - +- Scan parquet (31) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) ScanTransformer parquet @@ -123,93 +122,86 @@ Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] (21) RegularHashAggregateExecTransformer Input [5]: [l_extendedprice#X, l_discount#X, p_type#X, _pre_X#X, _pre_X#X] Keys: [] -Functions [2]: [partial_sum(_pre_X#X), partial_sum(_pre_X#X)] -Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Keys: [] -Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] +Functions [2]: [sum(_pre_X#X), sum(_pre_X#X)] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] -(23) ProjectExecTransformer +(22) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X)), DecimalType(38,6), true)) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X as decimal(38,6)))), DecimalType(38,6), true) AS promo_revenue#X] Input [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] -(24) WholeStageCodegenTransformer (X) +(23) WholeStageCodegenTransformer (X) Input [1]: [promo_revenue#X] Arguments: false -(25) VeloxColumnarToRow +(24) VeloxColumnarToRow Input [1]: [promo_revenue#X] -(26) Scan parquet +(25) Scan parquet Output [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-09-01), LessThan(l_shipdate,1995-10-01), IsNotNull(l_partkey)] ReadSchema: struct -(27) Filter +(26) Filter Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-09-01)) AND (l_shipdate#X < 1995-10-01)) AND isnotnull(l_partkey#X)) -(28) Project +(27) Project Output [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(29) Exchange +(28) Exchange Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Sort +(29) Sort Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(31) Scan parquet +(30) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(32) Filter +(31) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(33) Exchange +(32) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Sort +(33) Sort Input [2]: [p_partkey#X, p_type#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(35) SortMergeJoin +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(36) Project +(35) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(37) HashAggregate +(36) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(38) HashAggregate +(37) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X)), DecimalType(38,6), true)) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X as decimal(38,6)))), DecimalType(38,6), true) AS promo_revenue#X] -(39) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt index 94e892d1e8c9f..30fd4b0b1124b 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt @@ -1,62 +1,61 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (62) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ ProjectExecTransformer (38) - +- ^ RegularHashAggregateExecTransformer (37) - +- ^ RegularHashAggregateExecTransformer (36) - +- ^ ProjectExecTransformer (35) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) - :- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ ScanTransformer parquet (10) - +- ^ FilterExecTransformer (33) - +- ^ ProjectExecTransformer (32) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ FilterExecTransformer (22) - +- ^ ScanTransformer parquet (21) + VeloxColumnarToRow (39) + +- ^ ProjectExecTransformer (37) + +- ^ RegularHashAggregateExecTransformer (36) + +- ^ ProjectExecTransformer (35) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) + :- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ ScanTransformer parquet (10) + +- ^ FilterExecTransformer (33) + +- ^ ProjectExecTransformer (32) + +- ^ RegularHashAggregateExecTransformer (31) + +- ^ InputIteratorTransformer (30) + +- ShuffleQueryStage (28) + +- ColumnarExchange (27) + +- VeloxResizeBatches (26) + +- ^ ProjectExecTransformer (24) + +- ^ FlushableHashAggregateExecTransformer (23) + +- ^ FilterExecTransformer (22) + +- ^ ScanTransformer parquet (21) +- == Initial Plan == - HashAggregate (62) - +- HashAggregate (61) - +- Project (60) - +- SortMergeJoin Inner (59) - :- Project (51) - : +- SortMergeJoin Inner (50) - : :- Sort (44) - : : +- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Sort (49) - : +- Exchange (48) - : +- Project (47) - : +- Filter (46) - : +- Scan parquet (45) - +- Sort (58) - +- Filter (57) - +- HashAggregate (56) - +- Exchange (55) - +- HashAggregate (54) - +- Filter (53) - +- Scan parquet (52) + HashAggregate (61) + +- HashAggregate (60) + +- Project (59) + +- SortMergeJoin Inner (58) + :- Project (50) + : +- SortMergeJoin Inner (49) + : :- Sort (43) + : : +- Exchange (42) + : : +- Filter (41) + : : +- Scan parquet (40) + : +- Sort (48) + : +- Exchange (47) + : +- Project (46) + : +- Filter (45) + : +- Scan parquet (44) + +- Sort (57) + +- Filter (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- Filter (52) + +- Scan parquet (51) (1) ScanTransformer parquet @@ -213,139 +212,132 @@ Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity) (36) RegularHashAggregateExecTransformer Input [1]: [l_extendedprice#X] Keys: [] -Functions [1]: [partial_sum(l_extendedprice#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(37) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [sum(l_extendedprice#X)#X] -(38) ProjectExecTransformer +(37) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6), true) AS avg_yearly#X] Input [1]: [sum(l_extendedprice#X)#X] -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [1]: [avg_yearly#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [1]: [avg_yearly#X] -(41) Scan parquet +(40) Scan parquet Output [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_quantity)] ReadSchema: struct -(42) Filter +(41) Filter Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) -(43) Exchange +(42) Exchange Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(43) Sort Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(45) Scan parquet +(44) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(46) Filter +(45) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(47) Project +(46) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(48) Exchange +(47) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(48) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(50) SortMergeJoin +(49) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(51) Project +(50) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(52) Scan parquet +(51) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(53) Filter +(52) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(54) HashAggregate +(53) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(55) Exchange +(54) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) HashAggregate +(55) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7), true) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(57) Filter +(56) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(58) Sort +(57) Sort Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(59) SortMergeJoin +(58) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(60) Project +(59) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(61) HashAggregate +(60) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(62) HashAggregate +(61) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6), true) AS avg_yearly#X] -(63) AdaptiveSparkPlan +(62) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt index 550cd076c1c23..dd4cad538e2cd 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt @@ -1,102 +1,101 @@ == Physical Plan == -AdaptiveSparkPlan (110) +AdaptiveSparkPlan (109) +- == Final Plan == - VeloxColumnarToRow (70) - +- TakeOrderedAndProjectExecTransformer (69) - +- ^ RegularHashAggregateExecTransformer (67) - +- ^ RegularHashAggregateExecTransformer (66) - +- ^ ProjectExecTransformer (65) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) - :- ^ InputIteratorTransformer (46) - : +- ShuffleQueryStage (44) - : +- ColumnarExchange (43) - : +- VeloxResizeBatches (42) - : +- ^ ProjectExecTransformer (40) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (38) - : +- ShuffleQueryStage (36) - : +- ColumnarExchange (35) - : +- VeloxResizeBatches (34) - : +- ^ ProjectExecTransformer (32) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) - : :- ^ InputIteratorTransformer (18) - : : +- ShuffleQueryStage (16) - : : +- ColumnarExchange (15) - : : +- VeloxResizeBatches (14) - : : +- ^ ProjectExecTransformer (12) - : : +- ^ FilterExecTransformer (11) - : : +- ^ ScanTransformer parquet (10) - : +- ^ ProjectExecTransformer (30) - : +- ^ FilterExecTransformer (29) - : +- ^ RegularHashAggregateExecTransformer (28) - : +- ^ InputIteratorTransformer (27) - : +- ShuffleQueryStage (25) - : +- ColumnarExchange (24) - : +- VeloxResizeBatches (23) - : +- ^ ProjectExecTransformer (21) - : +- ^ FlushableHashAggregateExecTransformer (20) - : +- ^ ScanTransformer parquet (19) - +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) - :- ^ InputIteratorTransformer (55) - : +- ShuffleQueryStage (53) - : +- ColumnarExchange (52) - : +- VeloxResizeBatches (51) - : +- ^ ProjectExecTransformer (49) - : +- ^ FilterExecTransformer (48) - : +- ^ ScanTransformer parquet (47) - +- ^ ProjectExecTransformer (62) - +- ^ FilterExecTransformer (61) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ InputIteratorTransformer (59) - +- ShuffleQueryStage (57) - +- ReusedExchange (56) + VeloxColumnarToRow (69) + +- TakeOrderedAndProjectExecTransformer (68) + +- ^ RegularHashAggregateExecTransformer (66) + +- ^ ProjectExecTransformer (65) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) + :- ^ InputIteratorTransformer (46) + : +- ShuffleQueryStage (44) + : +- ColumnarExchange (43) + : +- VeloxResizeBatches (42) + : +- ^ ProjectExecTransformer (40) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (38) + : +- ShuffleQueryStage (36) + : +- ColumnarExchange (35) + : +- VeloxResizeBatches (34) + : +- ^ ProjectExecTransformer (32) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) + : :- ^ InputIteratorTransformer (18) + : : +- ShuffleQueryStage (16) + : : +- ColumnarExchange (15) + : : +- VeloxResizeBatches (14) + : : +- ^ ProjectExecTransformer (12) + : : +- ^ FilterExecTransformer (11) + : : +- ^ ScanTransformer parquet (10) + : +- ^ ProjectExecTransformer (30) + : +- ^ FilterExecTransformer (29) + : +- ^ RegularHashAggregateExecTransformer (28) + : +- ^ InputIteratorTransformer (27) + : +- ShuffleQueryStage (25) + : +- ColumnarExchange (24) + : +- VeloxResizeBatches (23) + : +- ^ ProjectExecTransformer (21) + : +- ^ FlushableHashAggregateExecTransformer (20) + : +- ^ ScanTransformer parquet (19) + +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) + :- ^ InputIteratorTransformer (55) + : +- ShuffleQueryStage (53) + : +- ColumnarExchange (52) + : +- VeloxResizeBatches (51) + : +- ^ ProjectExecTransformer (49) + : +- ^ FilterExecTransformer (48) + : +- ^ ScanTransformer parquet (47) + +- ^ ProjectExecTransformer (62) + +- ^ FilterExecTransformer (61) + +- ^ RegularHashAggregateExecTransformer (60) + +- ^ InputIteratorTransformer (59) + +- ShuffleQueryStage (57) + +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (109) - +- HashAggregate (108) - +- HashAggregate (107) - +- Project (106) - +- SortMergeJoin Inner (105) - :- Sort (92) - : +- Exchange (91) - : +- Project (90) - : +- SortMergeJoin Inner (89) - : :- Sort (74) - : : +- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Sort (88) - : +- Exchange (87) - : +- SortMergeJoin LeftSemi (86) - : :- Sort (78) - : : +- Exchange (77) - : : +- Filter (76) - : : +- Scan parquet (75) - : +- Sort (85) - : +- Project (84) - : +- Filter (83) - : +- HashAggregate (82) - : +- Exchange (81) - : +- HashAggregate (80) - : +- Scan parquet (79) - +- SortMergeJoin LeftSemi (104) - :- Sort (96) - : +- Exchange (95) - : +- Filter (94) - : +- Scan parquet (93) - +- Sort (103) - +- Project (102) - +- Filter (101) - +- HashAggregate (100) - +- Exchange (99) - +- HashAggregate (98) - +- Scan parquet (97) + TakeOrderedAndProject (108) + +- HashAggregate (107) + +- HashAggregate (106) + +- Project (105) + +- SortMergeJoin Inner (104) + :- Sort (91) + : +- Exchange (90) + : +- Project (89) + : +- SortMergeJoin Inner (88) + : :- Sort (73) + : : +- Exchange (72) + : : +- Filter (71) + : : +- Scan parquet (70) + : +- Sort (87) + : +- Exchange (86) + : +- SortMergeJoin LeftSemi (85) + : :- Sort (77) + : : +- Exchange (76) + : : +- Filter (75) + : : +- Scan parquet (74) + : +- Sort (84) + : +- Project (83) + : +- Filter (82) + : +- HashAggregate (81) + : +- Exchange (80) + : +- HashAggregate (79) + : +- Scan parquet (78) + +- SortMergeJoin LeftSemi (103) + :- Sort (95) + : +- Exchange (94) + : +- Filter (93) + : +- Scan parquet (92) + +- Sort (102) + +- Project (101) + +- Filter (100) + +- HashAggregate (99) + +- Exchange (98) + +- HashAggregate (97) + +- Scan parquet (96) (1) ScanTransformer parquet @@ -371,219 +370,212 @@ Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, (66) RegularHashAggregateExecTransformer Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] - -(67) RegularHashAggregateExecTransformer -Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(68) WholeStageCodegenTransformer (X) +(67) WholeStageCodegenTransformer (X) Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: false -(69) TakeOrderedAndProjectExecTransformer +(68) TakeOrderedAndProjectExecTransformer Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X], 0 -(70) VeloxColumnarToRow +(69) VeloxColumnarToRow Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(71) Scan parquet +(70) Scan parquet Output [2]: [c_custkey#X, c_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey)] ReadSchema: struct -(72) Filter +(71) Filter Input [2]: [c_custkey#X, c_name#X] Condition : isnotnull(c_custkey#X) -(73) Exchange +(72) Exchange Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Sort +(73) Sort Input [2]: [c_custkey#X, c_name#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(75) Scan parquet +(74) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(76) Filter +(75) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(77) Exchange +(76) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Sort +(77) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(79) Scan parquet +(78) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(80) HashAggregate +(79) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(81) Exchange +(80) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) HashAggregate +(81) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(83) Filter +(82) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(84) Project +(83) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(85) Sort +(84) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(86) SortMergeJoin +(85) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(87) Exchange +(86) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Sort +(87) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(89) SortMergeJoin +(88) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(90) Project +(89) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(91) Exchange +(90) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) Sort +(91) Sort Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(93) Scan parquet +(92) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(94) Filter +(93) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(95) Exchange +(94) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) Sort +(95) Sort Input [2]: [l_orderkey#X, l_quantity#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(97) Scan parquet +(96) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(98) HashAggregate +(97) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(99) Exchange +(98) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) HashAggregate +(99) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(101) Filter +(100) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(102) Project +(101) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(103) Sort +(102) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(104) SortMergeJoin +(103) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(105) SortMergeJoin +(104) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(106) Project +(105) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(107) HashAggregate +(106) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(108) HashAggregate +(107) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(109) TakeOrderedAndProject +(108) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(110) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt index f7778691955a5..507b700e37e93 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt @@ -1,39 +1,38 @@ == Physical Plan == -AdaptiveSparkPlan (38) +AdaptiveSparkPlan (37) +- == Final Plan == - VeloxColumnarToRow (24) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ ScanTransformer parquet (10) + VeloxColumnarToRow (23) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ ScanTransformer parquet (10) +- == Initial Plan == - HashAggregate (37) - +- HashAggregate (36) - +- Project (35) - +- SortMergeJoin Inner (34) - :- Sort (29) - : +- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Sort (33) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (36) + +- HashAggregate (35) + +- Project (34) + +- SortMergeJoin Inner (33) + :- Sort (28) + : +- Exchange (27) + : +- Project (26) + : +- Filter (25) + : +- Scan parquet (24) + +- Sort (32) + +- Exchange (31) + +- Filter (30) + +- Scan parquet (29) (1) ScanTransformer parquet @@ -122,89 +121,82 @@ Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partke (21) RegularHashAggregateExecTransformer Input [3]: [l_extendedprice#X, l_discount#X, _pre_X#X] Keys: [] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(23) WholeStageCodegenTransformer (X) +(22) WholeStageCodegenTransformer (X) Input [1]: [revenue#X] Arguments: false -(24) VeloxColumnarToRow +(23) VeloxColumnarToRow Input [1]: [revenue#X] -(25) Scan parquet +(24) Scan parquet Output [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipinstruct), In(l_shipmode, [AIR,AIR REG]), EqualTo(l_shipinstruct,DELIVER IN PERSON), IsNotNull(l_partkey), Or(Or(And(GreaterThanOrEqual(l_quantity,1.00),LessThanOrEqual(l_quantity,11.00)),And(GreaterThanOrEqual(l_quantity,10.00),LessThanOrEqual(l_quantity,20.00))),And(GreaterThanOrEqual(l_quantity,20.00),LessThanOrEqual(l_quantity,30.00)))] ReadSchema: struct -(26) Filter +(25) Filter Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Condition : ((((isnotnull(l_shipinstruct#X) AND l_shipmode#X IN (AIR,AIR REG)) AND (l_shipinstruct#X = DELIVER IN PERSON)) AND isnotnull(l_partkey#X)) AND ((((l_quantity#X >= 1.00) AND (l_quantity#X <= 11.00)) OR ((l_quantity#X >= 10.00) AND (l_quantity#X <= 20.00))) OR ((l_quantity#X >= 20.00) AND (l_quantity#X <= 30.00)))) -(27) Project +(26) Project Output [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] -(28) Exchange +(27) Exchange Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Sort +(28) Sort Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(30) Scan parquet +(29) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(31) Filter +(30) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(32) Exchange +(31) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) Sort +(32) Sort Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(34) SortMergeJoin +(33) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(35) Project +(34) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(36) HashAggregate +(35) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(37) HashAggregate +(36) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(38) AdaptiveSparkPlan +(37) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt index 5dd6bbfccab6d..13c10a7bba29b 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt @@ -1,20 +1,20 @@ == Physical Plan == -AdaptiveSparkPlan (146) +AdaptiveSparkPlan (145) +- == Final Plan == - VeloxColumnarToRow (96) - +- ^ SortExecTransformer (94) - +- ^ InputIteratorTransformer (93) - +- ShuffleQueryStage (91) - +- ColumnarExchange (90) - +- VeloxResizeBatches (89) - +- ^ ProjectExecTransformer (87) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (86) - :- ^ InputIteratorTransformer (76) - : +- ShuffleQueryStage (74) - : +- ColumnarExchange (73) - : +- VeloxResizeBatches (72) - : +- ^ ProjectExecTransformer (70) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (69) + VeloxColumnarToRow (95) + +- ^ SortExecTransformer (93) + +- ^ InputIteratorTransformer (92) + +- ShuffleQueryStage (90) + +- ColumnarExchange (89) + +- VeloxResizeBatches (88) + +- ^ ProjectExecTransformer (86) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (85) + :- ^ InputIteratorTransformer (75) + : +- ShuffleQueryStage (73) + : +- ColumnarExchange (72) + : +- VeloxResizeBatches (71) + : +- ^ ProjectExecTransformer (69) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (68) : :- ^ InputIteratorTransformer (9) : : +- ShuffleQueryStage (7) : : +- ColumnarExchange (6) @@ -22,12 +22,12 @@ AdaptiveSparkPlan (146) : : +- ^ ProjectExecTransformer (3) : : +- ^ FilterExecTransformer (2) : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (68) - : +- ShuffleQueryStage (66) - : +- ColumnarExchange (65) - : +- VeloxResizeBatches (64) - : +- ^ ProjectExecTransformer (62) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (61) + : +- ^ InputIteratorTransformer (67) + : +- ShuffleQueryStage (65) + : +- ColumnarExchange (64) + : +- VeloxResizeBatches (63) + : +- ^ ProjectExecTransformer (61) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (60) : :- ^ InputIteratorTransformer (35) : : +- ShuffleQueryStage (33) : : +- ColumnarExchange (32) @@ -48,83 +48,82 @@ AdaptiveSparkPlan (146) : : +- ^ ProjectExecTransformer (21) : : +- ^ FilterExecTransformer (20) : : +- ^ ScanTransformer parquet (19) - : +- ^ InputIteratorTransformer (60) - : +- ShuffleQueryStage (58) - : +- ColumnarExchange (57) - : +- VeloxResizeBatches (56) - : +- ^ ProjectExecTransformer (54) - : +- ^ FilterExecTransformer (53) - : +- ^ ProjectExecTransformer (52) - : +- ^ RegularHashAggregateExecTransformer (51) - : +- ^ RegularHashAggregateExecTransformer (50) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) - : :- ^ InputIteratorTransformer (44) - : : +- ShuffleQueryStage (42) - : : +- ColumnarExchange (41) - : : +- VeloxResizeBatches (40) - : : +- ^ ProjectExecTransformer (38) - : : +- ^ FilterExecTransformer (37) - : : +- ^ ScanTransformer parquet (36) - : +- ^ InputIteratorTransformer (48) - : +- ShuffleQueryStage (46) - : +- ReusedExchange (45) - +- ^ InputIteratorTransformer (85) - +- ShuffleQueryStage (83) - +- ColumnarExchange (82) - +- VeloxResizeBatches (81) - +- ^ ProjectExecTransformer (79) - +- ^ FilterExecTransformer (78) - +- ^ ScanTransformer parquet (77) + : +- ^ InputIteratorTransformer (59) + : +- ShuffleQueryStage (57) + : +- ColumnarExchange (56) + : +- VeloxResizeBatches (55) + : +- ^ ProjectExecTransformer (53) + : +- ^ FilterExecTransformer (52) + : +- ^ ProjectExecTransformer (51) + : +- ^ RegularHashAggregateExecTransformer (50) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) + : :- ^ InputIteratorTransformer (44) + : : +- ShuffleQueryStage (42) + : : +- ColumnarExchange (41) + : : +- VeloxResizeBatches (40) + : : +- ^ ProjectExecTransformer (38) + : : +- ^ FilterExecTransformer (37) + : : +- ^ ScanTransformer parquet (36) + : +- ^ InputIteratorTransformer (48) + : +- ShuffleQueryStage (46) + : +- ReusedExchange (45) + +- ^ InputIteratorTransformer (84) + +- ShuffleQueryStage (82) + +- ColumnarExchange (81) + +- VeloxResizeBatches (80) + +- ^ ProjectExecTransformer (78) + +- ^ FilterExecTransformer (77) + +- ^ ScanTransformer parquet (76) +- == Initial Plan == - Sort (145) - +- Exchange (144) - +- Project (143) - +- SortMergeJoin Inner (142) - :- Sort (136) - : +- Exchange (135) - : +- Project (134) - : +- SortMergeJoin LeftSemi (133) - : :- Sort (100) - : : +- Exchange (99) - : : +- Filter (98) - : : +- Scan parquet (97) - : +- Sort (132) - : +- Exchange (131) - : +- Project (130) - : +- SortMergeJoin Inner (129) - : :- Sort (112) - : : +- Exchange (111) - : : +- SortMergeJoin LeftSemi (110) - : : :- Sort (104) - : : : +- Exchange (103) - : : : +- Filter (102) - : : : +- Scan parquet (101) - : : +- Sort (109) - : : +- Exchange (108) - : : +- Project (107) - : : +- Filter (106) - : : +- Scan parquet (105) - : +- Sort (128) - : +- Exchange (127) - : +- Filter (126) - : +- HashAggregate (125) - : +- HashAggregate (124) - : +- SortMergeJoin LeftSemi (123) - : :- Sort (117) - : : +- Exchange (116) - : : +- Project (115) - : : +- Filter (114) - : : +- Scan parquet (113) - : +- Sort (122) - : +- Exchange (121) - : +- Project (120) - : +- Filter (119) - : +- Scan parquet (118) - +- Sort (141) - +- Exchange (140) - +- Project (139) - +- Filter (138) - +- Scan parquet (137) + Sort (144) + +- Exchange (143) + +- Project (142) + +- SortMergeJoin Inner (141) + :- Sort (135) + : +- Exchange (134) + : +- Project (133) + : +- SortMergeJoin LeftSemi (132) + : :- Sort (99) + : : +- Exchange (98) + : : +- Filter (97) + : : +- Scan parquet (96) + : +- Sort (131) + : +- Exchange (130) + : +- Project (129) + : +- SortMergeJoin Inner (128) + : :- Sort (111) + : : +- Exchange (110) + : : +- SortMergeJoin LeftSemi (109) + : : :- Sort (103) + : : : +- Exchange (102) + : : : +- Filter (101) + : : : +- Scan parquet (100) + : : +- Sort (108) + : : +- Exchange (107) + : : +- Project (106) + : : +- Filter (105) + : : +- Scan parquet (104) + : +- Sort (127) + : +- Exchange (126) + : +- Filter (125) + : +- HashAggregate (124) + : +- HashAggregate (123) + : +- SortMergeJoin LeftSemi (122) + : :- Sort (116) + : : +- Exchange (115) + : : +- Project (114) + : : +- Filter (113) + : : +- Scan parquet (112) + : +- Sort (121) + : +- Exchange (120) + : +- Project (119) + : +- Filter (118) + : +- Scan parquet (117) + +- Sort (140) + +- Exchange (139) + +- Project (138) + +- Filter (137) + +- Scan parquet (136) (1) ScanTransformer parquet @@ -327,417 +326,410 @@ Join condition: None (50) RegularHashAggregateExecTransformer Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] - -(51) RegularHashAggregateExecTransformer -Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(52) ProjectExecTransformer +(51) ProjectExecTransformer Output [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(53) FilterExecTransformer +(52) FilterExecTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: isnotnull((0.5 * sum(l_quantity))#X) -(54) ProjectExecTransformer +(53) ProjectExecTransformer Output [4]: [hash(l_partkey#X, l_suppkey#X, 42) AS hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(55) WholeStageCodegenTransformer (X) +(54) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: false -(56) VeloxResizeBatches +(55) VeloxResizeBatches Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X, X -(57) ColumnarExchange +(56) ColumnarExchange Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(58) ShuffleQueryStage +(57) ShuffleQueryStage Output [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X -(59) InputAdapter +(58) InputAdapter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(60) InputIteratorTransformer +(59) InputIteratorTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(61) ShuffledHashJoinExecTransformer +(60) ShuffledHashJoinExecTransformer Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(62) ProjectExecTransformer +(61) ProjectExecTransformer Output [2]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(63) WholeStageCodegenTransformer (X) +(62) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: false -(64) VeloxResizeBatches +(63) VeloxResizeBatches Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: X, X -(65) ColumnarExchange +(64) ColumnarExchange Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(66) ShuffleQueryStage +(65) ShuffleQueryStage Output [1]: [ps_suppkey#X] Arguments: X -(67) InputAdapter +(66) InputAdapter Input [1]: [ps_suppkey#X] -(68) InputIteratorTransformer +(67) InputIteratorTransformer Input [1]: [ps_suppkey#X] -(69) ShuffledHashJoinExecTransformer +(68) ShuffledHashJoinExecTransformer Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(70) ProjectExecTransformer +(69) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(71) WholeStageCodegenTransformer (X) +(70) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: false -(72) VeloxResizeBatches +(71) VeloxResizeBatches Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: X, X -(73) ColumnarExchange +(72) ColumnarExchange Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [s_name#X, s_address#X, s_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(74) ShuffleQueryStage +(73) ShuffleQueryStage Output [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: X -(75) InputAdapter +(74) InputAdapter Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(76) InputIteratorTransformer +(75) InputIteratorTransformer Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(77) ScanTransformer parquet +(76) ScanTransformer parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(78) FilterExecTransformer +(77) FilterExecTransformer Input [2]: [n_nationkey#X, n_name#X] Arguments: ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(79) ProjectExecTransformer +(78) ProjectExecTransformer Output [2]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(80) WholeStageCodegenTransformer (X) +(79) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: false -(81) VeloxResizeBatches +(80) VeloxResizeBatches Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: X, X -(82) ColumnarExchange +(81) ColumnarExchange Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(83) ShuffleQueryStage +(82) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(84) InputAdapter +(83) InputAdapter Input [1]: [n_nationkey#X] -(85) InputIteratorTransformer +(84) InputIteratorTransformer Input [1]: [n_nationkey#X] -(86) ShuffledHashJoinExecTransformer +(85) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(87) ProjectExecTransformer +(86) ProjectExecTransformer Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(88) WholeStageCodegenTransformer (X) +(87) WholeStageCodegenTransformer (X) Input [2]: [s_name#X, s_address#X] Arguments: false -(89) VeloxResizeBatches +(88) VeloxResizeBatches Input [2]: [s_name#X, s_address#X] Arguments: X, X -(90) ColumnarExchange +(89) ColumnarExchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(91) ShuffleQueryStage +(90) ShuffleQueryStage Output [2]: [s_name#X, s_address#X] Arguments: X -(92) InputAdapter +(91) InputAdapter Input [2]: [s_name#X, s_address#X] -(93) InputIteratorTransformer +(92) InputIteratorTransformer Input [2]: [s_name#X, s_address#X] -(94) SortExecTransformer +(93) SortExecTransformer Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(95) WholeStageCodegenTransformer (X) +(94) WholeStageCodegenTransformer (X) Input [2]: [s_name#X, s_address#X] Arguments: false -(96) VeloxColumnarToRow +(95) VeloxColumnarToRow Input [2]: [s_name#X, s_address#X] -(97) Scan parquet +(96) Scan parquet Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_nationkey)] ReadSchema: struct -(98) Filter +(97) Filter Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Condition : isnotnull(s_nationkey#X) -(99) Exchange +(98) Exchange Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Sort +(99) Sort Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 -(101) Scan parquet +(100) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(102) Filter +(101) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(103) Exchange +(102) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(104) Sort +(103) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 -(105) Scan parquet +(104) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(106) Filter +(105) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(107) Project +(106) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(108) Exchange +(107) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Sort +(108) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(110) SortMergeJoin +(109) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(111) Exchange +(110) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) Sort +(111) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 -(113) Scan parquet +(112) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(114) Filter +(113) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(115) Project +(114) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(116) Exchange +(115) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Sort +(116) Sort Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(118) Scan parquet +(117) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(119) Filter +(118) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(120) Project +(119) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(121) Exchange +(120) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) Sort +(121) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(123) SortMergeJoin +(122) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(124) HashAggregate +(123) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(125) HashAggregate +(124) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(126) Filter +(125) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(127) Exchange +(126) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(128) Sort +(127) Sort Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 -(129) SortMergeJoin +(128) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(130) Project +(129) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(131) Exchange +(130) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) Sort +(131) Sort Input [1]: [ps_suppkey#X] Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 -(133) SortMergeJoin +(132) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(134) Project +(133) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(135) Exchange +(134) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) Sort +(135) Sort Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 -(137) Scan parquet +(136) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(138) Filter +(137) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(139) Project +(138) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(140) Exchange +(139) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(141) Sort +(140) Sort Input [1]: [n_nationkey#X] Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 -(142) SortMergeJoin +(141) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(143) Project +(142) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(144) Exchange +(143) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) Sort +(144) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(146) AdaptiveSparkPlan +(145) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt index c4df05de1618f..2e427b6bf3ca1 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt @@ -1,64 +1,63 @@ == Physical Plan == -AdaptiveSparkPlan (67) +AdaptiveSparkPlan (66) +- == Final Plan == - VeloxColumnarToRow (43) - +- TakeOrderedAndProjectExecTransformer (42) - +- ^ ProjectExecTransformer (40) - +- ^ RegularHashAggregateExecTransformer (39) - +- ^ RegularHashAggregateExecTransformer (38) - +- ^ ProjectExecTransformer (37) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) - :- ^ InputIteratorTransformer (26) - : +- ShuffleQueryStage (24) - : +- ColumnarExchange (23) - : +- VeloxResizeBatches (22) - : +- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ ScanTransformer parquet (10) - +- ^ InputIteratorTransformer (35) - +- ShuffleQueryStage (33) - +- ColumnarExchange (32) - +- VeloxResizeBatches (31) - +- ^ ProjectExecTransformer (29) - +- ^ FilterExecTransformer (28) - +- ^ ScanTransformer parquet (27) + VeloxColumnarToRow (42) + +- TakeOrderedAndProjectExecTransformer (41) + +- ^ ProjectExecTransformer (39) + +- ^ RegularHashAggregateExecTransformer (38) + +- ^ ProjectExecTransformer (37) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) + :- ^ InputIteratorTransformer (26) + : +- ShuffleQueryStage (24) + : +- ColumnarExchange (23) + : +- VeloxResizeBatches (22) + : +- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ ScanTransformer parquet (10) + +- ^ InputIteratorTransformer (35) + +- ShuffleQueryStage (33) + +- ColumnarExchange (32) + +- VeloxResizeBatches (31) + +- ^ ProjectExecTransformer (29) + +- ^ FilterExecTransformer (28) + +- ^ ScanTransformer parquet (27) +- == Initial Plan == - TakeOrderedAndProject (66) - +- HashAggregate (65) - +- HashAggregate (64) - +- Project (63) - +- SortMergeJoin Inner (62) - :- Sort (56) - : +- Exchange (55) - : +- Project (54) - : +- SortMergeJoin Inner (53) - : :- Sort (48) - : : +- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Sort (52) - : +- Exchange (51) - : +- Filter (50) - : +- Scan parquet (49) - +- Sort (61) - +- Exchange (60) - +- Project (59) - +- Filter (58) - +- Scan parquet (57) + TakeOrderedAndProject (65) + +- HashAggregate (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (55) + : +- Exchange (54) + : +- Project (53) + : +- SortMergeJoin Inner (52) + : :- Sort (47) + : : +- Exchange (46) + : : +- Project (45) + : : +- Filter (44) + : : +- Scan parquet (43) + : +- Sort (51) + : +- Exchange (50) + : +- Filter (49) + : +- Scan parquet (48) + +- Sort (60) + +- Exchange (59) + +- Project (58) + +- Filter (57) + +- Scan parquet (56) (1) ScanTransformer parquet @@ -215,141 +214,134 @@ Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_exten (38) RegularHashAggregateExecTransformer Input [6]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] - -(39) RegularHashAggregateExecTransformer -Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] -(40) ProjectExecTransformer +(39) ProjectExecTransformer Output [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, o_orderdate#X, o_shippriority#X] Input [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] -(41) WholeStageCodegenTransformer (X) +(40) WholeStageCodegenTransformer (X) Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: false -(42) TakeOrderedAndProjectExecTransformer +(41) TakeOrderedAndProjectExecTransformer Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X], 0 -(43) VeloxColumnarToRow +(42) VeloxColumnarToRow Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(44) Scan parquet +(43) Scan parquet Output [2]: [c_custkey#X, c_mktsegment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_mktsegment), EqualTo(c_mktsegment,BUILDING), IsNotNull(c_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [2]: [c_custkey#X, c_mktsegment#X] Condition : ((isnotnull(c_mktsegment#X) AND (c_mktsegment#X = BUILDING)) AND isnotnull(c_custkey#X)) -(46) Project +(45) Project Output [1]: [c_custkey#X] Input [2]: [c_custkey#X, c_mktsegment#X] -(47) Exchange +(46) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(49) Scan parquet +(48) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(50) Filter +(49) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(51) Exchange +(50) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(51) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(53) SortMergeJoin +(52) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(54) Project +(53) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(55) Exchange +(54) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Sort +(55) Sort Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(57) Scan parquet +(56) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(58) Filter +(57) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(59) Project +(58) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(60) Exchange +(59) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(61) Sort +(60) Sort Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(62) SortMergeJoin +(61) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(63) Project +(62) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(64) HashAggregate +(63) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(65) HashAggregate +(64) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(66) TakeOrderedAndProject +(65) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(67) AdaptiveSparkPlan +(66) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt index cc1f4d31b697b..55d544b898c64 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt @@ -424,55 +424,54 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (136) +AdaptiveSparkPlan (135) +- == Final Plan == - VeloxColumnarToRow (114) - +- ^ ProjectExecTransformer (112) - +- ^ RegularHashAggregateExecTransformer (111) - +- ^ RegularHashAggregateExecTransformer (110) - +- ^ ProjectExecTransformer (109) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) - :- ^ InputIteratorTransformer (103) - : +- ShuffleQueryStage (101), Statistics(X) - : +- ColumnarExchange (100) - : +- VeloxResizeBatches (99) - : +- ^ ProjectExecTransformer (97) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) - : :- ^ InputIteratorTransformer (91) - : : +- ShuffleQueryStage (89), Statistics(X) - : : +- ColumnarExchange (88) - : : +- VeloxResizeBatches (87) - : : +- ^ ProjectExecTransformer (85) - : : +- ^ FilterExecTransformer (84) - : : +- ^ ScanTransformer parquet (83) - : +- ^ InputIteratorTransformer (95) - : +- ShuffleQueryStage (93), Statistics(X) - : +- ReusedExchange (92) - +- ^ InputIteratorTransformer (107) - +- ShuffleQueryStage (105), Statistics(X) - +- ReusedExchange (104) + VeloxColumnarToRow (113) + +- ^ ProjectExecTransformer (111) + +- ^ RegularHashAggregateExecTransformer (110) + +- ^ ProjectExecTransformer (109) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) + :- ^ InputIteratorTransformer (103) + : +- ShuffleQueryStage (101), Statistics(X) + : +- ColumnarExchange (100) + : +- VeloxResizeBatches (99) + : +- ^ ProjectExecTransformer (97) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) + : :- ^ InputIteratorTransformer (91) + : : +- ShuffleQueryStage (89), Statistics(X) + : : +- ColumnarExchange (88) + : : +- VeloxResizeBatches (87) + : : +- ^ ProjectExecTransformer (85) + : : +- ^ FilterExecTransformer (84) + : : +- ^ ScanTransformer parquet (83) + : +- ^ InputIteratorTransformer (95) + : +- ShuffleQueryStage (93), Statistics(X) + : +- ReusedExchange (92) + +- ^ InputIteratorTransformer (107) + +- ShuffleQueryStage (105), Statistics(X) + +- ReusedExchange (104) +- == Initial Plan == - HashAggregate (135) - +- HashAggregate (134) - +- Project (133) - +- SortMergeJoin Inner (132) - :- Sort (126) - : +- Exchange (125) - : +- Project (124) - : +- SortMergeJoin Inner (123) - : :- Sort (118) - : : +- Exchange (117) - : : +- Filter (116) - : : +- Scan parquet (115) - : +- Sort (122) - : +- Exchange (121) - : +- Filter (120) - : +- Scan parquet (119) - +- Sort (131) - +- Exchange (130) - +- Project (129) - +- Filter (128) - +- Scan parquet (127) + HashAggregate (134) + +- HashAggregate (133) + +- Project (132) + +- SortMergeJoin Inner (131) + :- Sort (125) + : +- Exchange (124) + : +- Project (123) + : +- SortMergeJoin Inner (122) + : :- Sort (117) + : : +- Exchange (116) + : : +- Filter (115) + : : +- Scan parquet (114) + : +- Sort (121) + : +- Exchange (120) + : +- Filter (119) + : +- Scan parquet (118) + +- Sort (130) + +- Exchange (129) + +- Project (128) + +- Filter (127) + +- Scan parquet (126) (83) ScanTransformer parquet @@ -581,129 +580,122 @@ Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] (110) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(111) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] -Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(112) ProjectExecTransformer +(111) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(113) WholeStageCodegenTransformer (X) +(112) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(114) VeloxColumnarToRow +(113) VeloxColumnarToRow Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(115) Scan parquet +(114) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(116) Filter +(115) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(117) Exchange +(116) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Sort +(117) Sort Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 -(119) Scan parquet +(118) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(120) Filter +(119) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(121) Exchange +(120) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) Sort +(121) Sort Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 -(123) SortMergeJoin +(122) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(124) Project +(123) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(125) Exchange +(124) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) Sort +(125) Sort Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 -(127) Scan parquet +(126) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(128) Filter +(127) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(129) Project +(128) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(130) Exchange +(129) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(131) Sort +(130) Sort Input [1]: [n_nationkey#X] Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 -(132) SortMergeJoin +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(133) Project +(132) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(134) HashAggregate +(133) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(135) HashAggregate +(134) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(136) AdaptiveSparkPlan +(135) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt index bd84f11c6ac55..7c3509c3e726b 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt @@ -1,55 +1,54 @@ == Physical Plan == -AdaptiveSparkPlan (58) +AdaptiveSparkPlan (57) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ SortExecTransformer (38) - +- ^ InputIteratorTransformer (37) - +- ShuffleQueryStage (35), Statistics(X) - +- ColumnarExchange (34) - +- VeloxResizeBatches (33) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28), Statistics(X) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ ProjectExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ RegularHashAggregateExecTransformer (20) - +- ^ ProjectExecTransformer (19) - +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) - :- ^ InputIteratorTransformer (8) - : +- ShuffleQueryStage (6), Statistics(X) - : +- ColumnarExchange (5) - : +- VeloxResizeBatches (4) - : +- ^ ProjectExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (17) - +- ShuffleQueryStage (15), Statistics(X) - +- ColumnarExchange (14) - +- VeloxResizeBatches (13) - +- ^ ProjectExecTransformer (11) - +- ^ FilterExecTransformer (10) - +- ^ ScanTransformer parquet (9) + VeloxColumnarToRow (39) + +- ^ SortExecTransformer (37) + +- ^ InputIteratorTransformer (36) + +- ShuffleQueryStage (34), Statistics(X) + +- ColumnarExchange (33) + +- VeloxResizeBatches (32) + +- ^ RegularHashAggregateExecTransformer (30) + +- ^ InputIteratorTransformer (29) + +- ShuffleQueryStage (27), Statistics(X) + +- ColumnarExchange (26) + +- VeloxResizeBatches (25) + +- ^ ProjectExecTransformer (23) + +- ^ FlushableHashAggregateExecTransformer (22) + +- ^ ProjectExecTransformer (21) + +- ^ RegularHashAggregateExecTransformer (20) + +- ^ ProjectExecTransformer (19) + +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) + :- ^ InputIteratorTransformer (8) + : +- ShuffleQueryStage (6), Statistics(X) + : +- ColumnarExchange (5) + : +- VeloxResizeBatches (4) + : +- ^ ProjectExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (17) + +- ShuffleQueryStage (15), Statistics(X) + +- ColumnarExchange (14) + +- VeloxResizeBatches (13) + +- ^ ProjectExecTransformer (11) + +- ^ FilterExecTransformer (10) + +- ^ ScanTransformer parquet (9) +- == Initial Plan == - Sort (57) - +- Exchange (56) - +- HashAggregate (55) - +- Exchange (54) - +- HashAggregate (53) - +- HashAggregate (52) - +- HashAggregate (51) - +- Project (50) - +- SortMergeJoin LeftOuter (49) - :- Sort (43) - : +- Exchange (42) - : +- Scan parquet (41) - +- Sort (48) - +- Exchange (47) - +- Project (46) - +- Filter (45) - +- Scan parquet (44) + Sort (56) + +- Exchange (55) + +- HashAggregate (54) + +- Exchange (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin LeftOuter (48) + :- Sort (42) + : +- Exchange (41) + : +- Scan parquet (40) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) ScanTransformer parquet @@ -133,180 +132,173 @@ Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] (20) RegularHashAggregateExecTransformer Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] -Functions [1]: [partial_count(o_orderkey#X)] -Aggregate Attributes [1]: [count#X] -Results [2]: [c_custkey#X, count#X] - -(21) RegularHashAggregateExecTransformer -Input [2]: [c_custkey#X, count#X] -Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [2]: [c_custkey#X, count(o_orderkey#X)#X] -(22) ProjectExecTransformer +(21) ProjectExecTransformer Output [1]: [count(o_orderkey#X)#X AS c_count#X] Input [2]: [c_custkey#X, count(o_orderkey#X)#X] -(23) FlushableHashAggregateExecTransformer +(22) FlushableHashAggregateExecTransformer Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(24) ProjectExecTransformer +(23) ProjectExecTransformer Output [3]: [hash(c_count#X, 42) AS hash_partition_key#X, c_count#X, count#X] Input [2]: [c_count#X, count#X] -(25) WholeStageCodegenTransformer (X) +(24) WholeStageCodegenTransformer (X) Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: false -(26) VeloxResizeBatches +(25) VeloxResizeBatches Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: X, X -(27) ColumnarExchange +(26) ColumnarExchange Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [c_count#X, count#X], [plan_id=X], [shuffle_writer_type=hash] -(28) ShuffleQueryStage +(27) ShuffleQueryStage Output [2]: [c_count#X, count#X] Arguments: X -(29) InputAdapter +(28) InputAdapter Input [2]: [c_count#X, count#X] -(30) InputIteratorTransformer +(29) InputIteratorTransformer Input [2]: [c_count#X, count#X] -(31) RegularHashAggregateExecTransformer +(30) RegularHashAggregateExecTransformer Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(32) WholeStageCodegenTransformer (X) +(31) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(33) VeloxResizeBatches +(32) VeloxResizeBatches Input [2]: [c_count#X, custdist#X] Arguments: X, X -(34) ColumnarExchange +(33) ColumnarExchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(35) ShuffleQueryStage +(34) ShuffleQueryStage Output [2]: [c_count#X, custdist#X] Arguments: X -(36) InputAdapter +(35) InputAdapter Input [2]: [c_count#X, custdist#X] -(37) InputIteratorTransformer +(36) InputIteratorTransformer Input [2]: [c_count#X, custdist#X] -(38) SortExecTransformer +(37) SortExecTransformer Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [2]: [c_count#X, custdist#X] -(41) Scan parquet +(40) Scan parquet Output [1]: [c_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(41) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Sort +(42) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(44) Scan parquet +(43) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(46) Project +(45) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(47) Exchange +(46) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [2]: [o_orderkey#X, o_custkey#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(49) SortMergeJoin +(48) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(50) Project +(49) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(51) HashAggregate +(50) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(52) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(53) HashAggregate +(52) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(54) Exchange +(53) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) HashAggregate +(54) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(56) Exchange +(55) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(57) Sort +(56) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(58) AdaptiveSparkPlan +(57) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt index 5fdc418d02efe..1161b32937bc6 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt @@ -1,40 +1,39 @@ == Physical Plan == -AdaptiveSparkPlan (39) +AdaptiveSparkPlan (38) +- == Final Plan == - VeloxColumnarToRow (25) - +- ^ ProjectExecTransformer (23) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7), Statistics(X) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16), Statistics(X) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ ScanTransformer parquet (10) + VeloxColumnarToRow (24) + +- ^ ProjectExecTransformer (22) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7), Statistics(X) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ ScanTransformer parquet (10) +- == Initial Plan == - HashAggregate (38) - +- HashAggregate (37) - +- Project (36) - +- SortMergeJoin Inner (35) - :- Sort (30) - : +- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Sort (34) - +- Exchange (33) - +- Filter (32) - +- Scan parquet (31) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) ScanTransformer parquet @@ -123,93 +122,86 @@ Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] (21) RegularHashAggregateExecTransformer Input [5]: [l_extendedprice#X, l_discount#X, p_type#X, _pre_X#X, _pre_X#X] Keys: [] -Functions [2]: [partial_sum(_pre_X#X), partial_sum(_pre_X#X)] -Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Keys: [] -Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] +Functions [2]: [sum(_pre_X#X), sum(_pre_X#X)] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(23) ProjectExecTransformer +(22) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X)), DecimalType(38,6))) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X as decimal(38,6)))), DecimalType(38,6)) AS promo_revenue#X] Input [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(24) WholeStageCodegenTransformer (X) +(23) WholeStageCodegenTransformer (X) Input [1]: [promo_revenue#X] Arguments: false -(25) VeloxColumnarToRow +(24) VeloxColumnarToRow Input [1]: [promo_revenue#X] -(26) Scan parquet +(25) Scan parquet Output [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-09-01), LessThan(l_shipdate,1995-10-01), IsNotNull(l_partkey)] ReadSchema: struct -(27) Filter +(26) Filter Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-09-01)) AND (l_shipdate#X < 1995-10-01)) AND isnotnull(l_partkey#X)) -(28) Project +(27) Project Output [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(29) Exchange +(28) Exchange Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Sort +(29) Sort Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(31) Scan parquet +(30) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(32) Filter +(31) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(33) Exchange +(32) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Sort +(33) Sort Input [2]: [p_partkey#X, p_type#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(35) SortMergeJoin +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(36) Project +(35) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(37) HashAggregate +(36) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(38) HashAggregate +(37) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X)), DecimalType(38,6))) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X as decimal(38,6)))), DecimalType(38,6)) AS promo_revenue#X] -(39) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt index 2d327719c3bdd..398cc1f5b8b90 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt @@ -257,31 +257,30 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (73) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRow (64) - +- ^ RegularHashAggregateExecTransformer (62) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ ProjectExecTransformer (60) - +- ^ RegularHashAggregateExecTransformer (59) - +- ^ InputIteratorTransformer (58) - +- ShuffleQueryStage (56), Statistics(X) - +- ColumnarExchange (55) - +- VeloxResizeBatches (54) - +- ^ ProjectExecTransformer (52) - +- ^ FlushableHashAggregateExecTransformer (51) - +- ^ ProjectExecTransformer (50) - +- ^ FilterExecTransformer (49) - +- ^ ScanTransformer parquet (48) + VeloxColumnarToRow (63) + +- ^ RegularHashAggregateExecTransformer (61) + +- ^ ProjectExecTransformer (60) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- VeloxResizeBatches (54) + +- ^ ProjectExecTransformer (52) + +- ^ FlushableHashAggregateExecTransformer (51) + +- ^ ProjectExecTransformer (50) + +- ^ FilterExecTransformer (49) + +- ^ ScanTransformer parquet (48) +- == Initial Plan == - HashAggregate (72) - +- HashAggregate (71) - +- HashAggregate (70) - +- Exchange (69) - +- HashAggregate (68) - +- Project (67) - +- Filter (66) - +- Scan parquet (65) + HashAggregate (71) + +- HashAggregate (70) + +- HashAggregate (69) + +- Exchange (68) + +- HashAggregate (67) + +- Project (66) + +- Filter (65) + +- Scan parquet (64) (48) ScanTransformer parquet @@ -346,71 +345,64 @@ Input [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedpri (61) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] -Functions [1]: [partial_max(total_revenue#X)] -Aggregate Attributes [1]: [max#X] -Results [1]: [max#X] - -(62) RegularHashAggregateExecTransformer -Input [1]: [max#X] -Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(63) WholeStageCodegenTransformer (X) +(62) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(64) VeloxColumnarToRow +(63) VeloxColumnarToRow Input [1]: [max(total_revenue)#X] -(65) Scan parquet +(64) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(66) Filter +(65) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(67) Project +(66) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(68) HashAggregate +(67) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(69) Exchange +(68) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) HashAggregate +(69) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(71) HashAggregate +(70) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(72) HashAggregate +(71) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(73) AdaptiveSparkPlan +(72) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt index c5033fa773a54..843ce9b39eb6d 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt @@ -1,62 +1,61 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (62) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ ProjectExecTransformer (38) - +- ^ RegularHashAggregateExecTransformer (37) - +- ^ RegularHashAggregateExecTransformer (36) - +- ^ ProjectExecTransformer (35) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) - :- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16), Statistics(X) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ ScanTransformer parquet (10) - +- ^ FilterExecTransformer (33) - +- ^ ProjectExecTransformer (32) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28), Statistics(X) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ FilterExecTransformer (22) - +- ^ ScanTransformer parquet (21) + VeloxColumnarToRow (39) + +- ^ ProjectExecTransformer (37) + +- ^ RegularHashAggregateExecTransformer (36) + +- ^ ProjectExecTransformer (35) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) + :- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16), Statistics(X) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ ScanTransformer parquet (10) + +- ^ FilterExecTransformer (33) + +- ^ ProjectExecTransformer (32) + +- ^ RegularHashAggregateExecTransformer (31) + +- ^ InputIteratorTransformer (30) + +- ShuffleQueryStage (28), Statistics(X) + +- ColumnarExchange (27) + +- VeloxResizeBatches (26) + +- ^ ProjectExecTransformer (24) + +- ^ FlushableHashAggregateExecTransformer (23) + +- ^ FilterExecTransformer (22) + +- ^ ScanTransformer parquet (21) +- == Initial Plan == - HashAggregate (62) - +- HashAggregate (61) - +- Project (60) - +- SortMergeJoin Inner (59) - :- Project (51) - : +- SortMergeJoin Inner (50) - : :- Sort (44) - : : +- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Sort (49) - : +- Exchange (48) - : +- Project (47) - : +- Filter (46) - : +- Scan parquet (45) - +- Sort (58) - +- Filter (57) - +- HashAggregate (56) - +- Exchange (55) - +- HashAggregate (54) - +- Filter (53) - +- Scan parquet (52) + HashAggregate (61) + +- HashAggregate (60) + +- Project (59) + +- SortMergeJoin Inner (58) + :- Project (50) + : +- SortMergeJoin Inner (49) + : :- Sort (43) + : : +- Exchange (42) + : : +- Filter (41) + : : +- Scan parquet (40) + : +- Sort (48) + : +- Exchange (47) + : +- Project (46) + : +- Filter (45) + : +- Scan parquet (44) + +- Sort (57) + +- Filter (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- Filter (52) + +- Scan parquet (51) (1) ScanTransformer parquet @@ -213,139 +212,132 @@ Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity) (36) RegularHashAggregateExecTransformer Input [1]: [l_extendedprice#X] Keys: [] -Functions [1]: [partial_sum(l_extendedprice#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(37) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [sum(l_extendedprice#X)#X] -(38) ProjectExecTransformer +(37) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6)) AS avg_yearly#X] Input [1]: [sum(l_extendedprice#X)#X] -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [1]: [avg_yearly#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [1]: [avg_yearly#X] -(41) Scan parquet +(40) Scan parquet Output [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_quantity)] ReadSchema: struct -(42) Filter +(41) Filter Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) -(43) Exchange +(42) Exchange Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(43) Sort Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(45) Scan parquet +(44) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(46) Filter +(45) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(47) Project +(46) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(48) Exchange +(47) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(48) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(50) SortMergeJoin +(49) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(51) Project +(50) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(52) Scan parquet +(51) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(53) Filter +(52) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(54) HashAggregate +(53) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(55) Exchange +(54) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) HashAggregate +(55) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7)) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(57) Filter +(56) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(58) Sort +(57) Sort Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(59) SortMergeJoin +(58) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(60) Project +(59) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(61) HashAggregate +(60) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(62) HashAggregate +(61) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6)) AS avg_yearly#X] -(63) AdaptiveSparkPlan +(62) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt index b07b354959229..1119ddcf80bee 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt @@ -1,102 +1,101 @@ == Physical Plan == -AdaptiveSparkPlan (110) +AdaptiveSparkPlan (109) +- == Final Plan == - VeloxColumnarToRow (70) - +- TakeOrderedAndProjectExecTransformer (69) - +- ^ RegularHashAggregateExecTransformer (67) - +- ^ RegularHashAggregateExecTransformer (66) - +- ^ ProjectExecTransformer (65) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) - :- ^ InputIteratorTransformer (46) - : +- ShuffleQueryStage (44), Statistics(X) - : +- ColumnarExchange (43) - : +- VeloxResizeBatches (42) - : +- ^ ProjectExecTransformer (40) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (38) - : +- ShuffleQueryStage (36), Statistics(X) - : +- ColumnarExchange (35) - : +- VeloxResizeBatches (34) - : +- ^ ProjectExecTransformer (32) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) - : :- ^ InputIteratorTransformer (18) - : : +- ShuffleQueryStage (16), Statistics(X) - : : +- ColumnarExchange (15) - : : +- VeloxResizeBatches (14) - : : +- ^ ProjectExecTransformer (12) - : : +- ^ FilterExecTransformer (11) - : : +- ^ ScanTransformer parquet (10) - : +- ^ ProjectExecTransformer (30) - : +- ^ FilterExecTransformer (29) - : +- ^ RegularHashAggregateExecTransformer (28) - : +- ^ InputIteratorTransformer (27) - : +- ShuffleQueryStage (25), Statistics(X) - : +- ColumnarExchange (24) - : +- VeloxResizeBatches (23) - : +- ^ ProjectExecTransformer (21) - : +- ^ FlushableHashAggregateExecTransformer (20) - : +- ^ ScanTransformer parquet (19) - +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) - :- ^ InputIteratorTransformer (55) - : +- ShuffleQueryStage (53), Statistics(X) - : +- ColumnarExchange (52) - : +- VeloxResizeBatches (51) - : +- ^ ProjectExecTransformer (49) - : +- ^ FilterExecTransformer (48) - : +- ^ ScanTransformer parquet (47) - +- ^ ProjectExecTransformer (62) - +- ^ FilterExecTransformer (61) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ InputIteratorTransformer (59) - +- ShuffleQueryStage (57), Statistics(X) - +- ReusedExchange (56) + VeloxColumnarToRow (69) + +- TakeOrderedAndProjectExecTransformer (68) + +- ^ RegularHashAggregateExecTransformer (66) + +- ^ ProjectExecTransformer (65) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) + :- ^ InputIteratorTransformer (46) + : +- ShuffleQueryStage (44), Statistics(X) + : +- ColumnarExchange (43) + : +- VeloxResizeBatches (42) + : +- ^ ProjectExecTransformer (40) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (38) + : +- ShuffleQueryStage (36), Statistics(X) + : +- ColumnarExchange (35) + : +- VeloxResizeBatches (34) + : +- ^ ProjectExecTransformer (32) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) + : :- ^ InputIteratorTransformer (18) + : : +- ShuffleQueryStage (16), Statistics(X) + : : +- ColumnarExchange (15) + : : +- VeloxResizeBatches (14) + : : +- ^ ProjectExecTransformer (12) + : : +- ^ FilterExecTransformer (11) + : : +- ^ ScanTransformer parquet (10) + : +- ^ ProjectExecTransformer (30) + : +- ^ FilterExecTransformer (29) + : +- ^ RegularHashAggregateExecTransformer (28) + : +- ^ InputIteratorTransformer (27) + : +- ShuffleQueryStage (25), Statistics(X) + : +- ColumnarExchange (24) + : +- VeloxResizeBatches (23) + : +- ^ ProjectExecTransformer (21) + : +- ^ FlushableHashAggregateExecTransformer (20) + : +- ^ ScanTransformer parquet (19) + +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) + :- ^ InputIteratorTransformer (55) + : +- ShuffleQueryStage (53), Statistics(X) + : +- ColumnarExchange (52) + : +- VeloxResizeBatches (51) + : +- ^ ProjectExecTransformer (49) + : +- ^ FilterExecTransformer (48) + : +- ^ ScanTransformer parquet (47) + +- ^ ProjectExecTransformer (62) + +- ^ FilterExecTransformer (61) + +- ^ RegularHashAggregateExecTransformer (60) + +- ^ InputIteratorTransformer (59) + +- ShuffleQueryStage (57), Statistics(X) + +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (109) - +- HashAggregate (108) - +- HashAggregate (107) - +- Project (106) - +- SortMergeJoin Inner (105) - :- Sort (92) - : +- Exchange (91) - : +- Project (90) - : +- SortMergeJoin Inner (89) - : :- Sort (74) - : : +- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Sort (88) - : +- Exchange (87) - : +- SortMergeJoin LeftSemi (86) - : :- Sort (78) - : : +- Exchange (77) - : : +- Filter (76) - : : +- Scan parquet (75) - : +- Sort (85) - : +- Project (84) - : +- Filter (83) - : +- HashAggregate (82) - : +- Exchange (81) - : +- HashAggregate (80) - : +- Scan parquet (79) - +- SortMergeJoin LeftSemi (104) - :- Sort (96) - : +- Exchange (95) - : +- Filter (94) - : +- Scan parquet (93) - +- Sort (103) - +- Project (102) - +- Filter (101) - +- HashAggregate (100) - +- Exchange (99) - +- HashAggregate (98) - +- Scan parquet (97) + TakeOrderedAndProject (108) + +- HashAggregate (107) + +- HashAggregate (106) + +- Project (105) + +- SortMergeJoin Inner (104) + :- Sort (91) + : +- Exchange (90) + : +- Project (89) + : +- SortMergeJoin Inner (88) + : :- Sort (73) + : : +- Exchange (72) + : : +- Filter (71) + : : +- Scan parquet (70) + : +- Sort (87) + : +- Exchange (86) + : +- SortMergeJoin LeftSemi (85) + : :- Sort (77) + : : +- Exchange (76) + : : +- Filter (75) + : : +- Scan parquet (74) + : +- Sort (84) + : +- Project (83) + : +- Filter (82) + : +- HashAggregate (81) + : +- Exchange (80) + : +- HashAggregate (79) + : +- Scan parquet (78) + +- SortMergeJoin LeftSemi (103) + :- Sort (95) + : +- Exchange (94) + : +- Filter (93) + : +- Scan parquet (92) + +- Sort (102) + +- Project (101) + +- Filter (100) + +- HashAggregate (99) + +- Exchange (98) + +- HashAggregate (97) + +- Scan parquet (96) (1) ScanTransformer parquet @@ -371,219 +370,212 @@ Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, (66) RegularHashAggregateExecTransformer Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] - -(67) RegularHashAggregateExecTransformer -Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(68) WholeStageCodegenTransformer (X) +(67) WholeStageCodegenTransformer (X) Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: false -(69) TakeOrderedAndProjectExecTransformer +(68) TakeOrderedAndProjectExecTransformer Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X], 0 -(70) VeloxColumnarToRow +(69) VeloxColumnarToRow Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(71) Scan parquet +(70) Scan parquet Output [2]: [c_custkey#X, c_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey)] ReadSchema: struct -(72) Filter +(71) Filter Input [2]: [c_custkey#X, c_name#X] Condition : isnotnull(c_custkey#X) -(73) Exchange +(72) Exchange Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Sort +(73) Sort Input [2]: [c_custkey#X, c_name#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(75) Scan parquet +(74) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(76) Filter +(75) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(77) Exchange +(76) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Sort +(77) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(79) Scan parquet +(78) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(80) HashAggregate +(79) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(81) Exchange +(80) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) HashAggregate +(81) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(83) Filter +(82) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(84) Project +(83) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(85) Sort +(84) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(86) SortMergeJoin +(85) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(87) Exchange +(86) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Sort +(87) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(89) SortMergeJoin +(88) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(90) Project +(89) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(91) Exchange +(90) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) Sort +(91) Sort Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(93) Scan parquet +(92) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(94) Filter +(93) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(95) Exchange +(94) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) Sort +(95) Sort Input [2]: [l_orderkey#X, l_quantity#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(97) Scan parquet +(96) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(98) HashAggregate +(97) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(99) Exchange +(98) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) HashAggregate +(99) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(101) Filter +(100) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(102) Project +(101) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(103) Sort +(102) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(104) SortMergeJoin +(103) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(105) SortMergeJoin +(104) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(106) Project +(105) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(107) HashAggregate +(106) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(108) HashAggregate +(107) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(109) TakeOrderedAndProject +(108) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(110) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt index 348cc4ce85d5d..31341db339e1f 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt @@ -1,39 +1,38 @@ == Physical Plan == -AdaptiveSparkPlan (38) +AdaptiveSparkPlan (37) +- == Final Plan == - VeloxColumnarToRow (24) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7), Statistics(X) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16), Statistics(X) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ ScanTransformer parquet (10) + VeloxColumnarToRow (23) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7), Statistics(X) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ ScanTransformer parquet (10) +- == Initial Plan == - HashAggregate (37) - +- HashAggregate (36) - +- Project (35) - +- SortMergeJoin Inner (34) - :- Sort (29) - : +- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Sort (33) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (36) + +- HashAggregate (35) + +- Project (34) + +- SortMergeJoin Inner (33) + :- Sort (28) + : +- Exchange (27) + : +- Project (26) + : +- Filter (25) + : +- Scan parquet (24) + +- Sort (32) + +- Exchange (31) + +- Filter (30) + +- Scan parquet (29) (1) ScanTransformer parquet @@ -122,89 +121,82 @@ Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partke (21) RegularHashAggregateExecTransformer Input [3]: [l_extendedprice#X, l_discount#X, _pre_X#X] Keys: [] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(23) WholeStageCodegenTransformer (X) +(22) WholeStageCodegenTransformer (X) Input [1]: [revenue#X] Arguments: false -(24) VeloxColumnarToRow +(23) VeloxColumnarToRow Input [1]: [revenue#X] -(25) Scan parquet +(24) Scan parquet Output [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipinstruct), In(l_shipmode, [AIR,AIR REG]), EqualTo(l_shipinstruct,DELIVER IN PERSON), IsNotNull(l_partkey), Or(Or(And(GreaterThanOrEqual(l_quantity,1.00),LessThanOrEqual(l_quantity,11.00)),And(GreaterThanOrEqual(l_quantity,10.00),LessThanOrEqual(l_quantity,20.00))),And(GreaterThanOrEqual(l_quantity,20.00),LessThanOrEqual(l_quantity,30.00)))] ReadSchema: struct -(26) Filter +(25) Filter Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Condition : ((((isnotnull(l_shipinstruct#X) AND l_shipmode#X IN (AIR,AIR REG)) AND (l_shipinstruct#X = DELIVER IN PERSON)) AND isnotnull(l_partkey#X)) AND ((((l_quantity#X >= 1.00) AND (l_quantity#X <= 11.00)) OR ((l_quantity#X >= 10.00) AND (l_quantity#X <= 20.00))) OR ((l_quantity#X >= 20.00) AND (l_quantity#X <= 30.00)))) -(27) Project +(26) Project Output [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] -(28) Exchange +(27) Exchange Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Sort +(28) Sort Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(30) Scan parquet +(29) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(31) Filter +(30) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(32) Exchange +(31) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) Sort +(32) Sort Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(34) SortMergeJoin +(33) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(35) Project +(34) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(36) HashAggregate +(35) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(37) HashAggregate +(36) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(38) AdaptiveSparkPlan +(37) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt index 703c4a99d63c3..9a2c56040b38b 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt @@ -1,19 +1,19 @@ == Physical Plan == -AdaptiveSparkPlan (143) +AdaptiveSparkPlan (142) +- == Final Plan == - VeloxColumnarToRow (93) - +- AQEShuffleRead (92) - +- ShuffleQueryStage (91), Statistics(X) - +- ColumnarExchange (90) - +- VeloxResizeBatches (89) - +- ^ ProjectExecTransformer (87) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (86) - :- ^ InputIteratorTransformer (76) - : +- ShuffleQueryStage (74), Statistics(X) - : +- ColumnarExchange (73) - : +- VeloxResizeBatches (72) - : +- ^ ProjectExecTransformer (70) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (69) + VeloxColumnarToRow (92) + +- AQEShuffleRead (91) + +- ShuffleQueryStage (90), Statistics(X) + +- ColumnarExchange (89) + +- VeloxResizeBatches (88) + +- ^ ProjectExecTransformer (86) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (85) + :- ^ InputIteratorTransformer (75) + : +- ShuffleQueryStage (73), Statistics(X) + : +- ColumnarExchange (72) + : +- VeloxResizeBatches (71) + : +- ^ ProjectExecTransformer (69) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (68) : :- ^ InputIteratorTransformer (9) : : +- ShuffleQueryStage (7), Statistics(X) : : +- ColumnarExchange (6) @@ -21,12 +21,12 @@ AdaptiveSparkPlan (143) : : +- ^ ProjectExecTransformer (3) : : +- ^ FilterExecTransformer (2) : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (68) - : +- ShuffleQueryStage (66), Statistics(X) - : +- ColumnarExchange (65) - : +- VeloxResizeBatches (64) - : +- ^ ProjectExecTransformer (62) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (61) + : +- ^ InputIteratorTransformer (67) + : +- ShuffleQueryStage (65), Statistics(X) + : +- ColumnarExchange (64) + : +- VeloxResizeBatches (63) + : +- ^ ProjectExecTransformer (61) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (60) : :- ^ InputIteratorTransformer (35) : : +- ShuffleQueryStage (33), Statistics(X) : : +- ColumnarExchange (32) @@ -47,83 +47,82 @@ AdaptiveSparkPlan (143) : : +- ^ ProjectExecTransformer (21) : : +- ^ FilterExecTransformer (20) : : +- ^ ScanTransformer parquet (19) - : +- ^ InputIteratorTransformer (60) - : +- ShuffleQueryStage (58), Statistics(X) - : +- ColumnarExchange (57) - : +- VeloxResizeBatches (56) - : +- ^ ProjectExecTransformer (54) - : +- ^ FilterExecTransformer (53) - : +- ^ ProjectExecTransformer (52) - : +- ^ RegularHashAggregateExecTransformer (51) - : +- ^ RegularHashAggregateExecTransformer (50) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) - : :- ^ InputIteratorTransformer (44) - : : +- ShuffleQueryStage (42), Statistics(X) - : : +- ColumnarExchange (41) - : : +- VeloxResizeBatches (40) - : : +- ^ ProjectExecTransformer (38) - : : +- ^ FilterExecTransformer (37) - : : +- ^ ScanTransformer parquet (36) - : +- ^ InputIteratorTransformer (48) - : +- ShuffleQueryStage (46), Statistics(X) - : +- ReusedExchange (45) - +- ^ InputIteratorTransformer (85) - +- ShuffleQueryStage (83), Statistics(X) - +- ColumnarExchange (82) - +- VeloxResizeBatches (81) - +- ^ ProjectExecTransformer (79) - +- ^ FilterExecTransformer (78) - +- ^ ScanTransformer parquet (77) + : +- ^ InputIteratorTransformer (59) + : +- ShuffleQueryStage (57), Statistics(X) + : +- ColumnarExchange (56) + : +- VeloxResizeBatches (55) + : +- ^ ProjectExecTransformer (53) + : +- ^ FilterExecTransformer (52) + : +- ^ ProjectExecTransformer (51) + : +- ^ RegularHashAggregateExecTransformer (50) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) + : :- ^ InputIteratorTransformer (44) + : : +- ShuffleQueryStage (42), Statistics(X) + : : +- ColumnarExchange (41) + : : +- VeloxResizeBatches (40) + : : +- ^ ProjectExecTransformer (38) + : : +- ^ FilterExecTransformer (37) + : : +- ^ ScanTransformer parquet (36) + : +- ^ InputIteratorTransformer (48) + : +- ShuffleQueryStage (46), Statistics(X) + : +- ReusedExchange (45) + +- ^ InputIteratorTransformer (84) + +- ShuffleQueryStage (82), Statistics(X) + +- ColumnarExchange (81) + +- VeloxResizeBatches (80) + +- ^ ProjectExecTransformer (78) + +- ^ FilterExecTransformer (77) + +- ^ ScanTransformer parquet (76) +- == Initial Plan == - Sort (142) - +- Exchange (141) - +- Project (140) - +- SortMergeJoin Inner (139) - :- Sort (133) - : +- Exchange (132) - : +- Project (131) - : +- SortMergeJoin LeftSemi (130) - : :- Sort (97) - : : +- Exchange (96) - : : +- Filter (95) - : : +- Scan parquet (94) - : +- Sort (129) - : +- Exchange (128) - : +- Project (127) - : +- SortMergeJoin Inner (126) - : :- Sort (109) - : : +- Exchange (108) - : : +- SortMergeJoin LeftSemi (107) - : : :- Sort (101) - : : : +- Exchange (100) - : : : +- Filter (99) - : : : +- Scan parquet (98) - : : +- Sort (106) - : : +- Exchange (105) - : : +- Project (104) - : : +- Filter (103) - : : +- Scan parquet (102) - : +- Sort (125) - : +- Exchange (124) - : +- Filter (123) - : +- HashAggregate (122) - : +- HashAggregate (121) - : +- SortMergeJoin LeftSemi (120) - : :- Sort (114) - : : +- Exchange (113) - : : +- Project (112) - : : +- Filter (111) - : : +- Scan parquet (110) - : +- Sort (119) - : +- Exchange (118) - : +- Project (117) - : +- Filter (116) - : +- Scan parquet (115) - +- Sort (138) - +- Exchange (137) - +- Project (136) - +- Filter (135) - +- Scan parquet (134) + Sort (141) + +- Exchange (140) + +- Project (139) + +- SortMergeJoin Inner (138) + :- Sort (132) + : +- Exchange (131) + : +- Project (130) + : +- SortMergeJoin LeftSemi (129) + : :- Sort (96) + : : +- Exchange (95) + : : +- Filter (94) + : : +- Scan parquet (93) + : +- Sort (128) + : +- Exchange (127) + : +- Project (126) + : +- SortMergeJoin Inner (125) + : :- Sort (108) + : : +- Exchange (107) + : : +- SortMergeJoin LeftSemi (106) + : : :- Sort (100) + : : : +- Exchange (99) + : : : +- Filter (98) + : : : +- Scan parquet (97) + : : +- Sort (105) + : : +- Exchange (104) + : : +- Project (103) + : : +- Filter (102) + : : +- Scan parquet (101) + : +- Sort (124) + : +- Exchange (123) + : +- Filter (122) + : +- HashAggregate (121) + : +- HashAggregate (120) + : +- SortMergeJoin LeftSemi (119) + : :- Sort (113) + : : +- Exchange (112) + : : +- Project (111) + : : +- Filter (110) + : : +- Scan parquet (109) + : +- Sort (118) + : +- Exchange (117) + : +- Project (116) + : +- Filter (115) + : +- Scan parquet (114) + +- Sort (137) + +- Exchange (136) + +- Project (135) + +- Filter (134) + +- Scan parquet (133) (1) ScanTransformer parquet @@ -326,407 +325,400 @@ Join condition: None (50) RegularHashAggregateExecTransformer Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] - -(51) RegularHashAggregateExecTransformer -Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(52) ProjectExecTransformer +(51) ProjectExecTransformer Output [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(53) FilterExecTransformer +(52) FilterExecTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: isnotnull((0.5 * sum(l_quantity))#X) -(54) ProjectExecTransformer +(53) ProjectExecTransformer Output [4]: [hash(l_partkey#X, l_suppkey#X, 42) AS hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(55) WholeStageCodegenTransformer (X) +(54) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: false -(56) VeloxResizeBatches +(55) VeloxResizeBatches Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X, X -(57) ColumnarExchange +(56) ColumnarExchange Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(58) ShuffleQueryStage +(57) ShuffleQueryStage Output [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X -(59) InputAdapter +(58) InputAdapter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(60) InputIteratorTransformer +(59) InputIteratorTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(61) ShuffledHashJoinExecTransformer +(60) ShuffledHashJoinExecTransformer Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(62) ProjectExecTransformer +(61) ProjectExecTransformer Output [2]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(63) WholeStageCodegenTransformer (X) +(62) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: false -(64) VeloxResizeBatches +(63) VeloxResizeBatches Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: X, X -(65) ColumnarExchange +(64) ColumnarExchange Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(66) ShuffleQueryStage +(65) ShuffleQueryStage Output [1]: [ps_suppkey#X] Arguments: X -(67) InputAdapter +(66) InputAdapter Input [1]: [ps_suppkey#X] -(68) InputIteratorTransformer +(67) InputIteratorTransformer Input [1]: [ps_suppkey#X] -(69) ShuffledHashJoinExecTransformer +(68) ShuffledHashJoinExecTransformer Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(70) ProjectExecTransformer +(69) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(71) WholeStageCodegenTransformer (X) +(70) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: false -(72) VeloxResizeBatches +(71) VeloxResizeBatches Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: X, X -(73) ColumnarExchange +(72) ColumnarExchange Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [s_name#X, s_address#X, s_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(74) ShuffleQueryStage +(73) ShuffleQueryStage Output [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: X -(75) InputAdapter +(74) InputAdapter Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(76) InputIteratorTransformer +(75) InputIteratorTransformer Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(77) ScanTransformer parquet +(76) ScanTransformer parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(78) FilterExecTransformer +(77) FilterExecTransformer Input [2]: [n_nationkey#X, n_name#X] Arguments: ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(79) ProjectExecTransformer +(78) ProjectExecTransformer Output [2]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(80) WholeStageCodegenTransformer (X) +(79) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: false -(81) VeloxResizeBatches +(80) VeloxResizeBatches Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: X, X -(82) ColumnarExchange +(81) ColumnarExchange Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(83) ShuffleQueryStage +(82) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(84) InputAdapter +(83) InputAdapter Input [1]: [n_nationkey#X] -(85) InputIteratorTransformer +(84) InputIteratorTransformer Input [1]: [n_nationkey#X] -(86) ShuffledHashJoinExecTransformer +(85) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(87) ProjectExecTransformer +(86) ProjectExecTransformer Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(88) WholeStageCodegenTransformer (X) +(87) WholeStageCodegenTransformer (X) Input [2]: [s_name#X, s_address#X] Arguments: false -(89) VeloxResizeBatches +(88) VeloxResizeBatches Input [2]: [s_name#X, s_address#X] Arguments: X, X -(90) ColumnarExchange +(89) ColumnarExchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(91) ShuffleQueryStage +(90) ShuffleQueryStage Output [2]: [s_name#X, s_address#X] Arguments: X -(92) AQEShuffleRead +(91) AQEShuffleRead Input [2]: [s_name#X, s_address#X] Arguments: local -(93) VeloxColumnarToRow +(92) VeloxColumnarToRow Input [2]: [s_name#X, s_address#X] -(94) Scan parquet +(93) Scan parquet Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_nationkey)] ReadSchema: struct -(95) Filter +(94) Filter Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Condition : isnotnull(s_nationkey#X) -(96) Exchange +(95) Exchange Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) Sort +(96) Sort Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 -(98) Scan parquet +(97) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(99) Filter +(98) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(100) Exchange +(99) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) Sort +(100) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 -(102) Scan parquet +(101) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(103) Filter +(102) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(104) Project +(103) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(105) Exchange +(104) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Sort +(105) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(107) SortMergeJoin +(106) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(108) Exchange +(107) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Sort +(108) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 -(110) Scan parquet +(109) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(111) Filter +(110) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(112) Project +(111) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(113) Exchange +(112) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) Sort +(113) Sort Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(115) Scan parquet +(114) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(116) Filter +(115) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(117) Project +(116) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(118) Exchange +(117) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) Sort +(118) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(120) SortMergeJoin +(119) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(121) HashAggregate +(120) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(122) HashAggregate +(121) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(123) Filter +(122) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(124) Exchange +(123) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) Sort +(124) Sort Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 -(126) SortMergeJoin +(125) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(127) Project +(126) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(128) Exchange +(127) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Sort +(128) Sort Input [1]: [ps_suppkey#X] Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 -(130) SortMergeJoin +(129) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(131) Project +(130) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(132) Exchange +(131) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(133) Sort +(132) Sort Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 -(134) Scan parquet +(133) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(135) Filter +(134) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(136) Project +(135) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(137) Exchange +(136) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(137) Sort Input [1]: [n_nationkey#X] Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 -(139) SortMergeJoin +(138) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(140) Project +(139) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(141) Exchange +(140) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) Sort +(141) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(143) AdaptiveSparkPlan +(142) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt index f6dc9c8015ea3..d09fe3e19a9b1 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt @@ -1,64 +1,63 @@ == Physical Plan == -AdaptiveSparkPlan (67) +AdaptiveSparkPlan (66) +- == Final Plan == - VeloxColumnarToRow (43) - +- TakeOrderedAndProjectExecTransformer (42) - +- ^ ProjectExecTransformer (40) - +- ^ RegularHashAggregateExecTransformer (39) - +- ^ RegularHashAggregateExecTransformer (38) - +- ^ ProjectExecTransformer (37) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) - :- ^ InputIteratorTransformer (26) - : +- ShuffleQueryStage (24), Statistics(X) - : +- ColumnarExchange (23) - : +- VeloxResizeBatches (22) - : +- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16), Statistics(X) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ ScanTransformer parquet (10) - +- ^ InputIteratorTransformer (35) - +- ShuffleQueryStage (33), Statistics(X) - +- ColumnarExchange (32) - +- VeloxResizeBatches (31) - +- ^ ProjectExecTransformer (29) - +- ^ FilterExecTransformer (28) - +- ^ ScanTransformer parquet (27) + VeloxColumnarToRow (42) + +- TakeOrderedAndProjectExecTransformer (41) + +- ^ ProjectExecTransformer (39) + +- ^ RegularHashAggregateExecTransformer (38) + +- ^ ProjectExecTransformer (37) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) + :- ^ InputIteratorTransformer (26) + : +- ShuffleQueryStage (24), Statistics(X) + : +- ColumnarExchange (23) + : +- VeloxResizeBatches (22) + : +- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16), Statistics(X) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ ScanTransformer parquet (10) + +- ^ InputIteratorTransformer (35) + +- ShuffleQueryStage (33), Statistics(X) + +- ColumnarExchange (32) + +- VeloxResizeBatches (31) + +- ^ ProjectExecTransformer (29) + +- ^ FilterExecTransformer (28) + +- ^ ScanTransformer parquet (27) +- == Initial Plan == - TakeOrderedAndProject (66) - +- HashAggregate (65) - +- HashAggregate (64) - +- Project (63) - +- SortMergeJoin Inner (62) - :- Sort (56) - : +- Exchange (55) - : +- Project (54) - : +- SortMergeJoin Inner (53) - : :- Sort (48) - : : +- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Sort (52) - : +- Exchange (51) - : +- Filter (50) - : +- Scan parquet (49) - +- Sort (61) - +- Exchange (60) - +- Project (59) - +- Filter (58) - +- Scan parquet (57) + TakeOrderedAndProject (65) + +- HashAggregate (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (55) + : +- Exchange (54) + : +- Project (53) + : +- SortMergeJoin Inner (52) + : :- Sort (47) + : : +- Exchange (46) + : : +- Project (45) + : : +- Filter (44) + : : +- Scan parquet (43) + : +- Sort (51) + : +- Exchange (50) + : +- Filter (49) + : +- Scan parquet (48) + +- Sort (60) + +- Exchange (59) + +- Project (58) + +- Filter (57) + +- Scan parquet (56) (1) ScanTransformer parquet @@ -215,141 +214,134 @@ Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_exten (38) RegularHashAggregateExecTransformer Input [6]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] - -(39) RegularHashAggregateExecTransformer -Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(40) ProjectExecTransformer +(39) ProjectExecTransformer Output [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] Input [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(41) WholeStageCodegenTransformer (X) +(40) WholeStageCodegenTransformer (X) Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: false -(42) TakeOrderedAndProjectExecTransformer +(41) TakeOrderedAndProjectExecTransformer Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X], 0 -(43) VeloxColumnarToRow +(42) VeloxColumnarToRow Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(44) Scan parquet +(43) Scan parquet Output [2]: [c_custkey#X, c_mktsegment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_mktsegment), EqualTo(c_mktsegment,BUILDING), IsNotNull(c_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [2]: [c_custkey#X, c_mktsegment#X] Condition : ((isnotnull(c_mktsegment#X) AND (c_mktsegment#X = BUILDING)) AND isnotnull(c_custkey#X)) -(46) Project +(45) Project Output [1]: [c_custkey#X] Input [2]: [c_custkey#X, c_mktsegment#X] -(47) Exchange +(46) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(49) Scan parquet +(48) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(50) Filter +(49) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(51) Exchange +(50) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(51) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(53) SortMergeJoin +(52) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(54) Project +(53) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(55) Exchange +(54) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Sort +(55) Sort Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(57) Scan parquet +(56) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(58) Filter +(57) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(59) Project +(58) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(60) Exchange +(59) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(61) Sort +(60) Sort Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(62) SortMergeJoin +(61) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(63) Project +(62) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(64) HashAggregate +(63) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(65) HashAggregate +(64) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(66) TakeOrderedAndProject +(65) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(67) AdaptiveSparkPlan +(66) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt index 5278039ec3851..e2d5e58eba47f 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt @@ -428,55 +428,54 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (136) +AdaptiveSparkPlan (135) +- == Final Plan == - VeloxColumnarToRow (114) - +- ^ ProjectExecTransformer (112) - +- ^ RegularHashAggregateExecTransformer (111) - +- ^ RegularHashAggregateExecTransformer (110) - +- ^ ProjectExecTransformer (109) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) - :- ^ InputIteratorTransformer (103) - : +- ShuffleQueryStage (101), Statistics(X) - : +- ColumnarExchange (100) - : +- VeloxResizeBatches (99) - : +- ^ ProjectExecTransformer (97) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) - : :- ^ InputIteratorTransformer (91) - : : +- ShuffleQueryStage (89), Statistics(X) - : : +- ColumnarExchange (88) - : : +- VeloxResizeBatches (87) - : : +- ^ ProjectExecTransformer (85) - : : +- ^ FilterExecTransformer (84) - : : +- ^ ScanTransformer parquet (83) - : +- ^ InputIteratorTransformer (95) - : +- ShuffleQueryStage (93), Statistics(X) - : +- ReusedExchange (92) - +- ^ InputIteratorTransformer (107) - +- ShuffleQueryStage (105), Statistics(X) - +- ReusedExchange (104) + VeloxColumnarToRow (113) + +- ^ ProjectExecTransformer (111) + +- ^ RegularHashAggregateExecTransformer (110) + +- ^ ProjectExecTransformer (109) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) + :- ^ InputIteratorTransformer (103) + : +- ShuffleQueryStage (101), Statistics(X) + : +- ColumnarExchange (100) + : +- VeloxResizeBatches (99) + : +- ^ ProjectExecTransformer (97) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) + : :- ^ InputIteratorTransformer (91) + : : +- ShuffleQueryStage (89), Statistics(X) + : : +- ColumnarExchange (88) + : : +- VeloxResizeBatches (87) + : : +- ^ ProjectExecTransformer (85) + : : +- ^ FilterExecTransformer (84) + : : +- ^ ScanTransformer parquet (83) + : +- ^ InputIteratorTransformer (95) + : +- ShuffleQueryStage (93), Statistics(X) + : +- ReusedExchange (92) + +- ^ InputIteratorTransformer (107) + +- ShuffleQueryStage (105), Statistics(X) + +- ReusedExchange (104) +- == Initial Plan == - HashAggregate (135) - +- HashAggregate (134) - +- Project (133) - +- SortMergeJoin Inner (132) - :- Sort (126) - : +- Exchange (125) - : +- Project (124) - : +- SortMergeJoin Inner (123) - : :- Sort (118) - : : +- Exchange (117) - : : +- Filter (116) - : : +- Scan parquet (115) - : +- Sort (122) - : +- Exchange (121) - : +- Filter (120) - : +- Scan parquet (119) - +- Sort (131) - +- Exchange (130) - +- Project (129) - +- Filter (128) - +- Scan parquet (127) + HashAggregate (134) + +- HashAggregate (133) + +- Project (132) + +- SortMergeJoin Inner (131) + :- Sort (125) + : +- Exchange (124) + : +- Project (123) + : +- SortMergeJoin Inner (122) + : :- Sort (117) + : : +- Exchange (116) + : : +- Filter (115) + : : +- Scan parquet (114) + : +- Sort (121) + : +- Exchange (120) + : +- Filter (119) + : +- Scan parquet (118) + +- Sort (130) + +- Exchange (129) + +- Project (128) + +- Filter (127) + +- Scan parquet (126) (83) ScanTransformer parquet @@ -587,131 +586,124 @@ Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] (110) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(111) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] -Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(112) ProjectExecTransformer +(111) ProjectExecTransformer Output [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(113) WholeStageCodegenTransformer (X) +(112) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(114) VeloxColumnarToRow +(113) VeloxColumnarToRow Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(115) Scan parquet +(114) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(116) Filter +(115) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(117) Exchange +(116) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Sort +(117) Sort Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 -(119) Scan parquet +(118) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(120) Filter +(119) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(121) Exchange +(120) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) Sort +(121) Sort Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 -(123) SortMergeJoin +(122) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(124) Project +(123) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(125) Exchange +(124) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) Sort +(125) Sort Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 -(127) Scan parquet +(126) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(128) Filter +(127) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(129) Project +(128) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(130) Exchange +(129) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(131) Sort +(130) Sort Input [1]: [n_nationkey#X] Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 -(132) SortMergeJoin +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(133) Project +(132) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(134) HashAggregate +(133) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(135) HashAggregate +(134) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(136) AdaptiveSparkPlan +(135) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt index ca06d654b4322..59abfb682c424 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt @@ -1,55 +1,54 @@ == Physical Plan == -AdaptiveSparkPlan (58) +AdaptiveSparkPlan (57) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ SortExecTransformer (38) - +- ^ InputIteratorTransformer (37) - +- ShuffleQueryStage (35), Statistics(X) - +- ColumnarExchange (34) - +- VeloxResizeBatches (33) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28), Statistics(X) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ ProjectExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ RegularHashAggregateExecTransformer (20) - +- ^ ProjectExecTransformer (19) - +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) - :- ^ InputIteratorTransformer (8) - : +- ShuffleQueryStage (6), Statistics(X) - : +- ColumnarExchange (5) - : +- VeloxResizeBatches (4) - : +- ^ ProjectExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (17) - +- ShuffleQueryStage (15), Statistics(X) - +- ColumnarExchange (14) - +- VeloxResizeBatches (13) - +- ^ ProjectExecTransformer (11) - +- ^ FilterExecTransformer (10) - +- ^ ScanTransformer parquet (9) + VeloxColumnarToRow (39) + +- ^ SortExecTransformer (37) + +- ^ InputIteratorTransformer (36) + +- ShuffleQueryStage (34), Statistics(X) + +- ColumnarExchange (33) + +- VeloxResizeBatches (32) + +- ^ RegularHashAggregateExecTransformer (30) + +- ^ InputIteratorTransformer (29) + +- ShuffleQueryStage (27), Statistics(X) + +- ColumnarExchange (26) + +- VeloxResizeBatches (25) + +- ^ ProjectExecTransformer (23) + +- ^ FlushableHashAggregateExecTransformer (22) + +- ^ ProjectExecTransformer (21) + +- ^ RegularHashAggregateExecTransformer (20) + +- ^ ProjectExecTransformer (19) + +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) + :- ^ InputIteratorTransformer (8) + : +- ShuffleQueryStage (6), Statistics(X) + : +- ColumnarExchange (5) + : +- VeloxResizeBatches (4) + : +- ^ ProjectExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (17) + +- ShuffleQueryStage (15), Statistics(X) + +- ColumnarExchange (14) + +- VeloxResizeBatches (13) + +- ^ ProjectExecTransformer (11) + +- ^ FilterExecTransformer (10) + +- ^ ScanTransformer parquet (9) +- == Initial Plan == - Sort (57) - +- Exchange (56) - +- HashAggregate (55) - +- Exchange (54) - +- HashAggregate (53) - +- HashAggregate (52) - +- HashAggregate (51) - +- Project (50) - +- SortMergeJoin LeftOuter (49) - :- Sort (43) - : +- Exchange (42) - : +- Scan parquet (41) - +- Sort (48) - +- Exchange (47) - +- Project (46) - +- Filter (45) - +- Scan parquet (44) + Sort (56) + +- Exchange (55) + +- HashAggregate (54) + +- Exchange (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin LeftOuter (48) + :- Sort (42) + : +- Exchange (41) + : +- Scan parquet (40) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) ScanTransformer parquet @@ -134,181 +133,174 @@ Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] (20) RegularHashAggregateExecTransformer Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] -Functions [1]: [partial_count(o_orderkey#X)] -Aggregate Attributes [1]: [count#X] -Results [2]: [c_custkey#X, count#X] - -(21) RegularHashAggregateExecTransformer -Input [2]: [c_custkey#X, count#X] -Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [2]: [c_custkey#X, count(o_orderkey#X)#X] -(22) ProjectExecTransformer +(21) ProjectExecTransformer Output [1]: [count(o_orderkey#X)#X AS c_count#X] Input [2]: [c_custkey#X, count(o_orderkey#X)#X] -(23) FlushableHashAggregateExecTransformer +(22) FlushableHashAggregateExecTransformer Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(24) ProjectExecTransformer +(23) ProjectExecTransformer Output [3]: [hash(c_count#X, 42) AS hash_partition_key#X, c_count#X, count#X] Input [2]: [c_count#X, count#X] -(25) WholeStageCodegenTransformer (X) +(24) WholeStageCodegenTransformer (X) Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: false -(26) VeloxResizeBatches +(25) VeloxResizeBatches Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: X, X -(27) ColumnarExchange +(26) ColumnarExchange Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [c_count#X, count#X], [plan_id=X], [shuffle_writer_type=hash] -(28) ShuffleQueryStage +(27) ShuffleQueryStage Output [2]: [c_count#X, count#X] Arguments: X -(29) InputAdapter +(28) InputAdapter Input [2]: [c_count#X, count#X] -(30) InputIteratorTransformer +(29) InputIteratorTransformer Input [2]: [c_count#X, count#X] -(31) RegularHashAggregateExecTransformer +(30) RegularHashAggregateExecTransformer Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(32) WholeStageCodegenTransformer (X) +(31) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(33) VeloxResizeBatches +(32) VeloxResizeBatches Input [2]: [c_count#X, custdist#X] Arguments: X, X -(34) ColumnarExchange +(33) ColumnarExchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(35) ShuffleQueryStage +(34) ShuffleQueryStage Output [2]: [c_count#X, custdist#X] Arguments: X -(36) InputAdapter +(35) InputAdapter Input [2]: [c_count#X, custdist#X] -(37) InputIteratorTransformer +(36) InputIteratorTransformer Input [2]: [c_count#X, custdist#X] -(38) SortExecTransformer +(37) SortExecTransformer Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [2]: [c_count#X, custdist#X] -(41) Scan parquet +(40) Scan parquet Output [1]: [c_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(41) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Sort +(42) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(44) Scan parquet +(43) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(46) Project +(45) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(47) Exchange +(46) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [2]: [o_orderkey#X, o_custkey#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(49) SortMergeJoin +(48) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: LeftOuter Join condition: None -(50) Project +(49) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(51) HashAggregate +(50) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(52) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(53) HashAggregate +(52) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(54) Exchange +(53) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) HashAggregate +(54) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(56) Exchange +(55) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(57) Sort +(56) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(58) AdaptiveSparkPlan +(57) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt index c61d21b92ed1e..531cc5aaab551 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt @@ -1,40 +1,39 @@ == Physical Plan == -AdaptiveSparkPlan (39) +AdaptiveSparkPlan (38) +- == Final Plan == - VeloxColumnarToRow (25) - +- ^ ProjectExecTransformer (23) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7), Statistics(X) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16), Statistics(X) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ ScanTransformer parquet (10) + VeloxColumnarToRow (24) + +- ^ ProjectExecTransformer (22) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7), Statistics(X) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ ScanTransformer parquet (10) +- == Initial Plan == - HashAggregate (38) - +- HashAggregate (37) - +- Project (36) - +- SortMergeJoin Inner (35) - :- Sort (30) - : +- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Sort (34) - +- Exchange (33) - +- Filter (32) - +- Scan parquet (31) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) ScanTransformer parquet @@ -124,94 +123,87 @@ Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] (21) RegularHashAggregateExecTransformer Input [5]: [l_extendedprice#X, l_discount#X, p_type#X, _pre_X#X, _pre_X#X] Keys: [] -Functions [2]: [partial_sum(_pre_X#X), partial_sum(_pre_X#X)] -Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Keys: [] -Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), sum((l_extendedprice#X * (1 - l_discount#X)))] +Functions [2]: [sum(_pre_X#X), sum(_pre_X#X)] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(23) ProjectExecTransformer +(22) ProjectExecTransformer Output [1]: [((100.00 * sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X) / sum((l_extendedprice#X * (1 - l_discount#X)))#X) AS promo_revenue#X] Input [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(24) WholeStageCodegenTransformer (X) +(23) WholeStageCodegenTransformer (X) Input [1]: [promo_revenue#X] Arguments: false -(25) VeloxColumnarToRow +(24) VeloxColumnarToRow Input [1]: [promo_revenue#X] -(26) Scan parquet +(25) Scan parquet Output [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-09-01), LessThan(l_shipdate,1995-10-01), IsNotNull(l_partkey)] ReadSchema: struct -(27) Filter +(26) Filter Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-09-01)) AND (l_shipdate#X < 1995-10-01)) AND isnotnull(l_partkey#X)) -(28) Project +(27) Project Output [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(29) Exchange +(28) Exchange Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Sort +(29) Sort Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(31) Scan parquet +(30) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(32) Filter +(31) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(33) Exchange +(32) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Sort +(33) Sort Input [2]: [p_partkey#X, p_type#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(35) SortMergeJoin +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(36) Project +(35) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(37) HashAggregate +(36) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(38) HashAggregate +(37) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [((100.00 * sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X) / sum((l_extendedprice#X * (1 - l_discount#X)))#X) AS promo_revenue#X] -(39) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt index 3585c0cc2f34d..f370146f9206e 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt @@ -259,31 +259,30 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (73) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRow (64) - +- ^ RegularHashAggregateExecTransformer (62) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ ProjectExecTransformer (60) - +- ^ RegularHashAggregateExecTransformer (59) - +- ^ InputIteratorTransformer (58) - +- ShuffleQueryStage (56), Statistics(X) - +- ColumnarExchange (55) - +- VeloxResizeBatches (54) - +- ^ ProjectExecTransformer (52) - +- ^ FlushableHashAggregateExecTransformer (51) - +- ^ ProjectExecTransformer (50) - +- ^ FilterExecTransformer (49) - +- ^ ScanTransformer parquet (48) + VeloxColumnarToRow (63) + +- ^ RegularHashAggregateExecTransformer (61) + +- ^ ProjectExecTransformer (60) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- VeloxResizeBatches (54) + +- ^ ProjectExecTransformer (52) + +- ^ FlushableHashAggregateExecTransformer (51) + +- ^ ProjectExecTransformer (50) + +- ^ FilterExecTransformer (49) + +- ^ ScanTransformer parquet (48) +- == Initial Plan == - HashAggregate (72) - +- HashAggregate (71) - +- HashAggregate (70) - +- Exchange (69) - +- HashAggregate (68) - +- Project (67) - +- Filter (66) - +- Scan parquet (65) + HashAggregate (71) + +- HashAggregate (70) + +- HashAggregate (69) + +- Exchange (68) + +- HashAggregate (67) + +- Project (66) + +- Filter (65) + +- Scan parquet (64) (48) ScanTransformer parquet @@ -348,71 +347,64 @@ Input [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] (61) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] -Functions [1]: [partial_max(total_revenue#X)] -Aggregate Attributes [1]: [max#X] -Results [1]: [max#X] - -(62) RegularHashAggregateExecTransformer -Input [1]: [max#X] -Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(63) WholeStageCodegenTransformer (X) +(62) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(64) VeloxColumnarToRow +(63) VeloxColumnarToRow Input [1]: [max(total_revenue)#X] -(65) Scan parquet +(64) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(66) Filter +(65) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(67) Project +(66) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(68) HashAggregate +(67) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(69) Exchange +(68) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) HashAggregate +(69) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(71) HashAggregate +(70) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(72) HashAggregate +(71) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(73) AdaptiveSparkPlan +(72) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt index c6d0e01033635..c1b43b7fc421f 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt @@ -1,62 +1,61 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (62) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ ProjectExecTransformer (38) - +- ^ RegularHashAggregateExecTransformer (37) - +- ^ RegularHashAggregateExecTransformer (36) - +- ^ ProjectExecTransformer (35) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) - :- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16), Statistics(X) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ ScanTransformer parquet (10) - +- ^ FilterExecTransformer (33) - +- ^ ProjectExecTransformer (32) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28), Statistics(X) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ FilterExecTransformer (22) - +- ^ ScanTransformer parquet (21) + VeloxColumnarToRow (39) + +- ^ ProjectExecTransformer (37) + +- ^ RegularHashAggregateExecTransformer (36) + +- ^ ProjectExecTransformer (35) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) + :- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16), Statistics(X) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ ScanTransformer parquet (10) + +- ^ FilterExecTransformer (33) + +- ^ ProjectExecTransformer (32) + +- ^ RegularHashAggregateExecTransformer (31) + +- ^ InputIteratorTransformer (30) + +- ShuffleQueryStage (28), Statistics(X) + +- ColumnarExchange (27) + +- VeloxResizeBatches (26) + +- ^ ProjectExecTransformer (24) + +- ^ FlushableHashAggregateExecTransformer (23) + +- ^ FilterExecTransformer (22) + +- ^ ScanTransformer parquet (21) +- == Initial Plan == - HashAggregate (62) - +- HashAggregate (61) - +- Project (60) - +- SortMergeJoin Inner (59) - :- Project (51) - : +- SortMergeJoin Inner (50) - : :- Sort (44) - : : +- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Sort (49) - : +- Exchange (48) - : +- Project (47) - : +- Filter (46) - : +- Scan parquet (45) - +- Sort (58) - +- Filter (57) - +- HashAggregate (56) - +- Exchange (55) - +- HashAggregate (54) - +- Filter (53) - +- Scan parquet (52) + HashAggregate (61) + +- HashAggregate (60) + +- Project (59) + +- SortMergeJoin Inner (58) + :- Project (50) + : +- SortMergeJoin Inner (49) + : :- Sort (43) + : : +- Exchange (42) + : : +- Filter (41) + : : +- Scan parquet (40) + : +- Sort (48) + : +- Exchange (47) + : +- Project (46) + : +- Filter (45) + : +- Scan parquet (44) + +- Sort (57) + +- Filter (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- Filter (52) + +- Scan parquet (51) (1) ScanTransformer parquet @@ -215,141 +214,134 @@ Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity) (36) RegularHashAggregateExecTransformer Input [1]: [l_extendedprice#X] Keys: [] -Functions [1]: [partial_sum(l_extendedprice#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(37) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [sum(l_extendedprice#X)#X] -(38) ProjectExecTransformer +(37) ProjectExecTransformer Output [1]: [(sum(l_extendedprice#X)#X / 7.0) AS avg_yearly#X] Input [1]: [sum(l_extendedprice#X)#X] -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [1]: [avg_yearly#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [1]: [avg_yearly#X] -(41) Scan parquet +(40) Scan parquet Output [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_quantity)] ReadSchema: struct -(42) Filter +(41) Filter Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) -(43) Exchange +(42) Exchange Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(43) Sort Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(45) Scan parquet +(44) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(46) Filter +(45) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(47) Project +(46) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(48) Exchange +(47) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(48) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(50) SortMergeJoin +(49) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(51) Project +(50) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(52) Scan parquet +(51) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(53) Filter +(52) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(54) HashAggregate +(53) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(55) Exchange +(54) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) HashAggregate +(55) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [(0.2 * avg(l_quantity#X)#X) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(57) Filter +(56) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(58) Sort +(57) Sort Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(59) SortMergeJoin +(58) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(60) Project +(59) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(61) HashAggregate +(60) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(62) HashAggregate +(61) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [(sum(l_extendedprice#X)#X / 7.0) AS avg_yearly#X] -(63) AdaptiveSparkPlan +(62) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt index 50a076b1ef8fc..c1a6b181bb34e 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt @@ -1,102 +1,101 @@ == Physical Plan == -AdaptiveSparkPlan (110) +AdaptiveSparkPlan (109) +- == Final Plan == - VeloxColumnarToRow (70) - +- TakeOrderedAndProjectExecTransformer (69) - +- ^ RegularHashAggregateExecTransformer (67) - +- ^ RegularHashAggregateExecTransformer (66) - +- ^ ProjectExecTransformer (65) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) - :- ^ InputIteratorTransformer (46) - : +- ShuffleQueryStage (44), Statistics(X) - : +- ColumnarExchange (43) - : +- VeloxResizeBatches (42) - : +- ^ ProjectExecTransformer (40) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (38) - : +- ShuffleQueryStage (36), Statistics(X) - : +- ColumnarExchange (35) - : +- VeloxResizeBatches (34) - : +- ^ ProjectExecTransformer (32) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) - : :- ^ InputIteratorTransformer (18) - : : +- ShuffleQueryStage (16), Statistics(X) - : : +- ColumnarExchange (15) - : : +- VeloxResizeBatches (14) - : : +- ^ ProjectExecTransformer (12) - : : +- ^ FilterExecTransformer (11) - : : +- ^ ScanTransformer parquet (10) - : +- ^ ProjectExecTransformer (30) - : +- ^ FilterExecTransformer (29) - : +- ^ RegularHashAggregateExecTransformer (28) - : +- ^ InputIteratorTransformer (27) - : +- ShuffleQueryStage (25), Statistics(X) - : +- ColumnarExchange (24) - : +- VeloxResizeBatches (23) - : +- ^ ProjectExecTransformer (21) - : +- ^ FlushableHashAggregateExecTransformer (20) - : +- ^ ScanTransformer parquet (19) - +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) - :- ^ InputIteratorTransformer (55) - : +- ShuffleQueryStage (53), Statistics(X) - : +- ColumnarExchange (52) - : +- VeloxResizeBatches (51) - : +- ^ ProjectExecTransformer (49) - : +- ^ FilterExecTransformer (48) - : +- ^ ScanTransformer parquet (47) - +- ^ ProjectExecTransformer (62) - +- ^ FilterExecTransformer (61) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ InputIteratorTransformer (59) - +- ShuffleQueryStage (57), Statistics(X) - +- ReusedExchange (56) + VeloxColumnarToRow (69) + +- TakeOrderedAndProjectExecTransformer (68) + +- ^ RegularHashAggregateExecTransformer (66) + +- ^ ProjectExecTransformer (65) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) + :- ^ InputIteratorTransformer (46) + : +- ShuffleQueryStage (44), Statistics(X) + : +- ColumnarExchange (43) + : +- VeloxResizeBatches (42) + : +- ^ ProjectExecTransformer (40) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (38) + : +- ShuffleQueryStage (36), Statistics(X) + : +- ColumnarExchange (35) + : +- VeloxResizeBatches (34) + : +- ^ ProjectExecTransformer (32) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) + : :- ^ InputIteratorTransformer (18) + : : +- ShuffleQueryStage (16), Statistics(X) + : : +- ColumnarExchange (15) + : : +- VeloxResizeBatches (14) + : : +- ^ ProjectExecTransformer (12) + : : +- ^ FilterExecTransformer (11) + : : +- ^ ScanTransformer parquet (10) + : +- ^ ProjectExecTransformer (30) + : +- ^ FilterExecTransformer (29) + : +- ^ RegularHashAggregateExecTransformer (28) + : +- ^ InputIteratorTransformer (27) + : +- ShuffleQueryStage (25), Statistics(X) + : +- ColumnarExchange (24) + : +- VeloxResizeBatches (23) + : +- ^ ProjectExecTransformer (21) + : +- ^ FlushableHashAggregateExecTransformer (20) + : +- ^ ScanTransformer parquet (19) + +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) + :- ^ InputIteratorTransformer (55) + : +- ShuffleQueryStage (53), Statistics(X) + : +- ColumnarExchange (52) + : +- VeloxResizeBatches (51) + : +- ^ ProjectExecTransformer (49) + : +- ^ FilterExecTransformer (48) + : +- ^ ScanTransformer parquet (47) + +- ^ ProjectExecTransformer (62) + +- ^ FilterExecTransformer (61) + +- ^ RegularHashAggregateExecTransformer (60) + +- ^ InputIteratorTransformer (59) + +- ShuffleQueryStage (57), Statistics(X) + +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (109) - +- HashAggregate (108) - +- HashAggregate (107) - +- Project (106) - +- SortMergeJoin Inner (105) - :- Sort (92) - : +- Exchange (91) - : +- Project (90) - : +- SortMergeJoin Inner (89) - : :- Sort (74) - : : +- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Sort (88) - : +- Exchange (87) - : +- SortMergeJoin LeftSemi (86) - : :- Sort (78) - : : +- Exchange (77) - : : +- Filter (76) - : : +- Scan parquet (75) - : +- Sort (85) - : +- Project (84) - : +- Filter (83) - : +- HashAggregate (82) - : +- Exchange (81) - : +- HashAggregate (80) - : +- Scan parquet (79) - +- SortMergeJoin LeftSemi (104) - :- Sort (96) - : +- Exchange (95) - : +- Filter (94) - : +- Scan parquet (93) - +- Sort (103) - +- Project (102) - +- Filter (101) - +- HashAggregate (100) - +- Exchange (99) - +- HashAggregate (98) - +- Scan parquet (97) + TakeOrderedAndProject (108) + +- HashAggregate (107) + +- HashAggregate (106) + +- Project (105) + +- SortMergeJoin Inner (104) + :- Sort (91) + : +- Exchange (90) + : +- Project (89) + : +- SortMergeJoin Inner (88) + : :- Sort (73) + : : +- Exchange (72) + : : +- Filter (71) + : : +- Scan parquet (70) + : +- Sort (87) + : +- Exchange (86) + : +- SortMergeJoin LeftSemi (85) + : :- Sort (77) + : : +- Exchange (76) + : : +- Filter (75) + : : +- Scan parquet (74) + : +- Sort (84) + : +- Project (83) + : +- Filter (82) + : +- HashAggregate (81) + : +- Exchange (80) + : +- HashAggregate (79) + : +- Scan parquet (78) + +- SortMergeJoin LeftSemi (103) + :- Sort (95) + : +- Exchange (94) + : +- Filter (93) + : +- Scan parquet (92) + +- Sort (102) + +- Project (101) + +- Filter (100) + +- HashAggregate (99) + +- Exchange (98) + +- HashAggregate (97) + +- Scan parquet (96) (1) ScanTransformer parquet @@ -375,223 +374,216 @@ Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, (66) RegularHashAggregateExecTransformer Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] - -(67) RegularHashAggregateExecTransformer -Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(68) WholeStageCodegenTransformer (X) +(67) WholeStageCodegenTransformer (X) Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: false -(69) TakeOrderedAndProjectExecTransformer +(68) TakeOrderedAndProjectExecTransformer Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X], 0 -(70) VeloxColumnarToRow +(69) VeloxColumnarToRow Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(71) Scan parquet +(70) Scan parquet Output [2]: [c_custkey#X, c_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey)] ReadSchema: struct -(72) Filter +(71) Filter Input [2]: [c_custkey#X, c_name#X] Condition : isnotnull(c_custkey#X) -(73) Exchange +(72) Exchange Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Sort +(73) Sort Input [2]: [c_custkey#X, c_name#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(75) Scan parquet +(74) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(76) Filter +(75) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(77) Exchange +(76) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Sort +(77) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(79) Scan parquet +(78) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(80) HashAggregate +(79) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(81) Exchange +(80) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) HashAggregate +(81) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(83) Filter +(82) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(84) Project +(83) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(85) Sort +(84) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(86) SortMergeJoin +(85) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(87) Exchange +(86) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Sort +(87) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(89) SortMergeJoin +(88) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(90) Project +(89) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(91) Exchange +(90) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) Sort +(91) Sort Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(93) Scan parquet +(92) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(94) Filter +(93) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(95) Exchange +(94) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) Sort +(95) Sort Input [2]: [l_orderkey#X, l_quantity#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(97) Scan parquet +(96) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(98) HashAggregate +(97) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(99) Exchange +(98) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) HashAggregate +(99) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(101) Filter +(100) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(102) Project +(101) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(103) Sort +(102) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(104) SortMergeJoin +(103) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(105) SortMergeJoin +(104) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(106) Project +(105) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(107) HashAggregate +(106) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(108) HashAggregate +(107) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(109) TakeOrderedAndProject +(108) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(110) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt index fb7e231fa5b31..4db9ca0c8393c 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt @@ -1,39 +1,38 @@ == Physical Plan == -AdaptiveSparkPlan (38) +AdaptiveSparkPlan (37) +- == Final Plan == - VeloxColumnarToRow (24) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7), Statistics(X) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ ScanTransformer parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16), Statistics(X) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ ScanTransformer parquet (10) + VeloxColumnarToRow (23) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7), Statistics(X) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ ScanTransformer parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ ScanTransformer parquet (10) +- == Initial Plan == - HashAggregate (37) - +- HashAggregate (36) - +- Project (35) - +- SortMergeJoin Inner (34) - :- Sort (29) - : +- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Sort (33) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (36) + +- HashAggregate (35) + +- Project (34) + +- SortMergeJoin Inner (33) + :- Sort (28) + : +- Exchange (27) + : +- Project (26) + : +- Filter (25) + : +- Scan parquet (24) + +- Sort (32) + +- Exchange (31) + +- Filter (30) + +- Scan parquet (29) (1) ScanTransformer parquet @@ -123,90 +122,83 @@ Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partke (21) RegularHashAggregateExecTransformer Input [3]: [l_extendedprice#X, l_discount#X, _pre_X#X] Keys: [] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] -Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(23) WholeStageCodegenTransformer (X) +(22) WholeStageCodegenTransformer (X) Input [1]: [revenue#X] Arguments: false -(24) VeloxColumnarToRow +(23) VeloxColumnarToRow Input [1]: [revenue#X] -(25) Scan parquet +(24) Scan parquet Output [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipinstruct), In(l_shipmode, [AIR,AIR REG]), EqualTo(l_shipinstruct,DELIVER IN PERSON), IsNotNull(l_partkey), Or(Or(And(GreaterThanOrEqual(l_quantity,1.00),LessThanOrEqual(l_quantity,11.00)),And(GreaterThanOrEqual(l_quantity,10.00),LessThanOrEqual(l_quantity,20.00))),And(GreaterThanOrEqual(l_quantity,20.00),LessThanOrEqual(l_quantity,30.00)))] ReadSchema: struct -(26) Filter +(25) Filter Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Condition : ((((isnotnull(l_shipinstruct#X) AND l_shipmode#X IN (AIR,AIR REG)) AND (l_shipinstruct#X = DELIVER IN PERSON)) AND isnotnull(l_partkey#X)) AND ((((l_quantity#X >= 1.00) AND (l_quantity#X <= 11.00)) OR ((l_quantity#X >= 10.00) AND (l_quantity#X <= 20.00))) OR ((l_quantity#X >= 20.00) AND (l_quantity#X <= 30.00)))) -(27) Project +(26) Project Output [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] -(28) Exchange +(27) Exchange Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Sort +(28) Sort Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(30) Scan parquet +(29) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(31) Filter +(30) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(32) Exchange +(31) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) Sort +(32) Sort Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(34) SortMergeJoin +(33) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(35) Project +(34) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(36) HashAggregate +(35) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(37) HashAggregate +(36) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(38) AdaptiveSparkPlan +(37) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt index 4e427961a4f63..4d8eee1f1fb7f 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt @@ -1,19 +1,19 @@ == Physical Plan == -AdaptiveSparkPlan (143) +AdaptiveSparkPlan (142) +- == Final Plan == - VeloxColumnarToRow (93) - +- AQEShuffleRead (92) - +- ShuffleQueryStage (91), Statistics(X) - +- ColumnarExchange (90) - +- VeloxResizeBatches (89) - +- ^ ProjectExecTransformer (87) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (86) - :- ^ InputIteratorTransformer (76) - : +- ShuffleQueryStage (74), Statistics(X) - : +- ColumnarExchange (73) - : +- VeloxResizeBatches (72) - : +- ^ ProjectExecTransformer (70) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (69) + VeloxColumnarToRow (92) + +- AQEShuffleRead (91) + +- ShuffleQueryStage (90), Statistics(X) + +- ColumnarExchange (89) + +- VeloxResizeBatches (88) + +- ^ ProjectExecTransformer (86) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (85) + :- ^ InputIteratorTransformer (75) + : +- ShuffleQueryStage (73), Statistics(X) + : +- ColumnarExchange (72) + : +- VeloxResizeBatches (71) + : +- ^ ProjectExecTransformer (69) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (68) : :- ^ InputIteratorTransformer (9) : : +- ShuffleQueryStage (7), Statistics(X) : : +- ColumnarExchange (6) @@ -21,12 +21,12 @@ AdaptiveSparkPlan (143) : : +- ^ ProjectExecTransformer (3) : : +- ^ FilterExecTransformer (2) : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (68) - : +- ShuffleQueryStage (66), Statistics(X) - : +- ColumnarExchange (65) - : +- VeloxResizeBatches (64) - : +- ^ ProjectExecTransformer (62) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (61) + : +- ^ InputIteratorTransformer (67) + : +- ShuffleQueryStage (65), Statistics(X) + : +- ColumnarExchange (64) + : +- VeloxResizeBatches (63) + : +- ^ ProjectExecTransformer (61) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (60) : :- ^ InputIteratorTransformer (35) : : +- ShuffleQueryStage (33), Statistics(X) : : +- ColumnarExchange (32) @@ -47,83 +47,82 @@ AdaptiveSparkPlan (143) : : +- ^ ProjectExecTransformer (21) : : +- ^ FilterExecTransformer (20) : : +- ^ ScanTransformer parquet (19) - : +- ^ InputIteratorTransformer (60) - : +- ShuffleQueryStage (58), Statistics(X) - : +- ColumnarExchange (57) - : +- VeloxResizeBatches (56) - : +- ^ ProjectExecTransformer (54) - : +- ^ FilterExecTransformer (53) - : +- ^ ProjectExecTransformer (52) - : +- ^ RegularHashAggregateExecTransformer (51) - : +- ^ RegularHashAggregateExecTransformer (50) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) - : :- ^ InputIteratorTransformer (44) - : : +- ShuffleQueryStage (42), Statistics(X) - : : +- ColumnarExchange (41) - : : +- VeloxResizeBatches (40) - : : +- ^ ProjectExecTransformer (38) - : : +- ^ FilterExecTransformer (37) - : : +- ^ ScanTransformer parquet (36) - : +- ^ InputIteratorTransformer (48) - : +- ShuffleQueryStage (46), Statistics(X) - : +- ReusedExchange (45) - +- ^ InputIteratorTransformer (85) - +- ShuffleQueryStage (83), Statistics(X) - +- ColumnarExchange (82) - +- VeloxResizeBatches (81) - +- ^ ProjectExecTransformer (79) - +- ^ FilterExecTransformer (78) - +- ^ ScanTransformer parquet (77) + : +- ^ InputIteratorTransformer (59) + : +- ShuffleQueryStage (57), Statistics(X) + : +- ColumnarExchange (56) + : +- VeloxResizeBatches (55) + : +- ^ ProjectExecTransformer (53) + : +- ^ FilterExecTransformer (52) + : +- ^ ProjectExecTransformer (51) + : +- ^ RegularHashAggregateExecTransformer (50) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) + : :- ^ InputIteratorTransformer (44) + : : +- ShuffleQueryStage (42), Statistics(X) + : : +- ColumnarExchange (41) + : : +- VeloxResizeBatches (40) + : : +- ^ ProjectExecTransformer (38) + : : +- ^ FilterExecTransformer (37) + : : +- ^ ScanTransformer parquet (36) + : +- ^ InputIteratorTransformer (48) + : +- ShuffleQueryStage (46), Statistics(X) + : +- ReusedExchange (45) + +- ^ InputIteratorTransformer (84) + +- ShuffleQueryStage (82), Statistics(X) + +- ColumnarExchange (81) + +- VeloxResizeBatches (80) + +- ^ ProjectExecTransformer (78) + +- ^ FilterExecTransformer (77) + +- ^ ScanTransformer parquet (76) +- == Initial Plan == - Sort (142) - +- Exchange (141) - +- Project (140) - +- SortMergeJoin Inner (139) - :- Sort (133) - : +- Exchange (132) - : +- Project (131) - : +- SortMergeJoin LeftSemi (130) - : :- Sort (97) - : : +- Exchange (96) - : : +- Filter (95) - : : +- Scan parquet (94) - : +- Sort (129) - : +- Exchange (128) - : +- Project (127) - : +- SortMergeJoin Inner (126) - : :- Sort (109) - : : +- Exchange (108) - : : +- SortMergeJoin LeftSemi (107) - : : :- Sort (101) - : : : +- Exchange (100) - : : : +- Filter (99) - : : : +- Scan parquet (98) - : : +- Sort (106) - : : +- Exchange (105) - : : +- Project (104) - : : +- Filter (103) - : : +- Scan parquet (102) - : +- Sort (125) - : +- Exchange (124) - : +- Filter (123) - : +- HashAggregate (122) - : +- HashAggregate (121) - : +- SortMergeJoin LeftSemi (120) - : :- Sort (114) - : : +- Exchange (113) - : : +- Project (112) - : : +- Filter (111) - : : +- Scan parquet (110) - : +- Sort (119) - : +- Exchange (118) - : +- Project (117) - : +- Filter (116) - : +- Scan parquet (115) - +- Sort (138) - +- Exchange (137) - +- Project (136) - +- Filter (135) - +- Scan parquet (134) + Sort (141) + +- Exchange (140) + +- Project (139) + +- SortMergeJoin Inner (138) + :- Sort (132) + : +- Exchange (131) + : +- Project (130) + : +- SortMergeJoin LeftSemi (129) + : :- Sort (96) + : : +- Exchange (95) + : : +- Filter (94) + : : +- Scan parquet (93) + : +- Sort (128) + : +- Exchange (127) + : +- Project (126) + : +- SortMergeJoin Inner (125) + : :- Sort (108) + : : +- Exchange (107) + : : +- SortMergeJoin LeftSemi (106) + : : :- Sort (100) + : : : +- Exchange (99) + : : : +- Filter (98) + : : : +- Scan parquet (97) + : : +- Sort (105) + : : +- Exchange (104) + : : +- Project (103) + : : +- Filter (102) + : : +- Scan parquet (101) + : +- Sort (124) + : +- Exchange (123) + : +- Filter (122) + : +- HashAggregate (121) + : +- HashAggregate (120) + : +- SortMergeJoin LeftSemi (119) + : :- Sort (113) + : : +- Exchange (112) + : : +- Project (111) + : : +- Filter (110) + : : +- Scan parquet (109) + : +- Sort (118) + : +- Exchange (117) + : +- Project (116) + : +- Filter (115) + : +- Scan parquet (114) + +- Sort (137) + +- Exchange (136) + +- Project (135) + +- Filter (134) + +- Scan parquet (133) (1) ScanTransformer parquet @@ -328,415 +327,408 @@ Join condition: None (50) RegularHashAggregateExecTransformer Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] - -(51) RegularHashAggregateExecTransformer -Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(52) ProjectExecTransformer +(51) ProjectExecTransformer Output [3]: [(0.5 * sum(l_quantity#X)#X) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(53) FilterExecTransformer +(52) FilterExecTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: isnotnull((0.5 * sum(l_quantity))#X) -(54) ProjectExecTransformer +(53) ProjectExecTransformer Output [4]: [hash(l_partkey#X, l_suppkey#X, 42) AS hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(55) WholeStageCodegenTransformer (X) +(54) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: false -(56) VeloxResizeBatches +(55) VeloxResizeBatches Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X, X -(57) ColumnarExchange +(56) ColumnarExchange Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(58) ShuffleQueryStage +(57) ShuffleQueryStage Output [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X -(59) InputAdapter +(58) InputAdapter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(60) InputIteratorTransformer +(59) InputIteratorTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(61) ShuffledHashJoinExecTransformer +(60) ShuffledHashJoinExecTransformer Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join type: Inner Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(62) ProjectExecTransformer +(61) ProjectExecTransformer Output [2]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(63) WholeStageCodegenTransformer (X) +(62) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: false -(64) VeloxResizeBatches +(63) VeloxResizeBatches Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: X, X -(65) ColumnarExchange +(64) ColumnarExchange Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(66) ShuffleQueryStage +(65) ShuffleQueryStage Output [1]: [ps_suppkey#X] Arguments: X -(67) InputAdapter +(66) InputAdapter Input [1]: [ps_suppkey#X] -(68) InputIteratorTransformer +(67) InputIteratorTransformer Input [1]: [ps_suppkey#X] -(69) ShuffledHashJoinExecTransformer +(68) ShuffledHashJoinExecTransformer Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join type: LeftSemi Join condition: None -(70) ProjectExecTransformer +(69) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(71) WholeStageCodegenTransformer (X) +(70) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: false -(72) VeloxResizeBatches +(71) VeloxResizeBatches Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: X, X -(73) ColumnarExchange +(72) ColumnarExchange Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [s_name#X, s_address#X, s_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(74) ShuffleQueryStage +(73) ShuffleQueryStage Output [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: X -(75) InputAdapter +(74) InputAdapter Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(76) InputIteratorTransformer +(75) InputIteratorTransformer Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(77) ScanTransformer parquet +(76) ScanTransformer parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(78) FilterExecTransformer +(77) FilterExecTransformer Input [2]: [n_nationkey#X, n_name#X] Arguments: ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(79) ProjectExecTransformer +(78) ProjectExecTransformer Output [2]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(80) WholeStageCodegenTransformer (X) +(79) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: false -(81) VeloxResizeBatches +(80) VeloxResizeBatches Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: X, X -(82) ColumnarExchange +(81) ColumnarExchange Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(83) ShuffleQueryStage +(82) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(84) InputAdapter +(83) InputAdapter Input [1]: [n_nationkey#X] -(85) InputIteratorTransformer +(84) InputIteratorTransformer Input [1]: [n_nationkey#X] -(86) ShuffledHashJoinExecTransformer +(85) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(87) ProjectExecTransformer +(86) ProjectExecTransformer Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(88) WholeStageCodegenTransformer (X) +(87) WholeStageCodegenTransformer (X) Input [2]: [s_name#X, s_address#X] Arguments: false -(89) VeloxResizeBatches +(88) VeloxResizeBatches Input [2]: [s_name#X, s_address#X] Arguments: X, X -(90) ColumnarExchange +(89) ColumnarExchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(91) ShuffleQueryStage +(90) ShuffleQueryStage Output [2]: [s_name#X, s_address#X] Arguments: X -(92) AQEShuffleRead +(91) AQEShuffleRead Input [2]: [s_name#X, s_address#X] Arguments: local -(93) VeloxColumnarToRow +(92) VeloxColumnarToRow Input [2]: [s_name#X, s_address#X] -(94) Scan parquet +(93) Scan parquet Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_nationkey)] ReadSchema: struct -(95) Filter +(94) Filter Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Condition : isnotnull(s_nationkey#X) -(96) Exchange +(95) Exchange Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) Sort +(96) Sort Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 -(98) Scan parquet +(97) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(99) Filter +(98) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(100) Exchange +(99) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) Sort +(100) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 -(102) Scan parquet +(101) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(103) Filter +(102) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(104) Project +(103) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(105) Exchange +(104) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Sort +(105) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(107) SortMergeJoin +(106) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(108) Exchange +(107) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Sort +(108) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 -(110) Scan parquet +(109) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(111) Filter +(110) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(112) Project +(111) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(113) Exchange +(112) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) Sort +(113) Sort Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(115) Scan parquet +(114) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(116) Filter +(115) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(117) Project +(116) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(118) Exchange +(117) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) Sort +(118) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(120) SortMergeJoin +(119) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(121) HashAggregate +(120) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(122) HashAggregate +(121) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [(0.5 * sum(l_quantity#X)#X) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(123) Filter +(122) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(124) Exchange +(123) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) Sort +(124) Sort Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 -(126) SortMergeJoin +(125) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join type: Inner Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(127) Project +(126) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(128) Exchange +(127) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Sort +(128) Sort Input [1]: [ps_suppkey#X] Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 -(130) SortMergeJoin +(129) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join type: LeftSemi Join condition: None -(131) Project +(130) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(132) Exchange +(131) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(133) Sort +(132) Sort Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 -(134) Scan parquet +(133) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(135) Filter +(134) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(136) Project +(135) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(137) Exchange +(136) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(137) Sort Input [1]: [n_nationkey#X] Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 -(139) SortMergeJoin +(138) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(140) Project +(139) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(141) Exchange +(140) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) Sort +(141) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(143) AdaptiveSparkPlan +(142) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt index 4aa7458326813..1f86369a62af5 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt @@ -1,64 +1,63 @@ == Physical Plan == -AdaptiveSparkPlan (67) +AdaptiveSparkPlan (66) +- == Final Plan == - VeloxColumnarToRow (43) - +- TakeOrderedAndProjectExecTransformer (42) - +- ^ ProjectExecTransformer (40) - +- ^ RegularHashAggregateExecTransformer (39) - +- ^ RegularHashAggregateExecTransformer (38) - +- ^ ProjectExecTransformer (37) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) - :- ^ InputIteratorTransformer (26) - : +- ShuffleQueryStage (24), Statistics(X) - : +- ColumnarExchange (23) - : +- VeloxResizeBatches (22) - : +- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ ScanTransformer parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16), Statistics(X) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ ScanTransformer parquet (10) - +- ^ InputIteratorTransformer (35) - +- ShuffleQueryStage (33), Statistics(X) - +- ColumnarExchange (32) - +- VeloxResizeBatches (31) - +- ^ ProjectExecTransformer (29) - +- ^ FilterExecTransformer (28) - +- ^ ScanTransformer parquet (27) + VeloxColumnarToRow (42) + +- TakeOrderedAndProjectExecTransformer (41) + +- ^ ProjectExecTransformer (39) + +- ^ RegularHashAggregateExecTransformer (38) + +- ^ ProjectExecTransformer (37) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) + :- ^ InputIteratorTransformer (26) + : +- ShuffleQueryStage (24), Statistics(X) + : +- ColumnarExchange (23) + : +- VeloxResizeBatches (22) + : +- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ ScanTransformer parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16), Statistics(X) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ ScanTransformer parquet (10) + +- ^ InputIteratorTransformer (35) + +- ShuffleQueryStage (33), Statistics(X) + +- ColumnarExchange (32) + +- VeloxResizeBatches (31) + +- ^ ProjectExecTransformer (29) + +- ^ FilterExecTransformer (28) + +- ^ ScanTransformer parquet (27) +- == Initial Plan == - TakeOrderedAndProject (66) - +- HashAggregate (65) - +- HashAggregate (64) - +- Project (63) - +- SortMergeJoin Inner (62) - :- Sort (56) - : +- Exchange (55) - : +- Project (54) - : +- SortMergeJoin Inner (53) - : :- Sort (48) - : : +- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Sort (52) - : +- Exchange (51) - : +- Filter (50) - : +- Scan parquet (49) - +- Sort (61) - +- Exchange (60) - +- Project (59) - +- Filter (58) - +- Scan parquet (57) + TakeOrderedAndProject (65) + +- HashAggregate (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (55) + : +- Exchange (54) + : +- Project (53) + : +- SortMergeJoin Inner (52) + : :- Sort (47) + : : +- Exchange (46) + : : +- Project (45) + : : +- Filter (44) + : : +- Scan parquet (43) + : +- Sort (51) + : +- Exchange (50) + : +- Filter (49) + : +- Scan parquet (48) + +- Sort (60) + +- Exchange (59) + +- Project (58) + +- Filter (57) + +- Scan parquet (56) (1) ScanTransformer parquet @@ -217,143 +216,136 @@ Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_exten (38) RegularHashAggregateExecTransformer Input [6]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] - -(39) RegularHashAggregateExecTransformer -Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(40) ProjectExecTransformer +(39) ProjectExecTransformer Output [4]: [l_orderkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] Input [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(41) WholeStageCodegenTransformer (X) +(40) WholeStageCodegenTransformer (X) Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: false -(42) TakeOrderedAndProjectExecTransformer +(41) TakeOrderedAndProjectExecTransformer Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X], 0 -(43) VeloxColumnarToRow +(42) VeloxColumnarToRow Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(44) Scan parquet +(43) Scan parquet Output [2]: [c_custkey#X, c_mktsegment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_mktsegment), EqualTo(c_mktsegment,BUILDING), IsNotNull(c_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [2]: [c_custkey#X, c_mktsegment#X] Condition : ((isnotnull(c_mktsegment#X) AND (c_mktsegment#X = BUILDING)) AND isnotnull(c_custkey#X)) -(46) Project +(45) Project Output [1]: [c_custkey#X] Input [2]: [c_custkey#X, c_mktsegment#X] -(47) Exchange +(46) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(49) Scan parquet +(48) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(50) Filter +(49) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(51) Exchange +(50) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(51) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(53) SortMergeJoin +(52) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(54) Project +(53) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(55) Exchange +(54) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Sort +(55) Sort Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(57) Scan parquet +(56) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(58) Filter +(57) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(59) Project +(58) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(60) Exchange +(59) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(61) Sort +(60) Sort Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(62) SortMergeJoin +(61) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(63) Project +(62) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(64) HashAggregate +(63) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(65) HashAggregate +(64) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [4]: [l_orderkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(66) TakeOrderedAndProject +(65) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(67) AdaptiveSparkPlan +(66) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala index 84d1fe8db9194..cf0bf272bc5bb 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala @@ -49,6 +49,7 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu .set("spark.unsafe.exceptionOnMemoryLeak", "true") .set("spark.sql.autoBroadcastJoinThreshold", "-1") .set("spark.sql.sources.useV1SourceList", "avro") + .set("spark.gluten.sql.mergeTwoPhasesAggregate.enabled", "false") } test("count") { diff --git a/docs/Configuration.md b/docs/Configuration.md index 76549dd4fe7c7..dd9363074541a 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -58,6 +58,7 @@ You can add these configurations into spark-defaults.conf to enable or disable t | spark.gluten.sql.columnar.fallback.ignoreRowToColumnar | When true, the fallback policy ignores the RowToColumnar when counting fallback number. | true | | spark.gluten.sql.columnar.fallback.preferColumnar | When true, the fallback policy prefers to use Gluten plan rather than vanilla Spark plan if the both of them contains ColumnarToRow and the vanilla Spark plan ColumnarToRow number is not smaller than Gluten plan. | true | | spark.gluten.sql.columnar.force.hashagg | Force to use hash agg to replace sort agg. | true | +| spark.gluten.sql.mergeTwoPhasesAggregate.enabled | Whether to merge two phases aggregate if there are no other operators between them. | true | | spark.gluten.sql.columnar.vanillaReaders | Enable vanilla spark's vectorized reader. Please note it may bring perf. overhead due to extra data transition. We recommend to disable it if most queries can be fully offloaded to gluten. | false | | spark.gluten.sql.native.bloomFilter | Enable or Disable native runtime bloom filter. | true | | spark.gluten.sql.native.arrow.reader.enabled | Enable or Disable native arrow read CSV file format | false | diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/extension/MergeTwoPhasesHashBaseAggregate.scala b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/MergeTwoPhasesHashBaseAggregate.scala similarity index 96% rename from backends-clickhouse/src/main/scala/org/apache/gluten/extension/MergeTwoPhasesHashBaseAggregate.scala rename to gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/MergeTwoPhasesHashBaseAggregate.scala index a10659b6d5e71..a034a3229a881 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/extension/MergeTwoPhasesHashBaseAggregate.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/MergeTwoPhasesHashBaseAggregate.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.gluten.extension +package org.apache.gluten.extension.columnar import org.apache.gluten.GlutenConfig @@ -42,6 +42,7 @@ case class MergeTwoPhasesHashBaseAggregate(session: SparkSession) val scanOnly: Boolean = glutenConf.enableScanOnly val enableColumnarHashAgg: Boolean = !scanOnly && glutenConf.enableColumnarHashAgg val replaceSortAggWithHashAgg: Boolean = GlutenConfig.getConf.forceToUseHashAgg + val mergeTwoPhasesAggEnabled: Boolean = GlutenConfig.getConf.mergeTwoPhasesAggEnabled private def isPartialAgg(partialAgg: BaseAggregateExec, finalAgg: BaseAggregateExec): Boolean = { // TODO: now it can not support to merge agg which there are the filters in the aggregate exprs. @@ -59,7 +60,7 @@ case class MergeTwoPhasesHashBaseAggregate(session: SparkSession) } override def apply(plan: SparkPlan): SparkPlan = { - if (!enableColumnarHashAgg) { + if (!mergeTwoPhasesAggEnabled || !enableColumnarHashAgg) { plan } else { plan.transformDown { diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala index f394b4687d3d5..1b14531c2f1d2 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala @@ -27,6 +27,11 @@ class GlutenReplaceHashWithSortAggSuite extends ReplaceHashWithSortAggSuite with GlutenSQLTestsBaseTrait { + override def sparkConf: SparkConf = { + super.sparkConf + .set("spark.gluten.sql.mergeTwoPhasesAggregate.enabled", "false") + } + private def checkNumAggs(df: DataFrame, hashAggCount: Int, sortAggCount: Int): Unit = { val plan = df.queryExecution.executedPlan assert(collectWithSubqueries(plan) { @@ -85,7 +90,7 @@ class GlutenReplaceHashWithSortAggSuite withTempView("t1", "t2") { spark.range(100).selectExpr("id as key").createOrReplaceTempView("t1") spark.range(50).selectExpr("id as key").createOrReplaceTempView("t2") - Seq(("COUNT", 0, 1, 2, 0), ("COLLECT_LIST", 2, 0, 2, 0)).foreach { + Seq(("COLLECT_LIST", 2, 0, 2, 0)).foreach { aggExprInfo => val query = s""" diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala index 332c21418a9b9..98d6c666f5987 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala @@ -26,6 +26,11 @@ class GlutenReplaceHashWithSortAggSuite extends ReplaceHashWithSortAggSuite with GlutenSQLTestsBaseTrait { + override def sparkConf: SparkConf = { + super.sparkConf + .set("spark.gluten.sql.mergeTwoPhasesAggregate.enabled", "false") + } + private def checkNumAggs(df: DataFrame, hashAggCount: Int, sortAggCount: Int): Unit = { val plan = df.queryExecution.executedPlan assert(collectWithSubqueries(plan) { @@ -84,7 +89,7 @@ class GlutenReplaceHashWithSortAggSuite withTempView("t1", "t2") { spark.range(100).selectExpr("id as key").createOrReplaceTempView("t1") spark.range(50).selectExpr("id as key").createOrReplaceTempView("t2") - Seq(("COUNT", 0, 1, 2, 0), ("COLLECT_LIST", 2, 0, 2, 0)).foreach { + Seq(("COLLECT_LIST", 2, 0, 2, 0)).foreach { aggExprInfo => val query = s""" diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala index 332c21418a9b9..342a570a50d10 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution import org.apache.gluten.execution.HashAggregateExecBaseTransformer +import org.apache.spark.SparkConf import org.apache.spark.sql.{DataFrame, GlutenSQLTestsBaseTrait} import org.apache.spark.sql.execution.aggregate.{ObjectHashAggregateExec, SortAggregateExec} import org.apache.spark.sql.internal.SQLConf @@ -26,6 +27,11 @@ class GlutenReplaceHashWithSortAggSuite extends ReplaceHashWithSortAggSuite with GlutenSQLTestsBaseTrait { + override def sparkConf: SparkConf = { + super.sparkConf + .set("spark.gluten.sql.mergeTwoPhasesAggregate.enabled", "false") + } + private def checkNumAggs(df: DataFrame, hashAggCount: Int, sortAggCount: Int): Unit = { val plan = df.queryExecution.executedPlan assert(collectWithSubqueries(plan) { diff --git a/gluten-ut/test/src/test/scala/org/apache/gluten/execution/VeloxMergeTwoPhasesHashBaseAggregateSuite.scala b/gluten-ut/test/src/test/scala/org/apache/gluten/execution/VeloxMergeTwoPhasesHashBaseAggregateSuite.scala new file mode 100644 index 0000000000000..194a1d0b9c3c7 --- /dev/null +++ b/gluten-ut/test/src/test/scala/org/apache/gluten/execution/VeloxMergeTwoPhasesHashBaseAggregateSuite.scala @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.gluten.execution + +import org.apache.spark.SparkConf +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.execution.adaptive.{DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite} + +abstract class BaseMergeTwoPhasesHashBaseAggregateSuite extends WholeStageTransformerSuite { + val fileFormat: String = "parquet" + override protected val resourcePath: String = "/tpch-data-parquet" + + override def beforeAll(): Unit = { + super.beforeAll() + + spark + .sql(s""" + |CREATE TABLE t1 (id int, age int, phone int, date string) + |USING $fileFormat + |PARTITIONED BY (date) + |""".stripMargin) + .show() + + spark + .sql(s""" + |INSERT INTO t1 PARTITION(date = '2020-01-01') + |SELECT id, id % 10 as age, id % 10 as phone + |FROM range(100) + |""".stripMargin) + .show() + } + + override protected def sparkConf: SparkConf = { + super.sparkConf + .set("spark.shuffle.manager", "org.apache.spark.shuffle.sort.ColumnarShuffleManager") + .set("spark.sql.files.maxPartitionBytes", "1g") + .set("spark.sql.shuffle.partitions", "1") + .set("spark.memory.offHeap.size", "2g") + .set("spark.gluten.ras.enabled", "false") + .set("spark.gluten.sql.mergeTwoPhasesAggregate.enabled", "true") + } + + override def afterAll(): Unit = { + try { + sql("DROP TABLE IF EXISTS t1") + } finally { + super.afterAll() + } + } + + test("Merge two phase hash-based aggregate into one aggregate") { + def checkHashAggregateCount(df: DataFrame, expectedCount: Int): Unit = { + df.collect() + val plans = collect(df.queryExecution.executedPlan) { + case agg: HashAggregateExecBaseTransformer => agg + } + assert(plans.size == expectedCount) + } + + withTempView("v1") { + spark.range(100).selectExpr("id as key").createOrReplaceTempView("v1") + // no exchange hash aggregate, merge to one hash aggregate + checkHashAggregateCount( + spark.sql(""" + |SELECT key, count(key) + |FROM v1 + |GROUP BY key + |""".stripMargin), + 1 + ) + + // with filter hash aggregate + checkHashAggregateCount( + spark.sql(""" + |SELECT key, count(key) FILTER (WHERE key LIKE '%1%') AS pc2 + |FROM v1 + |GROUP BY key + |""".stripMargin), + 2 + ) + } + + // with exchange hash aggregate + checkHashAggregateCount( + spark.sql(""" + |SELECT count(1) FROM t1 + |""".stripMargin), + 2) + } + + test("Merge two phase object-based aggregate into one aggregate") { + def checkObjectAggregateCount(df: DataFrame, expectedCount: Int): Unit = { + df.collect() + val plans = collect(df.queryExecution.executedPlan) { + case agg: HashAggregateExecBaseTransformer => agg + } + assert(plans.size == expectedCount) + } + + withTempView("v1") { + spark.range(100).selectExpr("id as key").createOrReplaceTempView("v1") + // no exchange object aggregate, merge to one hash aggregate + checkObjectAggregateCount( + spark.sql(""" + |SELECT key, collect_list(key) + |FROM v1 + |GROUP BY key + |""".stripMargin), + 1 + ) + + // with filter object aggregate + checkObjectAggregateCount( + spark.sql(""" + |SELECT key, collect_list(key) FILTER (WHERE key LIKE '%1%') AS pc2 + |FROM v1 + |GROUP BY key + |""".stripMargin), + 2 + ) + } + + // with exchange object aggregate + checkObjectAggregateCount( + spark.sql(""" + |SELECT collect_list(id) FROM t1 + |""".stripMargin), + 2) + } + + test("Merge two phase sort-based aggregate into one aggregate") { + def checkSortAggregateCount(df: DataFrame, expectedCount: Int): Unit = { + df.collect() + val plans = collect(df.queryExecution.executedPlan) { + case agg: HashAggregateExecBaseTransformer => agg + } + assert(plans.size == expectedCount) + } + + withSQLConf("spark.sql.test.forceApplySortAggregate" -> "true") { + withTempView("v1") { + spark.range(100).selectExpr("id as key").createOrReplaceTempView("v1") + // no exchange sort aggregate, merge to one hash aggregate + checkSortAggregateCount( + spark.sql(""" + |SELECT sum(if(key<0,0,key)) + |FROM v1 + |GROUP BY key + |""".stripMargin), + 1 + ) + + // with filter sort aggregate + checkSortAggregateCount( + spark.sql(""" + |SELECT key, sum(if(key<0,0,key)) FILTER (WHERE key LIKE '%1%') AS pc2 + |FROM v1 + |GROUP BY key + |""".stripMargin), + 2 + ) + } + + // with exchange sort aggregate + checkSortAggregateCount( + spark.sql(""" + |SELECT sum(if(id<0,0,id)) FROM t1 + |""".stripMargin), + 2) + } + } +} + +class MergeTwoPhasesAggregateSuiteAEOn + extends BaseMergeTwoPhasesHashBaseAggregateSuite + with EnableAdaptiveExecutionSuite + +class MergeTwoPhasesAggregateSuiteAEOff + extends BaseMergeTwoPhasesHashBaseAggregateSuite + with DisableAdaptiveExecutionSuite diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala index 3e491eb2753f3..3701fe92e1416 100644 --- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala +++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala @@ -56,6 +56,8 @@ class GlutenConfig(conf: SQLConf) extends Logging { def forceToUseHashAgg: Boolean = conf.getConf(COLUMNAR_FORCE_HASHAGG_ENABLED) + def mergeTwoPhasesAggEnabled: Boolean = conf.getConf(MERGE_TWO_PHASES_ENABLED) + def enableColumnarProject: Boolean = conf.getConf(COLUMNAR_PROJECT_ENABLED) def enableColumnarFilter: Boolean = conf.getConf(COLUMNAR_FILTER_ENABLED) @@ -878,6 +880,13 @@ object GlutenConfig { .booleanConf .createWithDefault(true) + val MERGE_TWO_PHASES_ENABLED = + buildConf("spark.gluten.sql.mergeTwoPhasesAggregate.enabled") + .internal() + .doc("Whether to merge two phases aggregate if there are no other operators between them.") + .booleanConf + .createWithDefault(true) + val COLUMNAR_PROJECT_ENABLED = buildConf("spark.gluten.sql.columnar.project") .internal()