From 67fd737bdacd6c42b3999fea88e95df4719b8c9c Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 13 Nov 2020 17:17:04 +0800 Subject: [PATCH 01/16] init --- .../sql/catalyst/optimizer/Optimizer.scala | 22 ++++++++++++++----- .../optimizer/CombiningLimitsSuite.scala | 22 +++++++++++++++++-- .../optimizer/LimitPushdownSuite.scala | 2 +- 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index e492d01650097..b6d4819b09280 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -85,7 +85,7 @@ abstract class Optimizer(catalogManager: CatalogManager) OptimizeWindowFunctions, CollapseWindow, CombineFilters, - CombineLimits, + EliminateLimits, CombineUnions, // Constant folding and strength reduction TransposeWindow, @@ -1452,11 +1452,23 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { } /** - * Combines two adjacent [[Limit]] operators into one, merging the - * expressions into one single expression. + * 1. Eliminate [[Limit]] operators if it's child max row <= limit. + * 2. Combines two adjacent [[Limit]] operators into one, merging the + * expressions into one single expression. */ -object CombineLimits extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan transform { +object EliminateLimits extends Rule[LogicalPlan] { + private def canEliminate(limitExpr: Expression, childMaxRow: Option[Long]): Boolean = { + limitExpr.foldable && + childMaxRow.isDefined && + childMaxRow.get <= limitExpr.eval().toString.toInt + } + + def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { + case GlobalLimit(l, child) if canEliminate(l, child.maxRows) => + child + case LocalLimit(l, child) if canEliminate(l, child.maxRows) => + child + case GlobalLimit(le, GlobalLimit(ne, grandChild)) => GlobalLimit(Least(Seq(ne, le)), grandChild) case LocalLimit(le, LocalLimit(ne, grandChild)) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala index b190dd5a7c220..ad8c0f75f8617 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -30,8 +30,8 @@ class CombiningLimitsSuite extends PlanTest { Batch("Column Pruning", FixedPoint(100), ColumnPruning, RemoveNoopOperators) :: - Batch("Combine Limit", FixedPoint(10), - CombineLimits) :: + Batch("Eliminate Limit", FixedPoint(10), + EliminateLimits) :: Batch("Constant Folding", FixedPoint(10), NullPropagation, ConstantFolding, @@ -90,4 +90,22 @@ class CombiningLimitsSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("SPARK-33442: Change Combine Limit to Eliminate limit using max row") { + // test child max row <= limit. + val query1 = testRelation.select().groupBy()(count(1)).limit(1).analyze + val optimized1 = Optimize.execute(query1) + val expected1 = testRelation.select().groupBy()(count(1)).analyze + comparePlans(optimized1, expected1) + + // test child max row > limit. + val query2 = testRelation.select().groupBy()(count(1)).limit(0).analyze + val optimized2 = Optimize.execute(query2) + comparePlans(optimized2, query2) + + // test child max row is none + val query3 = testRelation.select(Symbol("a")).limit(1).analyze + val optimized3 = Optimize.execute(query3) + comparePlans(optimized3, query3) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala index d993aee3d7518..a2efc43fb5bb5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala @@ -33,7 +33,7 @@ class LimitPushdownSuite extends PlanTest { EliminateSubqueryAliases) :: Batch("Limit pushdown", FixedPoint(100), LimitPushDown, - CombineLimits, + EliminateLimits, ConstantFolding, BooleanSimplification) :: Nil } From 46aea2e4ee2ed6fef35ffeaa03e03d9bbcd7293d Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 13 Nov 2020 20:29:55 +0800 Subject: [PATCH 02/16] fix LimitPushdownSuite --- .../spark/sql/catalyst/optimizer/LimitPushdownSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala index a2efc43fb5bb5..e365e3300096e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala @@ -74,7 +74,7 @@ class LimitPushdownSuite extends PlanTest { Union(testRelation.limit(1), testRelation2.select('d, 'e, 'f).limit(1)).limit(2) val unionOptimized = Optimize.execute(unionQuery.analyze) val unionCorrectAnswer = - Limit(2, Union(testRelation.limit(1), testRelation2.select('d, 'e, 'f).limit(1))).analyze + Union(testRelation.limit(1), testRelation2.select('d, 'e, 'f).limit(1)).analyze comparePlans(unionOptimized, unionCorrectAnswer) } From 0a893347e93fe114d3ce273006dde878bf0e8cc3 Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 13 Nov 2020 21:43:47 +0800 Subject: [PATCH 03/16] keep global limit --- .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index b6d4819b09280..2314f9913ac94 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1458,13 +1458,11 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { */ object EliminateLimits extends Rule[LogicalPlan] { private def canEliminate(limitExpr: Expression, childMaxRow: Option[Long]): Boolean = { - limitExpr.foldable && - childMaxRow.isDefined && - childMaxRow.get <= limitExpr.eval().toString.toInt + limitExpr.foldable && childMaxRow.exists { _ <= limitExpr.eval().toString.toInt } } def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { - case GlobalLimit(l, child) if canEliminate(l, child.maxRows) => + case Limit(l, child) if canEliminate(l, child.maxRows) => child case LocalLimit(l, child) if canEliminate(l, child.maxRows) => child From 2c6b171e1a604400fa89e915d9908bdefd0eab94 Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 13 Nov 2020 22:12:19 +0800 Subject: [PATCH 04/16] use long --- .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 2314f9913ac94..53221bda1b7ca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1458,7 +1458,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { */ object EliminateLimits extends Rule[LogicalPlan] { private def canEliminate(limitExpr: Expression, childMaxRow: Option[Long]): Boolean = { - limitExpr.foldable && childMaxRow.exists { _ <= limitExpr.eval().toString.toInt } + limitExpr.foldable && childMaxRow.exists { _ <= limitExpr.eval().toString.toLong } } def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { From e570b8e26338990f237ea1ae10cd8aadeb178ee4 Mon Sep 17 00:00:00 2001 From: ulysses Date: Sat, 14 Nov 2020 08:51:56 +0800 Subject: [PATCH 05/16] fix StreamSuite --- .../test/scala/org/apache/spark/sql/streaming/StreamSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index 8797e5ad64149..e64d5f6f3587e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -1134,7 +1134,7 @@ class StreamSuite extends StreamTest { verifyLocalLimit(inputDF.toDF("value").join(staticDF, "value"), expectStreamingLimit = false) verifyLocalLimit( - inputDF.groupBy().count().limit(1), + inputDF.groupBy("value").count().limit(1), expectStreamingLimit = false, outputMode = OutputMode.Complete()) } From 314cb1e3449ee4328e44f94c7216f7c6917589e6 Mon Sep 17 00:00:00 2001 From: ulysses Date: Sat, 14 Nov 2020 21:48:33 +0800 Subject: [PATCH 06/16] golden file --- .../approved-plans-v1_4/q16/explain.txt | 6 +- .../approved-plans-v1_4/q16/simplified.txt | 4 +- .../approved-plans-v1_4/q23a/explain.txt | 239 ++++++++------- .../approved-plans-v1_4/q23a/simplified.txt | 273 +++++++++--------- .../approved-plans-v1_4/q38/explain.txt | 113 ++++---- .../approved-plans-v1_4/q38/simplified.txt | 125 ++++---- .../approved-plans-v1_4/q92/explain.txt | 6 +- .../approved-plans-v1_4/q92/simplified.txt | 4 +- .../approved-plans-v1_4/q94/explain.txt | 6 +- .../approved-plans-v1_4/q94/simplified.txt | 4 +- .../approved-plans-v1_4/q95/explain.txt | 6 +- .../approved-plans-v1_4/q95/simplified.txt | 4 +- .../approved-plans-v1_4/q96/explain.txt | 6 +- .../approved-plans-v1_4/q96/simplified.txt | 4 +- .../approved-plans-v1_4/q97/explain.txt | 63 ++-- .../approved-plans-v1_4/q97/simplified.txt | 91 +++--- 16 files changed, 468 insertions(+), 486 deletions(-) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt index 2ae939cfe41f3..ea7e298393e4c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (41) +* Sort (41) +- * HashAggregate (40) +- Exchange (39) +- * HashAggregate (38) @@ -229,7 +229,7 @@ Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#27] Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#23,17,2) AS total net profit #32] -(41) TakeOrderedAndProject +(41) Sort [codegen id : 8] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt index a044b05365f8e..169f07c2d85e5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (8) +WholeStageCodegen (8) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt index 6d2b5b0013d8f..15ae5bfe24303 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt @@ -1,76 +1,75 @@ == Physical Plan == -CollectLimit (72) -+- * HashAggregate (71) - +- Exchange (70) - +- * HashAggregate (69) - +- Union (68) - :- * Project (51) - : +- * BroadcastHashJoin Inner BuildRight (50) - : :- * Project (44) - : : +- * BroadcastHashJoin LeftSemi BuildRight (43) - : : :- * Project (27) - : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.catalog_sales (1) - : : : +- BroadcastExchange (25) - : : : +- * Project (24) - : : : +- * Filter (23) - : : : +- * HashAggregate (22) - : : : +- Exchange (21) - : : : +- * HashAggregate (20) - : : : +- * Project (19) - : : : +- * BroadcastHashJoin Inner BuildRight (18) - : : : :- * Project (13) - : : : : +- * BroadcastHashJoin Inner BuildRight (12) - : : : : :- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.store_sales (4) - : : : : +- BroadcastExchange (11) - : : : : +- * Project (10) - : : : : +- * Filter (9) - : : : : +- * ColumnarToRow (8) - : : : : +- Scan parquet default.date_dim (7) - : : : +- BroadcastExchange (17) - : : : +- * Filter (16) - : : : +- * ColumnarToRow (15) - : : : +- Scan parquet default.item (14) - : : +- BroadcastExchange (42) - : : +- * Project (41) - : : +- * Filter (40) - : : +- * HashAggregate (39) - : : +- Exchange (38) - : : +- * HashAggregate (37) - : : +- * Project (36) - : : +- * BroadcastHashJoin Inner BuildRight (35) - : : :- * Filter (30) - : : : +- * ColumnarToRow (29) - : : : +- Scan parquet default.store_sales (28) - : : +- BroadcastExchange (34) - : : +- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.customer (31) - : +- BroadcastExchange (49) - : +- * Project (48) - : +- * Filter (47) - : +- * ColumnarToRow (46) - : +- Scan parquet default.date_dim (45) - +- * Project (67) - +- * BroadcastHashJoin Inner BuildRight (66) - :- * Project (64) - : +- * BroadcastHashJoin LeftSemi BuildRight (63) - : :- * Project (57) - : : +- * BroadcastHashJoin LeftSemi BuildRight (56) - : : :- * Filter (54) - : : : +- * ColumnarToRow (53) - : : : +- Scan parquet default.web_sales (52) - : : +- ReusedExchange (55) - : +- BroadcastExchange (62) - : +- * Project (61) - : +- * Filter (60) - : +- * HashAggregate (59) - : +- ReusedExchange (58) - +- ReusedExchange (65) +* HashAggregate (71) ++- Exchange (70) + +- * HashAggregate (69) + +- Union (68) + :- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Project (44) + : : +- * BroadcastHashJoin LeftSemi BuildRight (43) + : : :- * Project (27) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (25) + : : : +- * Project (24) + : : : +- * Filter (23) + : : : +- * HashAggregate (22) + : : : +- Exchange (21) + : : : +- * HashAggregate (20) + : : : +- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * Project (13) + : : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.store_sales (4) + : : : : +- BroadcastExchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet default.date_dim (7) + : : : +- BroadcastExchange (17) + : : : +- * Filter (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet default.item (14) + : : +- BroadcastExchange (42) + : : +- * Project (41) + : : +- * Filter (40) + : : +- * HashAggregate (39) + : : +- Exchange (38) + : : +- * HashAggregate (37) + : : +- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.store_sales (28) + : : +- BroadcastExchange (34) + : : +- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.customer (31) + : +- BroadcastExchange (49) + : +- * Project (48) + : +- * Filter (47) + : +- * ColumnarToRow (46) + : +- Scan parquet default.date_dim (45) + +- * Project (67) + +- * BroadcastHashJoin Inner BuildRight (66) + :- * Project (64) + : +- * BroadcastHashJoin LeftSemi BuildRight (63) + : :- * Project (57) + : : +- * BroadcastHashJoin LeftSemi BuildRight (56) + : : :- * Filter (54) + : : : +- * ColumnarToRow (53) + : : : +- Scan parquet default.web_sales (52) + : : +- ReusedExchange (55) + : +- BroadcastExchange (62) + : +- * Project (61) + : +- * Filter (60) + : +- * HashAggregate (59) + : +- ReusedExchange (58) + +- ReusedExchange (65) (1) Scan parquet default.catalog_sales @@ -398,139 +397,135 @@ Functions [1]: [sum(sales#40)] Aggregate Attributes [1]: [sum(sales#40)#57] Results [1]: [sum(sales#40)#57 AS sum(sales)#58] -(72) CollectLimit -Input [1]: [sum(sales)#58] -Arguments: 100 - ===== Subqueries ===== Subquery:1 Hosting operator id = 40 Hosting Expression = Subquery scalar-subquery#35, [id=#36] -* HashAggregate (94) -+- Exchange (93) - +- * HashAggregate (92) - +- * HashAggregate (91) - +- Exchange (90) - +- * HashAggregate (89) - +- * Project (88) - +- * BroadcastHashJoin Inner BuildRight (87) - :- * Project (81) - : +- * BroadcastHashJoin Inner BuildRight (80) - : :- * Filter (75) - : : +- * ColumnarToRow (74) - : : +- Scan parquet default.store_sales (73) - : +- BroadcastExchange (79) - : +- * Filter (78) - : +- * ColumnarToRow (77) - : +- Scan parquet default.customer (76) - +- BroadcastExchange (86) - +- * Project (85) - +- * Filter (84) - +- * ColumnarToRow (83) - +- Scan parquet default.date_dim (82) - - -(73) Scan parquet default.store_sales +* HashAggregate (93) ++- Exchange (92) + +- * HashAggregate (91) + +- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- * Project (87) + +- * BroadcastHashJoin Inner BuildRight (86) + :- * Project (80) + : +- * BroadcastHashJoin Inner BuildRight (79) + : :- * Filter (74) + : : +- * ColumnarToRow (73) + : : +- Scan parquet default.store_sales (72) + : +- BroadcastExchange (78) + : +- * Filter (77) + : +- * ColumnarToRow (76) + : +- Scan parquet default.customer (75) + +- BroadcastExchange (85) + +- * Project (84) + +- * Filter (83) + +- * ColumnarToRow (82) + +- Scan parquet default.date_dim (81) + + +(72) Scan parquet default.store_sales Output [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(74) ColumnarToRow [codegen id : 3] +(73) ColumnarToRow [codegen id : 3] Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] -(75) Filter [codegen id : 3] +(74) Filter [codegen id : 3] Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] Condition : (isnotnull(ss_customer_sk#23) AND isnotnull(ss_sold_date_sk#6)) -(76) Scan parquet default.customer +(75) Scan parquet default.customer Output [1]: [c_customer_sk#26] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(77) ColumnarToRow [codegen id : 1] +(76) ColumnarToRow [codegen id : 1] Input [1]: [c_customer_sk#26] -(78) Filter [codegen id : 1] +(77) Filter [codegen id : 1] Input [1]: [c_customer_sk#26] Condition : isnotnull(c_customer_sk#26) -(79) BroadcastExchange +(78) BroadcastExchange Input [1]: [c_customer_sk#26] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#59] -(80) BroadcastHashJoin [codegen id : 3] +(79) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_customer_sk#23] Right keys [1]: [c_customer_sk#26] Join condition: None -(81) Project [codegen id : 3] +(80) Project [codegen id : 3] Output [4]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] Input [5]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] -(82) Scan parquet default.date_dim +(81) Scan parquet default.date_dim Output [2]: [d_date_sk#8, d_year#10] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct -(83) ColumnarToRow [codegen id : 2] +(82) ColumnarToRow [codegen id : 2] Input [2]: [d_date_sk#8, d_year#10] -(84) Filter [codegen id : 2] +(83) Filter [codegen id : 2] Input [2]: [d_date_sk#8, d_year#10] Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) -(85) Project [codegen id : 2] +(84) Project [codegen id : 2] Output [1]: [d_date_sk#8] Input [2]: [d_date_sk#8, d_year#10] -(86) BroadcastExchange +(85) BroadcastExchange Input [1]: [d_date_sk#8] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#60] -(87) BroadcastHashJoin [codegen id : 3] +(86) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#6] Right keys [1]: [d_date_sk#8] Join condition: None -(88) Project [codegen id : 3] +(87) Project [codegen id : 3] Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] Input [5]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26, d_date_sk#8] -(89) HashAggregate [codegen id : 3] +(88) HashAggregate [codegen id : 3] Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] Keys [1]: [c_customer_sk#26] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#61, isEmpty#62] Results [3]: [c_customer_sk#26, sum#63, isEmpty#64] -(90) Exchange +(89) Exchange Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#65] -(91) HashAggregate [codegen id : 4] +(90) HashAggregate [codegen id : 4] Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] Keys [1]: [c_customer_sk#26] Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66] Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66 AS csales#67] -(92) HashAggregate [codegen id : 4] +(91) HashAggregate [codegen id : 4] Input [1]: [csales#67] Keys: [] Functions [1]: [partial_max(csales#67)] Aggregate Attributes [1]: [max#68] Results [1]: [max#69] -(93) Exchange +(92) Exchange Input [1]: [max#69] Arguments: SinglePartition, true, [id=#70] -(94) HashAggregate [codegen id : 5] +(93) HashAggregate [codegen id : 5] Input [1]: [max#69] Keys: [] Functions [1]: [max(csales#67)] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt index d860e18574f2a..aebe2bd3e1a6c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt @@ -1,143 +1,142 @@ -CollectLimit - WholeStageCodegen (20) - HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] - InputAdapter - Exchange #1 - WholeStageCodegen (19) - HashAggregate [sales] [sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (9) - Project [cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] - BroadcastHashJoin [cs_item_sk,item_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen (4) - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - InputAdapter - Exchange [substr(i_item_desc, 1, 30),i_item_sk,d_date] #3 - WholeStageCodegen (3) - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk] +WholeStageCodegen (20) + HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] + InputAdapter + Exchange #1 + WholeStageCodegen (19) + HashAggregate [sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (9) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] + BroadcastHashJoin [cs_item_sk,item_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (4) + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + InputAdapter + Exchange [substr(i_item_desc, 1, 30),i_item_sk,d_date] #3 + WholeStageCodegen (3) + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_year,d_date_sk] + Scan parquet default.item [i_item_sk,i_item_desc] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (5) + HashAggregate [max] [max(csales),tpcds_cmax,max] + InputAdapter + Exchange #9 + WholeStageCodegen (4) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #10 + WholeStageCodegen (3) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk,ss_sold_date_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (7) - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - Subquery #1 - WholeStageCodegen (5) - HashAggregate [max] [max(csales),tpcds_cmax,max] - InputAdapter - Exchange #9 - WholeStageCodegen (4) - HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] - InputAdapter - Exchange [c_customer_sk] #10 - WholeStageCodegen (3) - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (1) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #12 - WholeStageCodegen (2) - Project [d_date_sk] - Filter [d_year,d_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (1) + Filter [c_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - InputAdapter - Exchange [c_customer_sk] #7 - WholeStageCodegen (6) - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Filter [ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (5) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (8) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (18) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - BroadcastHashJoin [ws_item_sk,item_sk] - Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - InputAdapter - ReusedExchange [item_sk] #2 - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (16) - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [tpcds_cmax] #1 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - InputAdapter - ReusedExchange [c_customer_sk,sum,isEmpty] #7 - InputAdapter - ReusedExchange [d_date_sk] #13 + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (6) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (8) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (18) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + BroadcastHashJoin [ws_item_sk,item_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [item_sk] #2 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (16) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + InputAdapter + ReusedExchange [d_date_sk] #13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt index 09ab60c7cf651..74454cf32afd0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt @@ -1,59 +1,58 @@ == Physical Plan == -CollectLimit (55) -+- * HashAggregate (54) - +- Exchange (53) - +- * HashAggregate (52) - +- * HashAggregate (51) - +- * HashAggregate (50) - +- * HashAggregate (49) - +- * HashAggregate (48) - +- * HashAggregate (47) - +- Exchange (46) - +- * HashAggregate (45) - +- * BroadcastHashJoin LeftSemi BuildRight (44) - :- * BroadcastHashJoin LeftSemi BuildRight (30) - : :- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.customer (11) - : +- BroadcastExchange (29) - : +- * HashAggregate (28) - : +- Exchange (27) - : +- * HashAggregate (26) - : +- * Project (25) - : +- * BroadcastHashJoin Inner BuildRight (24) - : :- * Project (22) - : : +- * BroadcastHashJoin Inner BuildRight (21) - : : :- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.catalog_sales (17) - : : +- ReusedExchange (20) - : +- ReusedExchange (23) - +- BroadcastExchange (43) - +- * HashAggregate (42) - +- Exchange (41) - +- * HashAggregate (40) - +- * Project (39) - +- * BroadcastHashJoin Inner BuildRight (38) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) - : :- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.web_sales (31) - : +- ReusedExchange (34) - +- ReusedExchange (37) +* HashAggregate (54) ++- Exchange (53) + +- * HashAggregate (52) + +- * HashAggregate (51) + +- * HashAggregate (50) + +- * HashAggregate (49) + +- * HashAggregate (48) + +- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * BroadcastHashJoin LeftSemi BuildRight (44) + :- * BroadcastHashJoin LeftSemi BuildRight (30) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer (11) + : +- BroadcastExchange (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.catalog_sales (17) + : : +- ReusedExchange (20) + : +- ReusedExchange (23) + +- BroadcastExchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.web_sales (31) + : +- ReusedExchange (34) + +- ReusedExchange (37) (1) Scan parquet default.store_sales @@ -322,7 +321,3 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#33] Results [1]: [count(1)#33 AS count(1)#34] -(55) CollectLimit -Input [1]: [count(1)#34] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt index 10a2166ce761d..a5b57a4ac9450 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt @@ -1,81 +1,80 @@ -CollectLimit - WholeStageCodegen (13) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #1 - WholeStageCodegen (12) - HashAggregate [count,count] +WholeStageCodegen (13) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (12) + HashAggregate [count,count] + HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (11) - HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (11) + HashAggregate [c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - Project [d_date,c_first_name,c_last_name] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_customer_sk] + Project [d_date,c_first_name,c_last_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [c_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (2) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (6) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #6 - WholeStageCodegen (5) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] - InputAdapter - ReusedExchange [d_date_sk,d_date] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (10) + BroadcastExchange #5 + WholeStageCodegen (6) HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 - WholeStageCodegen (9) + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen (5) HashAggregate [c_last_name,c_first_name,d_date] Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_customer_sk] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] InputAdapter ReusedExchange [d_date_sk,d_date] #3 InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt index b17a48db8baac..8a441392f4165 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (34) +* Sort (34) +- * HashAggregate (33) +- Exchange (32) +- * HashAggregate (31) @@ -190,7 +190,7 @@ Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))#22] Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#3))#22,17,2) AS Excess Discount Amount #23] -(34) TakeOrderedAndProject +(34) Sort [codegen id : 7] Input [1]: [Excess Discount Amount #23] -Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] +Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt index 652b2e36cf781..1f24a7c964f20 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [Excess Discount Amount ] - WholeStageCodegen (7) +WholeStageCodegen (7) + Sort [Excess Discount Amount ] HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt index a94e74f66b201..2abbe4f9b8390 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (41) +* Sort (41) +- * HashAggregate (40) +- Exchange (39) +- * HashAggregate (38) @@ -229,7 +229,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#27] Results [3]: [count(ws_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#23,17,2) AS total net profit #32] -(41) TakeOrderedAndProject +(41) Sort [codegen id : 8] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt index 9d30b998fe174..5e7d7db5c0a9e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (8) +WholeStageCodegen (8) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt index 3a24e83aff256..1cc99e296383f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (56) +* Sort (56) +- * HashAggregate (55) +- Exchange (54) +- * HashAggregate (53) @@ -312,7 +312,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#32] Results [3]: [count(ws_order_number#4)#32 AS order count #35, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#27,17,2) AS total shipping cost #36, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#28,17,2) AS total net profit #37] -(56) TakeOrderedAndProject +(56) Sort [codegen id : 11] Input [3]: [order count #35, total shipping cost #36, total net profit #37] -Arguments: 100, [order count #35 ASC NULLS FIRST], [order count #35, total shipping cost #36, total net profit #37] +Arguments: [order count #35 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt index 6d35311c810f5..191ff22c1961f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (11) +WholeStageCodegen (11) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt index 3561eff8f57ef..6729910d9cb4a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (28) +* Sort (28) +- * HashAggregate (27) +- Exchange (26) +- * HashAggregate (25) @@ -154,7 +154,7 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#17] Results [1]: [count(1)#17 AS count(1)#18] -(28) TakeOrderedAndProject +(28) Sort [codegen id : 5] Input [1]: [count(1)#18] -Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] +Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt index b13f28bf69cfd..45400b6c512f4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [count(1)] - WholeStageCodegen (5) +WholeStageCodegen (5) + Sort [count(1)] HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt index 0a2e88b5bc160..e904ad94dd8fa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt @@ -1,34 +1,33 @@ == Physical Plan == -CollectLimit (30) -+- * HashAggregate (29) - +- Exchange (28) - +- * HashAggregate (27) - +- * Project (26) - +- SortMergeJoin FullOuter (25) - :- * Sort (14) - : +- * HashAggregate (13) - : +- Exchange (12) - : +- * HashAggregate (11) - : +- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.date_dim (4) - +- * Sort (24) - +- * HashAggregate (23) - +- Exchange (22) - +- * HashAggregate (21) - +- * Project (20) - +- * BroadcastHashJoin Inner BuildRight (19) - :- * Filter (17) - : +- * ColumnarToRow (16) - : +- Scan parquet default.catalog_sales (15) - +- ReusedExchange (18) +* HashAggregate (29) ++- Exchange (28) + +- * HashAggregate (27) + +- * Project (26) + +- SortMergeJoin FullOuter (25) + :- * Sort (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Project (20) + +- * BroadcastHashJoin Inner BuildRight (19) + :- * Filter (17) + : +- * ColumnarToRow (16) + : +- Scan parquet default.catalog_sales (15) + +- ReusedExchange (18) (1) Scan parquet default.store_sales @@ -173,7 +172,3 @@ Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] -(30) CollectLimit -Input [3]: [store_only#26, catalog_only#27, store_and_catalog#28] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt index bae48ec244faa..c5921a11cd889 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt @@ -1,46 +1,45 @@ -CollectLimit - WholeStageCodegen (8) - HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] - InputAdapter - Exchange #1 - WholeStageCodegen (7) - HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] - Project [customer_sk,customer_sk] - InputAdapter - SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] - WholeStageCodegen (3) - Sort [customer_sk,item_sk] - HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [ss_customer_sk,ss_item_sk] #2 - WholeStageCodegen (2) - HashAggregate [ss_customer_sk,ss_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] - WholeStageCodegen (6) - Sort [customer_sk,item_sk] - HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #4 - WholeStageCodegen (5) - HashAggregate [cs_bill_customer_sk,cs_item_sk] - Project [cs_bill_customer_sk,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - InputAdapter - ReusedExchange [d_date_sk] #3 +WholeStageCodegen (8) + HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + WholeStageCodegen (3) + Sort [customer_sk,item_sk] + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_item_sk,ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + WholeStageCodegen (6) + Sort [customer_sk,item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #4 + WholeStageCodegen (5) + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 From c167a0dee6994984532d51d95badbab6e00a317f Mon Sep 17 00:00:00 2001 From: ulysses Date: Sat, 14 Nov 2020 21:51:37 +0800 Subject: [PATCH 07/16] remove eliminate local limit --- .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 53221bda1b7ca..ac3ad32dc55d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1464,8 +1464,6 @@ object EliminateLimits extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { case Limit(l, child) if canEliminate(l, child.maxRows) => child - case LocalLimit(l, child) if canEliminate(l, child.maxRows) => - child case GlobalLimit(le, GlobalLimit(ne, grandChild)) => GlobalLimit(Least(Seq(ne, le)), grandChild) From ff1ab01888f13f829c78e6d4da53b94c2adc6d16 Mon Sep 17 00:00:00 2001 From: ulysses Date: Sun, 15 Nov 2020 10:55:54 +0800 Subject: [PATCH 08/16] golden file --- .../approved-plans-v1_4/q16.sf100/explain.txt | 6 +- .../q16.sf100/simplified.txt | 4 +- .../q23a.sf100/explain.txt | 287 +++++++------ .../q23a.sf100/simplified.txt | 377 +++++++++--------- .../approved-plans-v1_4/q38.sf100/explain.txt | 139 ++++--- .../q38.sf100/simplified.txt | 189 +++++---- .../approved-plans-v1_4/q92.sf100/explain.txt | 6 +- .../q92.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q94.sf100/explain.txt | 6 +- .../q94.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q95.sf100/explain.txt | 6 +- .../q95.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q96.sf100/explain.txt | 6 +- .../q96.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q97.sf100/explain.txt | 63 ++- .../q97.sf100/simplified.txt | 91 +++-- 16 files changed, 589 insertions(+), 607 deletions(-) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt index 509fb0133095b..a446163e3d29d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (44) +* Sort (44) +- * HashAggregate (43) +- Exchange (42) +- * HashAggregate (41) @@ -244,7 +244,7 @@ Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#23, sum(UnscaledValue(cs_net_profit#7))#24, count(cs_order_number#5)#27] Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#23,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#24,17,2) AS total net profit #32] -(44) TakeOrderedAndProject +(44) Sort [codegen id : 12] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt index ea9a0b27ff700..73a9b58010f58 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (12) +WholeStageCodegen (12) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt index c5988072f758d..6eb1008ea4990 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt @@ -1,96 +1,95 @@ == Physical Plan == -CollectLimit (92) -+- * HashAggregate (91) - +- Exchange (90) - +- * HashAggregate (89) - +- Union (88) - :- * Project (60) - : +- * BroadcastHashJoin Inner BuildRight (59) - : :- * Project (53) - : : +- SortMergeJoin LeftSemi (52) - : : :- * Sort (34) - : : : +- Exchange (33) - : : : +- * Project (32) - : : : +- SortMergeJoin LeftSemi (31) - : : : :- * Sort (5) - : : : : +- Exchange (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.catalog_sales (1) - : : : +- * Sort (30) - : : : +- Exchange (29) - : : : +- * Project (28) - : : : +- * Filter (27) - : : : +- * HashAggregate (26) - : : : +- * HashAggregate (25) - : : : +- * Project (24) - : : : +- * SortMergeJoin Inner (23) - : : : :- * Sort (17) - : : : : +- Exchange (16) - : : : : +- * Project (15) - : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : :- * Filter (8) - : : : : : +- * ColumnarToRow (7) - : : : : : +- Scan parquet default.store_sales (6) - : : : : +- BroadcastExchange (13) - : : : : +- * Project (12) - : : : : +- * Filter (11) - : : : : +- * ColumnarToRow (10) - : : : : +- Scan parquet default.date_dim (9) - : : : +- * Sort (22) - : : : +- Exchange (21) - : : : +- * Filter (20) - : : : +- * ColumnarToRow (19) - : : : +- Scan parquet default.item (18) - : : +- * Sort (51) - : : +- * Project (50) - : : +- * Filter (49) - : : +- * HashAggregate (48) - : : +- * HashAggregate (47) - : : +- * Project (46) - : : +- * SortMergeJoin Inner (45) - : : :- * Sort (39) - : : : +- Exchange (38) - : : : +- * Filter (37) - : : : +- * ColumnarToRow (36) - : : : +- Scan parquet default.store_sales (35) - : : +- * Sort (44) - : : +- Exchange (43) - : : +- * Filter (42) - : : +- * ColumnarToRow (41) - : : +- Scan parquet default.customer (40) - : +- BroadcastExchange (58) - : +- * Project (57) - : +- * Filter (56) - : +- * ColumnarToRow (55) - : +- Scan parquet default.date_dim (54) - +- * Project (87) - +- * BroadcastHashJoin Inner BuildRight (86) - :- * Project (84) - : +- SortMergeJoin LeftSemi (83) - : :- * Sort (71) - : : +- Exchange (70) - : : +- * Project (69) - : : +- SortMergeJoin LeftSemi (68) - : : :- * Sort (65) - : : : +- Exchange (64) - : : : +- * Filter (63) - : : : +- * ColumnarToRow (62) - : : : +- Scan parquet default.web_sales (61) - : : +- * Sort (67) - : : +- ReusedExchange (66) - : +- * Sort (82) - : +- * Project (81) - : +- * Filter (80) - : +- * HashAggregate (79) - : +- * HashAggregate (78) - : +- * Project (77) - : +- * SortMergeJoin Inner (76) - : :- * Sort (73) - : : +- ReusedExchange (72) - : +- * Sort (75) - : +- ReusedExchange (74) - +- ReusedExchange (85) +* HashAggregate (91) ++- Exchange (90) + +- * HashAggregate (89) + +- Union (88) + :- * Project (60) + : +- * BroadcastHashJoin Inner BuildRight (59) + : :- * Project (53) + : : +- SortMergeJoin LeftSemi (52) + : : :- * Sort (34) + : : : +- Exchange (33) + : : : +- * Project (32) + : : : +- SortMergeJoin LeftSemi (31) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- * Sort (30) + : : : +- Exchange (29) + : : : +- * Project (28) + : : : +- * Filter (27) + : : : +- * HashAggregate (26) + : : : +- * HashAggregate (25) + : : : +- * Project (24) + : : : +- * SortMergeJoin Inner (23) + : : : :- * Sort (17) + : : : : +- Exchange (16) + : : : : +- * Project (15) + : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : :- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.store_sales (6) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * Filter (11) + : : : : +- * ColumnarToRow (10) + : : : : +- Scan parquet default.date_dim (9) + : : : +- * Sort (22) + : : : +- Exchange (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.item (18) + : : +- * Sort (51) + : : +- * Project (50) + : : +- * Filter (49) + : : +- * HashAggregate (48) + : : +- * HashAggregate (47) + : : +- * Project (46) + : : +- * SortMergeJoin Inner (45) + : : :- * Sort (39) + : : : +- Exchange (38) + : : : +- * Filter (37) + : : : +- * ColumnarToRow (36) + : : : +- Scan parquet default.store_sales (35) + : : +- * Sort (44) + : : +- Exchange (43) + : : +- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.customer (40) + : +- BroadcastExchange (58) + : +- * Project (57) + : +- * Filter (56) + : +- * ColumnarToRow (55) + : +- Scan parquet default.date_dim (54) + +- * Project (87) + +- * BroadcastHashJoin Inner BuildRight (86) + :- * Project (84) + : +- SortMergeJoin LeftSemi (83) + : :- * Sort (71) + : : +- Exchange (70) + : : +- * Project (69) + : : +- SortMergeJoin LeftSemi (68) + : : :- * Sort (65) + : : : +- Exchange (64) + : : : +- * Filter (63) + : : : +- * ColumnarToRow (62) + : : : +- Scan parquet default.web_sales (61) + : : +- * Sort (67) + : : +- ReusedExchange (66) + : +- * Sort (82) + : +- * Project (81) + : +- * Filter (80) + : +- * HashAggregate (79) + : +- * HashAggregate (78) + : +- * Project (77) + : +- * SortMergeJoin Inner (76) + : :- * Sort (73) + : : +- ReusedExchange (72) + : +- * Sort (75) + : +- ReusedExchange (74) + +- ReusedExchange (85) (1) Scan parquet default.catalog_sales @@ -501,149 +500,145 @@ Functions [1]: [sum(sales#41)] Aggregate Attributes [1]: [sum(sales#41)#61] Results [1]: [sum(sales#41)#61 AS sum(sales)#62] -(92) CollectLimit -Input [1]: [sum(sales)#62] -Arguments: 100 - ===== Subqueries ===== Subquery:1 Hosting operator id = 49 Hosting Expression = Subquery scalar-subquery#37, [id=#38] -* HashAggregate (116) -+- Exchange (115) - +- * HashAggregate (114) - +- * HashAggregate (113) - +- * HashAggregate (112) - +- * Project (111) - +- * SortMergeJoin Inner (110) - :- * Sort (104) - : +- Exchange (103) - : +- * Project (102) - : +- * BroadcastHashJoin Inner BuildRight (101) - : :- * Filter (95) - : : +- * ColumnarToRow (94) - : : +- Scan parquet default.store_sales (93) - : +- BroadcastExchange (100) - : +- * Project (99) - : +- * Filter (98) - : +- * ColumnarToRow (97) - : +- Scan parquet default.date_dim (96) - +- * Sort (109) - +- Exchange (108) - +- * Filter (107) - +- * ColumnarToRow (106) - +- Scan parquet default.customer (105) - - -(93) Scan parquet default.store_sales +* HashAggregate (115) ++- Exchange (114) + +- * HashAggregate (113) + +- * HashAggregate (112) + +- * HashAggregate (111) + +- * Project (110) + +- * SortMergeJoin Inner (109) + :- * Sort (103) + : +- Exchange (102) + : +- * Project (101) + : +- * BroadcastHashJoin Inner BuildRight (100) + : :- * Filter (94) + : : +- * ColumnarToRow (93) + : : +- Scan parquet default.store_sales (92) + : +- BroadcastExchange (99) + : +- * Project (98) + : +- * Filter (97) + : +- * ColumnarToRow (96) + : +- Scan parquet default.date_dim (95) + +- * Sort (108) + +- Exchange (107) + +- * Filter (106) + +- * ColumnarToRow (105) + +- Scan parquet default.customer (104) + + +(92) Scan parquet default.store_sales Output [4]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(94) ColumnarToRow [codegen id : 2] +(93) ColumnarToRow [codegen id : 2] Input [4]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] -(95) Filter [codegen id : 2] +(94) Filter [codegen id : 2] Input [4]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] Condition : (isnotnull(ss_customer_sk#25) AND isnotnull(ss_sold_date_sk#7)) -(96) Scan parquet default.date_dim +(95) Scan parquet default.date_dim Output [2]: [d_date_sk#9, d_year#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct -(97) ColumnarToRow [codegen id : 1] +(96) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#9, d_year#11] -(98) Filter [codegen id : 1] +(97) Filter [codegen id : 1] Input [2]: [d_date_sk#9, d_year#11] Condition : (d_year#11 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#9)) -(99) Project [codegen id : 1] +(98) Project [codegen id : 1] Output [1]: [d_date_sk#9] Input [2]: [d_date_sk#9, d_year#11] -(100) BroadcastExchange +(99) BroadcastExchange Input [1]: [d_date_sk#9] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#63] -(101) BroadcastHashJoin [codegen id : 2] +(100) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#7] Right keys [1]: [d_date_sk#9] Join condition: None -(102) Project [codegen id : 2] +(101) Project [codegen id : 2] Output [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] Input [5]: [ss_sold_date_sk#7, ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, d_date_sk#9] -(103) Exchange +(102) Exchange Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] Arguments: hashpartitioning(ss_customer_sk#25, 5), true, [id=#64] -(104) Sort [codegen id : 3] +(103) Sort [codegen id : 3] Input [3]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27] Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 -(105) Scan parquet default.customer +(104) Scan parquet default.customer Output [1]: [c_customer_sk#29] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(106) ColumnarToRow [codegen id : 4] +(105) ColumnarToRow [codegen id : 4] Input [1]: [c_customer_sk#29] -(107) Filter [codegen id : 4] +(106) Filter [codegen id : 4] Input [1]: [c_customer_sk#29] Condition : isnotnull(c_customer_sk#29) -(108) Exchange +(107) Exchange Input [1]: [c_customer_sk#29] Arguments: hashpartitioning(c_customer_sk#29, 5), true, [id=#65] -(109) Sort [codegen id : 5] +(108) Sort [codegen id : 5] Input [1]: [c_customer_sk#29] Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 -(110) SortMergeJoin [codegen id : 6] +(109) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#25] Right keys [1]: [c_customer_sk#29] Join condition: None -(111) Project [codegen id : 6] +(110) Project [codegen id : 6] Output [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] Input [4]: [ss_customer_sk#25, ss_quantity#26, ss_sales_price#27, c_customer_sk#29] -(112) HashAggregate [codegen id : 6] +(111) HashAggregate [codegen id : 6] Input [3]: [ss_quantity#26, ss_sales_price#27, c_customer_sk#29] Keys [1]: [c_customer_sk#29] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#66, isEmpty#67] Results [3]: [c_customer_sk#29, sum#68, isEmpty#69] -(113) HashAggregate [codegen id : 6] +(112) HashAggregate [codegen id : 6] Input [3]: [c_customer_sk#29, sum#68, isEmpty#69] Keys [1]: [c_customer_sk#29] Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#70] Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#26 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#27 as decimal(12,2)))), DecimalType(18,2), true))#70 AS csales#71] -(114) HashAggregate [codegen id : 6] +(113) HashAggregate [codegen id : 6] Input [1]: [csales#71] Keys: [] Functions [1]: [partial_max(csales#71)] Aggregate Attributes [1]: [max#72] Results [1]: [max#73] -(115) Exchange +(114) Exchange Input [1]: [max#73] Arguments: SinglePartition, true, [id=#74] -(116) HashAggregate [codegen id : 7] +(115) HashAggregate [codegen id : 7] Input [1]: [max#73] Keys: [] Functions [1]: [max(csales#71)] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt index 9ee444cdd988c..3059af5f716e5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt @@ -1,198 +1,197 @@ -CollectLimit - WholeStageCodegen (38) - HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] - InputAdapter - Exchange #1 - WholeStageCodegen (37) - HashAggregate [sales] [sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (18) - Project [cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] - InputAdapter - SortMergeJoin [cs_bill_customer_sk,c_customer_sk] - WholeStageCodegen (11) - Sort [cs_bill_customer_sk] - InputAdapter - Exchange [cs_bill_customer_sk] #2 - WholeStageCodegen (10) - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] +WholeStageCodegen (38) + HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] + InputAdapter + Exchange #1 + WholeStageCodegen (37) + HashAggregate [sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (18) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] + WholeStageCodegen (11) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #2 + WholeStageCodegen (10) + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] + InputAdapter + SortMergeJoin [cs_item_sk,item_sk] + WholeStageCodegen (2) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #3 + WholeStageCodegen (1) + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] + WholeStageCodegen (9) + Sort [item_sk] + InputAdapter + Exchange [item_sk] #4 + WholeStageCodegen (8) + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (5) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #5 + WholeStageCodegen (4) + Project [ss_item_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [d_date_sk,d_date] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_year] + InputAdapter + WholeStageCodegen (7) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #7 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_desc] + WholeStageCodegen (16) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (7) + HashAggregate [max] [max(csales),tpcds_cmax,max] InputAdapter - SortMergeJoin [cs_item_sk,item_sk] - WholeStageCodegen (2) - Sort [cs_item_sk] - InputAdapter - Exchange [cs_item_sk] #3 - WholeStageCodegen (1) - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] - WholeStageCodegen (9) - Sort [item_sk] - InputAdapter - Exchange [item_sk] #4 - WholeStageCodegen (8) - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (5) - Sort [ss_item_sk] + Exchange #10 + WholeStageCodegen (6) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #11 + WholeStageCodegen (2) + Project [ss_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] InputAdapter - Exchange [ss_item_sk] #5 - WholeStageCodegen (4) - Project [ss_item_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (3) - Project [d_date_sk,d_date] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_year] - InputAdapter - WholeStageCodegen (7) - Sort [i_item_sk] + BroadcastExchange #12 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #13 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow InputAdapter - Exchange [i_item_sk] #7 - WholeStageCodegen (6) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] - WholeStageCodegen (16) - Sort [c_customer_sk] - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - Subquery #1 - WholeStageCodegen (7) - HashAggregate [max] [max(csales),tpcds_cmax,max] + Scan parquet default.customer [c_customer_sk] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter - Exchange #10 - WholeStageCodegen (6) - HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (3) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #11 - WholeStageCodegen (2) - Project [ss_customer_sk,ss_quantity,ss_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - BroadcastExchange #12 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - WholeStageCodegen (5) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #13 - WholeStageCodegen (4) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (13) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #8 - WholeStageCodegen (12) - Filter [ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - WholeStageCodegen (15) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #9 - WholeStageCodegen (14) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (17) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (36) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] - InputAdapter - SortMergeJoin [ws_bill_customer_sk,c_customer_sk] - WholeStageCodegen (29) - Sort [ws_bill_customer_sk] - InputAdapter - Exchange [ws_bill_customer_sk] #15 - WholeStageCodegen (28) - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - InputAdapter - SortMergeJoin [ws_item_sk,item_sk] - WholeStageCodegen (20) - Sort [ws_item_sk] + WholeStageCodegen (13) + Sort [ss_customer_sk] InputAdapter - Exchange [ws_item_sk] #16 - WholeStageCodegen (19) - Filter [ws_sold_date_sk] + Exchange [ss_customer_sk] #8 + WholeStageCodegen (12) + Filter [ss_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - WholeStageCodegen (27) - Sort [item_sk] + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + WholeStageCodegen (15) + Sort [c_customer_sk] InputAdapter - ReusedExchange [item_sk] #4 - WholeStageCodegen (34) - Sort [c_customer_sk] - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [tpcds_cmax] #1 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (31) - Sort [ss_customer_sk] - InputAdapter - ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #8 - InputAdapter - WholeStageCodegen (33) - Sort [c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk] #9 + Exchange [c_customer_sk] #9 + WholeStageCodegen (14) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (17) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (36) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter - ReusedExchange [d_date_sk] #14 + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + WholeStageCodegen (29) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #15 + WholeStageCodegen (28) + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + InputAdapter + SortMergeJoin [ws_item_sk,item_sk] + WholeStageCodegen (20) + Sort [ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #16 + WholeStageCodegen (19) + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + WholeStageCodegen (27) + Sort [item_sk] + InputAdapter + ReusedExchange [item_sk] #4 + WholeStageCodegen (34) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (31) + Sort [ss_customer_sk] + InputAdapter + ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #8 + InputAdapter + WholeStageCodegen (33) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk] #9 + InputAdapter + ReusedExchange [d_date_sk] #14 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt index 92b9c26825e51..7465ddae84e8a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt @@ -1,72 +1,71 @@ == Physical Plan == -CollectLimit (68) -+- * HashAggregate (67) - +- Exchange (66) - +- * HashAggregate (65) - +- * HashAggregate (64) - +- * HashAggregate (63) - +- * HashAggregate (62) - +- * HashAggregate (61) - +- * HashAggregate (60) - +- Exchange (59) - +- * HashAggregate (58) - +- SortMergeJoin LeftSemi (57) - :- SortMergeJoin LeftSemi (39) - : :- * Sort (21) - : : +- Exchange (20) - : : +- * Project (19) - : : +- * SortMergeJoin Inner (18) - : : :- * Sort (12) - : : : +- Exchange (11) - : : : +- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- * Sort (17) - : : +- Exchange (16) - : : +- * Filter (15) - : : +- * ColumnarToRow (14) - : : +- Scan parquet default.customer (13) - : +- * Sort (38) - : +- Exchange (37) - : +- * HashAggregate (36) - : +- Exchange (35) - : +- * HashAggregate (34) - : +- * Project (33) - : +- * SortMergeJoin Inner (32) - : :- * Sort (29) - : : +- Exchange (28) - : : +- * Project (27) - : : +- * BroadcastHashJoin Inner BuildRight (26) - : : :- * Filter (24) - : : : +- * ColumnarToRow (23) - : : : +- Scan parquet default.catalog_sales (22) - : : +- ReusedExchange (25) - : +- * Sort (31) - : +- ReusedExchange (30) - +- * Sort (56) - +- Exchange (55) - +- * HashAggregate (54) - +- Exchange (53) - +- * HashAggregate (52) - +- * Project (51) - +- * SortMergeJoin Inner (50) - :- * Sort (47) - : +- Exchange (46) - : +- * Project (45) - : +- * BroadcastHashJoin Inner BuildRight (44) - : :- * Filter (42) - : : +- * ColumnarToRow (41) - : : +- Scan parquet default.web_sales (40) - : +- ReusedExchange (43) - +- * Sort (49) - +- ReusedExchange (48) +* HashAggregate (67) ++- Exchange (66) + +- * HashAggregate (65) + +- * HashAggregate (64) + +- * HashAggregate (63) + +- * HashAggregate (62) + +- * HashAggregate (61) + +- * HashAggregate (60) + +- Exchange (59) + +- * HashAggregate (58) + +- SortMergeJoin LeftSemi (57) + :- SortMergeJoin LeftSemi (39) + : :- * Sort (21) + : : +- Exchange (20) + : : +- * Project (19) + : : +- * SortMergeJoin Inner (18) + : : :- * Sort (12) + : : : +- Exchange (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- * Sort (17) + : : +- Exchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.customer (13) + : +- * Sort (38) + : +- Exchange (37) + : +- * HashAggregate (36) + : +- Exchange (35) + : +- * HashAggregate (34) + : +- * Project (33) + : +- * SortMergeJoin Inner (32) + : :- * Sort (29) + : : +- Exchange (28) + : : +- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Filter (24) + : : : +- * ColumnarToRow (23) + : : : +- Scan parquet default.catalog_sales (22) + : : +- ReusedExchange (25) + : +- * Sort (31) + : +- ReusedExchange (30) + +- * Sort (56) + +- Exchange (55) + +- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- * Project (51) + +- * SortMergeJoin Inner (50) + :- * Sort (47) + : +- Exchange (46) + : +- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.web_sales (40) + : +- ReusedExchange (43) + +- * Sort (49) + +- ReusedExchange (48) (1) Scan parquet default.store_sales @@ -387,7 +386,3 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#37] Results [1]: [count(1)#37 AS count(1)#38] -(68) CollectLimit -Input [1]: [count(1)#38] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt index 5bcd7dbb93022..8dd59340cf069 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt @@ -1,118 +1,117 @@ -CollectLimit - WholeStageCodegen (26) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #1 - WholeStageCodegen (25) - HashAggregate [count,count] +WholeStageCodegen (26) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (25) + HashAggregate [count,count] + HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (24) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (24) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - WholeStageCodegen (7) - Sort [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #3 - WholeStageCodegen (6) - Project [d_date,c_first_name,c_last_name] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (3) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #4 - WholeStageCodegen (2) - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - InputAdapter - WholeStageCodegen (5) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #6 - WholeStageCodegen (4) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] - WholeStageCodegen (15) - Sort [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #7 - WholeStageCodegen (14) - HashAggregate [c_last_name,c_first_name,d_date] + WholeStageCodegen (7) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #3 + WholeStageCodegen (6) + Project [d_date,c_first_name,c_last_name] + SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 - WholeStageCodegen (13) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - SortMergeJoin [cs_bill_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (10) - Sort [cs_bill_customer_sk] - InputAdapter - Exchange [cs_bill_customer_sk] #9 - WholeStageCodegen (9) - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #4 + WholeStageCodegen (2) + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow InputAdapter - ReusedExchange [d_date_sk,d_date] #5 - InputAdapter - WholeStageCodegen (12) - Sort [c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 - WholeStageCodegen (23) + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #6 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + WholeStageCodegen (15) Sort [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #10 - WholeStageCodegen (22) + Exchange [c_last_name,c_first_name,d_date] #7 + WholeStageCodegen (14) HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #11 - WholeStageCodegen (21) + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (13) HashAggregate [c_last_name,c_first_name,d_date] Project [c_last_name,c_first_name,d_date] - SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] InputAdapter - WholeStageCodegen (18) - Sort [ws_bill_customer_sk] + WholeStageCodegen (10) + Sort [cs_bill_customer_sk] InputAdapter - Exchange [ws_bill_customer_sk] #12 - WholeStageCodegen (17) - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_customer_sk] + Exchange [cs_bill_customer_sk] #9 + WholeStageCodegen (9) + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] InputAdapter ReusedExchange [d_date_sk,d_date] #5 InputAdapter - WholeStageCodegen (20) + WholeStageCodegen (12) Sort [c_customer_sk] InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 + WholeStageCodegen (23) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #10 + WholeStageCodegen (22) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #11 + WholeStageCodegen (21) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (18) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #12 + WholeStageCodegen (17) + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #5 + InputAdapter + WholeStageCodegen (20) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt index dc4665185b014..99459bfe9a049 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (34) +* Sort (34) +- * HashAggregate (33) +- Exchange (32) +- * HashAggregate (31) @@ -190,7 +190,7 @@ Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))#22] Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#6))#22,17,2) AS Excess Discount Amount #23] -(34) TakeOrderedAndProject +(34) Sort [codegen id : 7] Input [1]: [Excess Discount Amount #23] -Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] +Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt index 7fd1cd3637a09..0721155286d17 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [Excess Discount Amount ] - WholeStageCodegen (7) +WholeStageCodegen (7) + Sort [Excess Discount Amount ] HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt index 7720d9dee4170..43390c5048a6d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (47) +* Sort (47) +- * HashAggregate (46) +- Exchange (45) +- * HashAggregate (44) @@ -259,7 +259,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#24, sum(UnscaledValue(ws_net_profit#7))#25, count(ws_order_number#5)#29] Results [3]: [count(ws_order_number#5)#29 AS order count #32, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#24,17,2) AS total shipping cost #33, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#25,17,2) AS total net profit #34] -(47) TakeOrderedAndProject +(47) Sort [codegen id : 14] Input [3]: [order count #32, total shipping cost #33, total net profit #34] -Arguments: 100, [order count #32 ASC NULLS FIRST], [order count #32, total shipping cost #33, total net profit #34] +Arguments: [order count #32 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt index 128a8179ac10b..7b3d461b9e80f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (14) +WholeStageCodegen (14) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt index eae118d46245d..b38f8e5039fd8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (63) +* Sort (63) +- * HashAggregate (62) +- Exchange (61) +- * HashAggregate (60) @@ -341,7 +341,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#29, sum(UnscaledValue(ws_net_profit#6))#30, count(ws_order_number#4)#34] Results [3]: [count(ws_order_number#4)#34 AS order count #37, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#29,17,2) AS total shipping cost #38, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#30,17,2) AS total net profit #39] -(63) TakeOrderedAndProject +(63) Sort [codegen id : 24] Input [3]: [order count #37, total shipping cost #38, total net profit #39] -Arguments: 100, [order count #37 ASC NULLS FIRST], [order count #37, total shipping cost #38, total net profit #39] +Arguments: [order count #37 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt index bdcbb87b372dc..20c2beda76d04 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (24) +WholeStageCodegen (24) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt index d00029f985471..5ae0e1632f15b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (28) +* Sort (28) +- * HashAggregate (27) +- Exchange (26) +- * HashAggregate (25) @@ -154,7 +154,7 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#17] Results [1]: [count(1)#17 AS count(1)#18] -(28) TakeOrderedAndProject +(28) Sort [codegen id : 5] Input [1]: [count(1)#18] -Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] +Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt index 1355caffbbfe8..d9ee3e09481ed 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [count(1)] - WholeStageCodegen (5) +WholeStageCodegen (5) + Sort [count(1)] HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt index 0a2e88b5bc160..e904ad94dd8fa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt @@ -1,34 +1,33 @@ == Physical Plan == -CollectLimit (30) -+- * HashAggregate (29) - +- Exchange (28) - +- * HashAggregate (27) - +- * Project (26) - +- SortMergeJoin FullOuter (25) - :- * Sort (14) - : +- * HashAggregate (13) - : +- Exchange (12) - : +- * HashAggregate (11) - : +- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.date_dim (4) - +- * Sort (24) - +- * HashAggregate (23) - +- Exchange (22) - +- * HashAggregate (21) - +- * Project (20) - +- * BroadcastHashJoin Inner BuildRight (19) - :- * Filter (17) - : +- * ColumnarToRow (16) - : +- Scan parquet default.catalog_sales (15) - +- ReusedExchange (18) +* HashAggregate (29) ++- Exchange (28) + +- * HashAggregate (27) + +- * Project (26) + +- SortMergeJoin FullOuter (25) + :- * Sort (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Project (20) + +- * BroadcastHashJoin Inner BuildRight (19) + :- * Filter (17) + : +- * ColumnarToRow (16) + : +- Scan parquet default.catalog_sales (15) + +- ReusedExchange (18) (1) Scan parquet default.store_sales @@ -173,7 +172,3 @@ Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] -(30) CollectLimit -Input [3]: [store_only#26, catalog_only#27, store_and_catalog#28] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt index bae48ec244faa..c5921a11cd889 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt @@ -1,46 +1,45 @@ -CollectLimit - WholeStageCodegen (8) - HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] - InputAdapter - Exchange #1 - WholeStageCodegen (7) - HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] - Project [customer_sk,customer_sk] - InputAdapter - SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] - WholeStageCodegen (3) - Sort [customer_sk,item_sk] - HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [ss_customer_sk,ss_item_sk] #2 - WholeStageCodegen (2) - HashAggregate [ss_customer_sk,ss_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] - WholeStageCodegen (6) - Sort [customer_sk,item_sk] - HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #4 - WholeStageCodegen (5) - HashAggregate [cs_bill_customer_sk,cs_item_sk] - Project [cs_bill_customer_sk,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - InputAdapter - ReusedExchange [d_date_sk] #3 +WholeStageCodegen (8) + HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + WholeStageCodegen (3) + Sort [customer_sk,item_sk] + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_item_sk,ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + WholeStageCodegen (6) + Sort [customer_sk,item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #4 + WholeStageCodegen (5) + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 From 6576929921690d2edd4b9b41136d253f7e7d00f2 Mon Sep 17 00:00:00 2001 From: ulysses Date: Mon, 16 Nov 2020 09:23:04 +0800 Subject: [PATCH 09/16] skip sort after limit --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 14 +++++++++++--- .../catalyst/optimizer/CombiningLimitsSuite.scala | 5 +++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index ac3ad32dc55d9..bd2614bbbc4ca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1457,12 +1457,20 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { * expressions into one single expression. */ object EliminateLimits extends Rule[LogicalPlan] { - private def canEliminate(limitExpr: Expression, childMaxRow: Option[Long]): Boolean = { - limitExpr.foldable && childMaxRow.exists { _ <= limitExpr.eval().toString.toLong } + private def canEliminate(limitExpr: Expression, child: LogicalPlan): Boolean = { + // We skip such case that Sort is after Limit since + // SparkStrategies will convert them to TakeOrderedAndProjectExec + val skipEliminate = child match { + case Sort(_, true, _) => true + case Project(_, Sort(_, true, _)) => true + case _ => false + } + !skipEliminate && limitExpr.foldable && + child.maxRows.exists { _ <= limitExpr.eval().toString.toLong } } def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { - case Limit(l, child) if canEliminate(l, child.maxRows) => + case Limit(l, child) if canEliminate(l, child) => child case GlobalLimit(le, GlobalLimit(ne, grandChild)) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala index ad8c0f75f8617..b55fd1eac4176 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -107,5 +107,10 @@ class CombiningLimitsSuite extends PlanTest { val query3 = testRelation.select(Symbol("a")).limit(1).analyze val optimized3 = Optimize.execute(query3) comparePlans(optimized3, query3) + + // test sort after limit + val query4 = testRelation.select(Symbol("a")).orderBy(Symbol("a").asc).limit(1).analyze + val optimized4 = Optimize.execute(query4) + comparePlans(optimized4, optimized4) } } From 7b4e4d6613b14704063883114116143cb97c3c74 Mon Sep 17 00:00:00 2001 From: ulysses Date: Mon, 16 Nov 2020 09:23:17 +0800 Subject: [PATCH 10/16] golden file --- .../approved-plans-v1_4/q16.sf100/explain.txt | 6 +++--- .../approved-plans-v1_4/q16.sf100/simplified.txt | 4 ++-- .../approved-plans-v1_4/q16/explain.txt | 6 +++--- .../approved-plans-v1_4/q16/simplified.txt | 4 ++-- .../approved-plans-v1_4/q92.sf100/explain.txt | 6 +++--- .../approved-plans-v1_4/q92.sf100/simplified.txt | 4 ++-- .../approved-plans-v1_4/q92/explain.txt | 6 +++--- .../approved-plans-v1_4/q92/simplified.txt | 4 ++-- .../approved-plans-v1_4/q94.sf100/explain.txt | 6 +++--- .../approved-plans-v1_4/q94.sf100/simplified.txt | 4 ++-- .../approved-plans-v1_4/q94/explain.txt | 6 +++--- .../approved-plans-v1_4/q94/simplified.txt | 4 ++-- .../approved-plans-v1_4/q95.sf100/explain.txt | 6 +++--- .../approved-plans-v1_4/q95.sf100/simplified.txt | 4 ++-- .../approved-plans-v1_4/q95/explain.txt | 6 +++--- .../approved-plans-v1_4/q95/simplified.txt | 4 ++-- .../approved-plans-v1_4/q96.sf100/explain.txt | 6 +++--- .../approved-plans-v1_4/q96.sf100/simplified.txt | 4 ++-- .../approved-plans-v1_4/q96/explain.txt | 6 +++--- .../approved-plans-v1_4/q96/simplified.txt | 4 ++-- 20 files changed, 50 insertions(+), 50 deletions(-) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt index a446163e3d29d..509fb0133095b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -* Sort (44) +TakeOrderedAndProject (44) +- * HashAggregate (43) +- Exchange (42) +- * HashAggregate (41) @@ -244,7 +244,7 @@ Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#23, sum(UnscaledValue(cs_net_profit#7))#24, count(cs_order_number#5)#27] Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#23,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#24,17,2) AS total net profit #32] -(44) Sort [codegen id : 12] +(44) TakeOrderedAndProject Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: [order count #30 ASC NULLS FIRST], true, 0 +Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt index 73a9b58010f58..ea9a0b27ff700 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt @@ -1,5 +1,5 @@ -WholeStageCodegen (12) - Sort [order count ] +TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] + WholeStageCodegen (12) HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt index ea7e298393e4c..2ae939cfe41f3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -* Sort (41) +TakeOrderedAndProject (41) +- * HashAggregate (40) +- Exchange (39) +- * HashAggregate (38) @@ -229,7 +229,7 @@ Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#27] Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#23,17,2) AS total net profit #32] -(41) Sort [codegen id : 8] +(41) TakeOrderedAndProject Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: [order count #30 ASC NULLS FIRST], true, 0 +Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt index 169f07c2d85e5..a044b05365f8e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt @@ -1,5 +1,5 @@ -WholeStageCodegen (8) - Sort [order count ] +TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] + WholeStageCodegen (8) HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt index 99459bfe9a049..dc4665185b014 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -* Sort (34) +TakeOrderedAndProject (34) +- * HashAggregate (33) +- Exchange (32) +- * HashAggregate (31) @@ -190,7 +190,7 @@ Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))#22] Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#6))#22,17,2) AS Excess Discount Amount #23] -(34) Sort [codegen id : 7] +(34) TakeOrderedAndProject Input [1]: [Excess Discount Amount #23] -Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 +Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt index 0721155286d17..7fd1cd3637a09 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt @@ -1,5 +1,5 @@ -WholeStageCodegen (7) - Sort [Excess Discount Amount ] +TakeOrderedAndProject [Excess Discount Amount ] + WholeStageCodegen (7) HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt index 8a441392f4165..b17a48db8baac 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -* Sort (34) +TakeOrderedAndProject (34) +- * HashAggregate (33) +- Exchange (32) +- * HashAggregate (31) @@ -190,7 +190,7 @@ Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))#22] Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#3))#22,17,2) AS Excess Discount Amount #23] -(34) Sort [codegen id : 7] +(34) TakeOrderedAndProject Input [1]: [Excess Discount Amount #23] -Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 +Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt index 1f24a7c964f20..652b2e36cf781 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt @@ -1,5 +1,5 @@ -WholeStageCodegen (7) - Sort [Excess Discount Amount ] +TakeOrderedAndProject [Excess Discount Amount ] + WholeStageCodegen (7) HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt index 43390c5048a6d..7720d9dee4170 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -* Sort (47) +TakeOrderedAndProject (47) +- * HashAggregate (46) +- Exchange (45) +- * HashAggregate (44) @@ -259,7 +259,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#24, sum(UnscaledValue(ws_net_profit#7))#25, count(ws_order_number#5)#29] Results [3]: [count(ws_order_number#5)#29 AS order count #32, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#24,17,2) AS total shipping cost #33, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#25,17,2) AS total net profit #34] -(47) Sort [codegen id : 14] +(47) TakeOrderedAndProject Input [3]: [order count #32, total shipping cost #33, total net profit #34] -Arguments: [order count #32 ASC NULLS FIRST], true, 0 +Arguments: 100, [order count #32 ASC NULLS FIRST], [order count #32, total shipping cost #33, total net profit #34] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt index 7b3d461b9e80f..128a8179ac10b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt @@ -1,5 +1,5 @@ -WholeStageCodegen (14) - Sort [order count ] +TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] + WholeStageCodegen (14) HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt index 2abbe4f9b8390..a94e74f66b201 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -* Sort (41) +TakeOrderedAndProject (41) +- * HashAggregate (40) +- Exchange (39) +- * HashAggregate (38) @@ -229,7 +229,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#27] Results [3]: [count(ws_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#23,17,2) AS total net profit #32] -(41) Sort [codegen id : 8] +(41) TakeOrderedAndProject Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: [order count #30 ASC NULLS FIRST], true, 0 +Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt index 5e7d7db5c0a9e..9d30b998fe174 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt @@ -1,5 +1,5 @@ -WholeStageCodegen (8) - Sort [order count ] +TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] + WholeStageCodegen (8) HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt index b38f8e5039fd8..eae118d46245d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -* Sort (63) +TakeOrderedAndProject (63) +- * HashAggregate (62) +- Exchange (61) +- * HashAggregate (60) @@ -341,7 +341,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#29, sum(UnscaledValue(ws_net_profit#6))#30, count(ws_order_number#4)#34] Results [3]: [count(ws_order_number#4)#34 AS order count #37, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#29,17,2) AS total shipping cost #38, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#30,17,2) AS total net profit #39] -(63) Sort [codegen id : 24] +(63) TakeOrderedAndProject Input [3]: [order count #37, total shipping cost #38, total net profit #39] -Arguments: [order count #37 ASC NULLS FIRST], true, 0 +Arguments: 100, [order count #37 ASC NULLS FIRST], [order count #37, total shipping cost #38, total net profit #39] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt index 20c2beda76d04..bdcbb87b372dc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt @@ -1,5 +1,5 @@ -WholeStageCodegen (24) - Sort [order count ] +TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] + WholeStageCodegen (24) HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt index 1cc99e296383f..3a24e83aff256 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -* Sort (56) +TakeOrderedAndProject (56) +- * HashAggregate (55) +- Exchange (54) +- * HashAggregate (53) @@ -312,7 +312,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#32] Results [3]: [count(ws_order_number#4)#32 AS order count #35, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#27,17,2) AS total shipping cost #36, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#28,17,2) AS total net profit #37] -(56) Sort [codegen id : 11] +(56) TakeOrderedAndProject Input [3]: [order count #35, total shipping cost #36, total net profit #37] -Arguments: [order count #35 ASC NULLS FIRST], true, 0 +Arguments: 100, [order count #35 ASC NULLS FIRST], [order count #35, total shipping cost #36, total net profit #37] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt index 191ff22c1961f..6d35311c810f5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt @@ -1,5 +1,5 @@ -WholeStageCodegen (11) - Sort [order count ] +TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] + WholeStageCodegen (11) HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt index 5ae0e1632f15b..d00029f985471 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -* Sort (28) +TakeOrderedAndProject (28) +- * HashAggregate (27) +- Exchange (26) +- * HashAggregate (25) @@ -154,7 +154,7 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#17] Results [1]: [count(1)#17 AS count(1)#18] -(28) Sort [codegen id : 5] +(28) TakeOrderedAndProject Input [1]: [count(1)#18] -Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 +Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt index d9ee3e09481ed..1355caffbbfe8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt @@ -1,5 +1,5 @@ -WholeStageCodegen (5) - Sort [count(1)] +TakeOrderedAndProject [count(1)] + WholeStageCodegen (5) HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt index 6729910d9cb4a..3561eff8f57ef 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -* Sort (28) +TakeOrderedAndProject (28) +- * HashAggregate (27) +- Exchange (26) +- * HashAggregate (25) @@ -154,7 +154,7 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#17] Results [1]: [count(1)#17 AS count(1)#18] -(28) Sort [codegen id : 5] +(28) TakeOrderedAndProject Input [1]: [count(1)#18] -Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 +Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt index 45400b6c512f4..b13f28bf69cfd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt @@ -1,5 +1,5 @@ -WholeStageCodegen (5) - Sort [count(1)] +TakeOrderedAndProject [count(1)] + WholeStageCodegen (5) HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 From 65e74062d1abb36e3c2589350407cde9caadfee5 Mon Sep 17 00:00:00 2001 From: ulysses Date: Mon, 16 Nov 2020 21:16:13 +0800 Subject: [PATCH 11/16] remove limit --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 10 +--------- .../sql/catalyst/optimizer/CombiningLimitsSuite.scala | 7 +++++-- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index bd2614bbbc4ca..c92725ebeb91a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1458,15 +1458,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { */ object EliminateLimits extends Rule[LogicalPlan] { private def canEliminate(limitExpr: Expression, child: LogicalPlan): Boolean = { - // We skip such case that Sort is after Limit since - // SparkStrategies will convert them to TakeOrderedAndProjectExec - val skipEliminate = child match { - case Sort(_, true, _) => true - case Project(_, Sort(_, true, _)) => true - case _ => false - } - !skipEliminate && limitExpr.foldable && - child.maxRows.exists { _ <= limitExpr.eval().toString.toLong } + limitExpr.foldable && child.maxRows.exists { _ <= limitExpr.eval().toString.toLong } } def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala index b55fd1eac4176..9da13e5ddee1f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -109,8 +109,11 @@ class CombiningLimitsSuite extends PlanTest { comparePlans(optimized3, query3) // test sort after limit - val query4 = testRelation.select(Symbol("a")).orderBy(Symbol("a").asc).limit(1).analyze + val query4 = testRelation.select().groupBy()(count(1)) + .orderBy(Symbol("a").asc).limit(1).analyze val optimized4 = Optimize.execute(query4) - comparePlans(optimized4, optimized4) + val expected4 = testRelation.select().groupBy()(count(1)) + .orderBy(Symbol("a").asc).analyze + comparePlans(optimized4, expected4) } } From 4d45c0a0523043dcab0f81f08fff9ce9e848c69c Mon Sep 17 00:00:00 2001 From: ulysses Date: Mon, 16 Nov 2020 21:16:28 +0800 Subject: [PATCH 12/16] golden file --- .../approved-plans-v1_4/q16.sf100/explain.txt | 6 +++--- .../approved-plans-v1_4/q16.sf100/simplified.txt | 4 ++-- .../approved-plans-v1_4/q16/explain.txt | 6 +++--- .../approved-plans-v1_4/q16/simplified.txt | 4 ++-- .../approved-plans-v1_4/q92.sf100/explain.txt | 6 +++--- .../approved-plans-v1_4/q92.sf100/simplified.txt | 4 ++-- .../approved-plans-v1_4/q92/explain.txt | 6 +++--- .../approved-plans-v1_4/q92/simplified.txt | 4 ++-- .../approved-plans-v1_4/q94.sf100/explain.txt | 6 +++--- .../approved-plans-v1_4/q94.sf100/simplified.txt | 4 ++-- .../approved-plans-v1_4/q94/explain.txt | 6 +++--- .../approved-plans-v1_4/q94/simplified.txt | 4 ++-- .../approved-plans-v1_4/q95.sf100/explain.txt | 6 +++--- .../approved-plans-v1_4/q95.sf100/simplified.txt | 4 ++-- .../approved-plans-v1_4/q95/explain.txt | 6 +++--- .../approved-plans-v1_4/q95/simplified.txt | 4 ++-- .../approved-plans-v1_4/q96.sf100/explain.txt | 6 +++--- .../approved-plans-v1_4/q96.sf100/simplified.txt | 4 ++-- .../approved-plans-v1_4/q96/explain.txt | 6 +++--- .../approved-plans-v1_4/q96/simplified.txt | 4 ++-- 20 files changed, 50 insertions(+), 50 deletions(-) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt index 509fb0133095b..a446163e3d29d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (44) +* Sort (44) +- * HashAggregate (43) +- Exchange (42) +- * HashAggregate (41) @@ -244,7 +244,7 @@ Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#23, sum(UnscaledValue(cs_net_profit#7))#24, count(cs_order_number#5)#27] Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#23,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#24,17,2) AS total net profit #32] -(44) TakeOrderedAndProject +(44) Sort [codegen id : 12] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt index ea9a0b27ff700..73a9b58010f58 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (12) +WholeStageCodegen (12) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt index 2ae939cfe41f3..ea7e298393e4c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (41) +* Sort (41) +- * HashAggregate (40) +- Exchange (39) +- * HashAggregate (38) @@ -229,7 +229,7 @@ Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#27] Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#23,17,2) AS total net profit #32] -(41) TakeOrderedAndProject +(41) Sort [codegen id : 8] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt index a044b05365f8e..169f07c2d85e5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (8) +WholeStageCodegen (8) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt index dc4665185b014..99459bfe9a049 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (34) +* Sort (34) +- * HashAggregate (33) +- Exchange (32) +- * HashAggregate (31) @@ -190,7 +190,7 @@ Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))#22] Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#6))#22,17,2) AS Excess Discount Amount #23] -(34) TakeOrderedAndProject +(34) Sort [codegen id : 7] Input [1]: [Excess Discount Amount #23] -Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] +Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt index 7fd1cd3637a09..0721155286d17 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [Excess Discount Amount ] - WholeStageCodegen (7) +WholeStageCodegen (7) + Sort [Excess Discount Amount ] HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt index b17a48db8baac..8a441392f4165 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (34) +* Sort (34) +- * HashAggregate (33) +- Exchange (32) +- * HashAggregate (31) @@ -190,7 +190,7 @@ Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))#22] Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#3))#22,17,2) AS Excess Discount Amount #23] -(34) TakeOrderedAndProject +(34) Sort [codegen id : 7] Input [1]: [Excess Discount Amount #23] -Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] +Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt index 652b2e36cf781..1f24a7c964f20 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [Excess Discount Amount ] - WholeStageCodegen (7) +WholeStageCodegen (7) + Sort [Excess Discount Amount ] HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt index 7720d9dee4170..43390c5048a6d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (47) +* Sort (47) +- * HashAggregate (46) +- Exchange (45) +- * HashAggregate (44) @@ -259,7 +259,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#24, sum(UnscaledValue(ws_net_profit#7))#25, count(ws_order_number#5)#29] Results [3]: [count(ws_order_number#5)#29 AS order count #32, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#24,17,2) AS total shipping cost #33, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#25,17,2) AS total net profit #34] -(47) TakeOrderedAndProject +(47) Sort [codegen id : 14] Input [3]: [order count #32, total shipping cost #33, total net profit #34] -Arguments: 100, [order count #32 ASC NULLS FIRST], [order count #32, total shipping cost #33, total net profit #34] +Arguments: [order count #32 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt index 128a8179ac10b..7b3d461b9e80f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (14) +WholeStageCodegen (14) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt index a94e74f66b201..2abbe4f9b8390 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (41) +* Sort (41) +- * HashAggregate (40) +- Exchange (39) +- * HashAggregate (38) @@ -229,7 +229,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#27] Results [3]: [count(ws_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#23,17,2) AS total net profit #32] -(41) TakeOrderedAndProject +(41) Sort [codegen id : 8] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt index 9d30b998fe174..5e7d7db5c0a9e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (8) +WholeStageCodegen (8) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt index eae118d46245d..b38f8e5039fd8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (63) +* Sort (63) +- * HashAggregate (62) +- Exchange (61) +- * HashAggregate (60) @@ -341,7 +341,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#29, sum(UnscaledValue(ws_net_profit#6))#30, count(ws_order_number#4)#34] Results [3]: [count(ws_order_number#4)#34 AS order count #37, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#29,17,2) AS total shipping cost #38, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#30,17,2) AS total net profit #39] -(63) TakeOrderedAndProject +(63) Sort [codegen id : 24] Input [3]: [order count #37, total shipping cost #38, total net profit #39] -Arguments: 100, [order count #37 ASC NULLS FIRST], [order count #37, total shipping cost #38, total net profit #39] +Arguments: [order count #37 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt index bdcbb87b372dc..20c2beda76d04 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (24) +WholeStageCodegen (24) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt index 3a24e83aff256..1cc99e296383f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (56) +* Sort (56) +- * HashAggregate (55) +- Exchange (54) +- * HashAggregate (53) @@ -312,7 +312,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#32] Results [3]: [count(ws_order_number#4)#32 AS order count #35, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#27,17,2) AS total shipping cost #36, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#28,17,2) AS total net profit #37] -(56) TakeOrderedAndProject +(56) Sort [codegen id : 11] Input [3]: [order count #35, total shipping cost #36, total net profit #37] -Arguments: 100, [order count #35 ASC NULLS FIRST], [order count #35, total shipping cost #36, total net profit #37] +Arguments: [order count #35 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt index 6d35311c810f5..191ff22c1961f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (11) +WholeStageCodegen (11) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt index d00029f985471..5ae0e1632f15b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (28) +* Sort (28) +- * HashAggregate (27) +- Exchange (26) +- * HashAggregate (25) @@ -154,7 +154,7 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#17] Results [1]: [count(1)#17 AS count(1)#18] -(28) TakeOrderedAndProject +(28) Sort [codegen id : 5] Input [1]: [count(1)#18] -Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] +Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt index 1355caffbbfe8..d9ee3e09481ed 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [count(1)] - WholeStageCodegen (5) +WholeStageCodegen (5) + Sort [count(1)] HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt index 3561eff8f57ef..6729910d9cb4a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (28) +* Sort (28) +- * HashAggregate (27) +- Exchange (26) +- * HashAggregate (25) @@ -154,7 +154,7 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#17] Results [1]: [count(1)#17 AS count(1)#18] -(28) TakeOrderedAndProject +(28) Sort [codegen id : 5] Input [1]: [count(1)#18] -Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] +Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt index b13f28bf69cfd..45400b6c512f4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [count(1)] - WholeStageCodegen (5) +WholeStageCodegen (5) + Sort [count(1)] HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 From 2f8f13953fca25969dc68b3863b68c691ea8b295 Mon Sep 17 00:00:00 2001 From: ulysses Date: Tue, 17 Nov 2020 08:24:01 +0800 Subject: [PATCH 13/16] fix --- .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 2 +- .../spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index c92725ebeb91a..aa74182993014 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1458,7 +1458,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { */ object EliminateLimits extends Rule[LogicalPlan] { private def canEliminate(limitExpr: Expression, child: LogicalPlan): Boolean = { - limitExpr.foldable && child.maxRows.exists { _ <= limitExpr.eval().toString.toLong } + limitExpr.foldable && child.maxRows.exists { _ <= limitExpr.eval().asInstanceOf[Int] } } def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala index 9da13e5ddee1f..651c66fbba7ff 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -109,10 +109,10 @@ class CombiningLimitsSuite extends PlanTest { comparePlans(optimized3, query3) // test sort after limit - val query4 = testRelation.select().groupBy()(count(1)) + val query4 = testRelation.select(Symbol("a")).groupBy()(count(1)) .orderBy(Symbol("a").asc).limit(1).analyze val optimized4 = Optimize.execute(query4) - val expected4 = testRelation.select().groupBy()(count(1)) + val expected4 = testRelation.select(Symbol("a")).groupBy()(count(1)) .orderBy(Symbol("a").asc).analyze comparePlans(optimized4, expected4) } From 9fb4039521ceb1b321e13588f34c6f272ac0c2a7 Mon Sep 17 00:00:00 2001 From: ulysses Date: Tue, 17 Nov 2020 13:48:04 +0800 Subject: [PATCH 14/16] fix test --- .../sql/catalyst/optimizer/CombiningLimitsSuite.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala index 651c66fbba7ff..70f130f834c68 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -109,11 +109,12 @@ class CombiningLimitsSuite extends PlanTest { comparePlans(optimized3, query3) // test sort after limit - val query4 = testRelation.select(Symbol("a")).groupBy()(count(1)) - .orderBy(Symbol("a").asc).limit(1).analyze + val query4 = testRelation.select().groupBy()(count(1)) + .orderBy(count(1).asc).limit(1).analyze val optimized4 = Optimize.execute(query4) - val expected4 = testRelation.select(Symbol("a")).groupBy()(count(1)) - .orderBy(Symbol("a").asc).analyze + // the top project has been removed, so we need optimize expected too + val expected4 = Optimize.execute( + testRelation.select().groupBy()(count(1)).orderBy(count(1).asc).analyze) comparePlans(optimized4, expected4) } } From 45ab8b549df1bdcc9f41fd4408c361a4bd58b97c Mon Sep 17 00:00:00 2001 From: ulysses Date: Tue, 17 Nov 2020 14:21:39 +0800 Subject: [PATCH 15/16] golden file --- .../q23a.sf100/explain.txt | 304 +++++++------- .../q23a.sf100/simplified.txt | 381 +++++++++--------- .../approved-plans-v1_4/q95.sf100/explain.txt | 7 +- .../q95.sf100/simplified.txt | 4 +- 4 files changed, 346 insertions(+), 350 deletions(-) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt index e02fcae4ee041..85f71b6cd9388 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt @@ -1,104 +1,103 @@ == Physical Plan == -CollectLimit (100) -+- * HashAggregate (99) - +- Exchange (98) - +- * HashAggregate (97) - +- Union (96) - :- * Project (59) - : +- * BroadcastHashJoin Inner BuildRight (58) - : :- * Project (52) - : : +- SortMergeJoin LeftSemi (51) - : : :- * Sort (33) - : : : +- Exchange (32) - : : : +- * Project (31) - : : : +- SortMergeJoin LeftSemi (30) - : : : :- * Sort (5) - : : : : +- Exchange (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.catalog_sales (1) - : : : +- * Sort (29) - : : : +- * Project (28) - : : : +- * Filter (27) - : : : +- * HashAggregate (26) - : : : +- * HashAggregate (25) - : : : +- * Project (24) - : : : +- * SortMergeJoin Inner (23) - : : : :- * Sort (17) - : : : : +- Exchange (16) - : : : : +- * Project (15) - : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : :- * Filter (8) - : : : : : +- * ColumnarToRow (7) - : : : : : +- Scan parquet default.store_sales (6) - : : : : +- BroadcastExchange (13) - : : : : +- * Project (12) - : : : : +- * Filter (11) - : : : : +- * ColumnarToRow (10) - : : : : +- Scan parquet default.date_dim (9) - : : : +- * Sort (22) - : : : +- Exchange (21) - : : : +- * Filter (20) - : : : +- * ColumnarToRow (19) - : : : +- Scan parquet default.item (18) - : : +- * Sort (50) - : : +- * Project (49) - : : +- * Filter (48) - : : +- * HashAggregate (47) - : : +- * HashAggregate (46) - : : +- * Project (45) - : : +- * SortMergeJoin Inner (44) - : : :- * Sort (38) - : : : +- Exchange (37) - : : : +- * Filter (36) - : : : +- * ColumnarToRow (35) - : : : +- Scan parquet default.store_sales (34) - : : +- * Sort (43) - : : +- Exchange (42) - : : +- * Filter (41) - : : +- * ColumnarToRow (40) - : : +- Scan parquet default.customer (39) - : +- BroadcastExchange (57) - : +- * Project (56) - : +- * Filter (55) - : +- * ColumnarToRow (54) - : +- Scan parquet default.date_dim (53) - +- * Project (95) - +- * BroadcastHashJoin Inner BuildRight (94) - :- * Project (92) - : +- SortMergeJoin LeftSemi (91) - : :- * Sort (79) - : : +- Exchange (78) - : : +- * Project (77) - : : +- SortMergeJoin LeftSemi (76) - : : :- * Sort (64) - : : : +- Exchange (63) - : : : +- * Filter (62) - : : : +- * ColumnarToRow (61) - : : : +- Scan parquet default.web_sales (60) - : : +- * Sort (75) - : : +- * Project (74) - : : +- * Filter (73) - : : +- * HashAggregate (72) - : : +- * HashAggregate (71) - : : +- * Project (70) - : : +- * SortMergeJoin Inner (69) - : : :- * Sort (66) - : : : +- ReusedExchange (65) - : : +- * Sort (68) - : : +- ReusedExchange (67) - : +- * Sort (90) - : +- * Project (89) - : +- * Filter (88) - : +- * HashAggregate (87) - : +- * HashAggregate (86) - : +- * Project (85) - : +- * SortMergeJoin Inner (84) - : :- * Sort (81) - : : +- ReusedExchange (80) - : +- * Sort (83) - : +- ReusedExchange (82) - +- ReusedExchange (93) +* HashAggregate (99) ++- Exchange (98) + +- * HashAggregate (97) + +- Union (96) + :- * Project (59) + : +- * BroadcastHashJoin Inner BuildRight (58) + : :- * Project (52) + : : +- SortMergeJoin LeftSemi (51) + : : :- * Sort (33) + : : : +- Exchange (32) + : : : +- * Project (31) + : : : +- SortMergeJoin LeftSemi (30) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- * Sort (29) + : : : +- * Project (28) + : : : +- * Filter (27) + : : : +- * HashAggregate (26) + : : : +- * HashAggregate (25) + : : : +- * Project (24) + : : : +- * SortMergeJoin Inner (23) + : : : :- * Sort (17) + : : : : +- Exchange (16) + : : : : +- * Project (15) + : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : :- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.store_sales (6) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * Filter (11) + : : : : +- * ColumnarToRow (10) + : : : : +- Scan parquet default.date_dim (9) + : : : +- * Sort (22) + : : : +- Exchange (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.item (18) + : : +- * Sort (50) + : : +- * Project (49) + : : +- * Filter (48) + : : +- * HashAggregate (47) + : : +- * HashAggregate (46) + : : +- * Project (45) + : : +- * SortMergeJoin Inner (44) + : : :- * Sort (38) + : : : +- Exchange (37) + : : : +- * Filter (36) + : : : +- * ColumnarToRow (35) + : : : +- Scan parquet default.store_sales (34) + : : +- * Sort (43) + : : +- Exchange (42) + : : +- * Filter (41) + : : +- * ColumnarToRow (40) + : : +- Scan parquet default.customer (39) + : +- BroadcastExchange (57) + : +- * Project (56) + : +- * Filter (55) + : +- * ColumnarToRow (54) + : +- Scan parquet default.date_dim (53) + +- * Project (95) + +- * BroadcastHashJoin Inner BuildRight (94) + :- * Project (92) + : +- SortMergeJoin LeftSemi (91) + : :- * Sort (79) + : : +- Exchange (78) + : : +- * Project (77) + : : +- SortMergeJoin LeftSemi (76) + : : :- * Sort (64) + : : : +- Exchange (63) + : : : +- * Filter (62) + : : : +- * ColumnarToRow (61) + : : : +- Scan parquet default.web_sales (60) + : : +- * Sort (75) + : : +- * Project (74) + : : +- * Filter (73) + : : +- * HashAggregate (72) + : : +- * HashAggregate (71) + : : +- * Project (70) + : : +- * SortMergeJoin Inner (69) + : : :- * Sort (66) + : : : +- ReusedExchange (65) + : : +- * Sort (68) + : : +- ReusedExchange (67) + : +- * Sort (90) + : +- * Project (89) + : +- * Filter (88) + : +- * HashAggregate (87) + : +- * HashAggregate (86) + : +- * Project (85) + : +- * SortMergeJoin Inner (84) + : :- * Sort (81) + : : +- ReusedExchange (80) + : +- * Sort (83) + : +- ReusedExchange (82) + +- ReusedExchange (93) (1) Scan parquet default.catalog_sales @@ -547,149 +546,145 @@ Functions [1]: [sum(sales#40)] Aggregate Attributes [1]: [sum(sales#40)#65] Results [1]: [sum(sales#40)#65 AS sum(sales)#66] -(100) CollectLimit -Input [1]: [sum(sales)#66] -Arguments: 100 - ===== Subqueries ===== Subquery:1 Hosting operator id = 48 Hosting Expression = Subquery scalar-subquery#36, [id=#37] -* HashAggregate (124) -+- Exchange (123) - +- * HashAggregate (122) - +- * HashAggregate (121) - +- * HashAggregate (120) - +- * Project (119) - +- * SortMergeJoin Inner (118) - :- * Sort (112) - : +- Exchange (111) - : +- * Project (110) - : +- * BroadcastHashJoin Inner BuildRight (109) - : :- * Filter (103) - : : +- * ColumnarToRow (102) - : : +- Scan parquet default.store_sales (101) - : +- BroadcastExchange (108) - : +- * Project (107) - : +- * Filter (106) - : +- * ColumnarToRow (105) - : +- Scan parquet default.date_dim (104) - +- * Sort (117) - +- Exchange (116) - +- * Filter (115) - +- * ColumnarToRow (114) - +- Scan parquet default.customer (113) - - -(101) Scan parquet default.store_sales +* HashAggregate (123) ++- Exchange (122) + +- * HashAggregate (121) + +- * HashAggregate (120) + +- * HashAggregate (119) + +- * Project (118) + +- * SortMergeJoin Inner (117) + :- * Sort (111) + : +- Exchange (110) + : +- * Project (109) + : +- * BroadcastHashJoin Inner BuildRight (108) + : :- * Filter (102) + : : +- * ColumnarToRow (101) + : : +- Scan parquet default.store_sales (100) + : +- BroadcastExchange (107) + : +- * Project (106) + : +- * Filter (105) + : +- * ColumnarToRow (104) + : +- Scan parquet default.date_dim (103) + +- * Sort (116) + +- Exchange (115) + +- * Filter (114) + +- * ColumnarToRow (113) + +- Scan parquet default.customer (112) + + +(100) Scan parquet default.store_sales Output [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(102) ColumnarToRow [codegen id : 2] +(101) ColumnarToRow [codegen id : 2] Input [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -(103) Filter [codegen id : 2] +(102) Filter [codegen id : 2] Input [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Condition : (isnotnull(ss_customer_sk#24) AND isnotnull(ss_sold_date_sk#7)) -(104) Scan parquet default.date_dim +(103) Scan parquet default.date_dim Output [2]: [d_date_sk#9, d_year#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct -(105) ColumnarToRow [codegen id : 1] +(104) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#9, d_year#11] -(106) Filter [codegen id : 1] +(105) Filter [codegen id : 1] Input [2]: [d_date_sk#9, d_year#11] Condition : (d_year#11 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#9)) -(107) Project [codegen id : 1] +(106) Project [codegen id : 1] Output [1]: [d_date_sk#9] Input [2]: [d_date_sk#9, d_year#11] -(108) BroadcastExchange +(107) BroadcastExchange Input [1]: [d_date_sk#9] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#67] -(109) BroadcastHashJoin [codegen id : 2] +(108) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#7] Right keys [1]: [d_date_sk#9] Join condition: None -(110) Project [codegen id : 2] +(109) Project [codegen id : 2] Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Input [5]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, d_date_sk#9] -(111) Exchange +(110) Exchange Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Arguments: hashpartitioning(ss_customer_sk#24, 5), true, [id=#68] -(112) Sort [codegen id : 3] +(111) Sort [codegen id : 3] Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 -(113) Scan parquet default.customer +(112) Scan parquet default.customer Output [1]: [c_customer_sk#28] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(114) ColumnarToRow [codegen id : 4] +(113) ColumnarToRow [codegen id : 4] Input [1]: [c_customer_sk#28] -(115) Filter [codegen id : 4] +(114) Filter [codegen id : 4] Input [1]: [c_customer_sk#28] Condition : isnotnull(c_customer_sk#28) -(116) Exchange +(115) Exchange Input [1]: [c_customer_sk#28] Arguments: hashpartitioning(c_customer_sk#28, 5), true, [id=#69] -(117) Sort [codegen id : 5] +(116) Sort [codegen id : 5] Input [1]: [c_customer_sk#28] Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 -(118) SortMergeJoin [codegen id : 6] +(117) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#24] Right keys [1]: [c_customer_sk#28] Join condition: None -(119) Project [codegen id : 6] +(118) Project [codegen id : 6] Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#28] -(120) HashAggregate [codegen id : 6] +(119) HashAggregate [codegen id : 6] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] Keys [1]: [c_customer_sk#28] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#70, isEmpty#71] Results [3]: [c_customer_sk#28, sum#72, isEmpty#73] -(121) HashAggregate [codegen id : 6] +(120) HashAggregate [codegen id : 6] Input [3]: [c_customer_sk#28, sum#72, isEmpty#73] Keys [1]: [c_customer_sk#28] Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#74] Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#74 AS csales#75] -(122) HashAggregate [codegen id : 6] +(121) HashAggregate [codegen id : 6] Input [1]: [csales#75] Keys: [] Functions [1]: [partial_max(csales#75)] Aggregate Attributes [1]: [max#76] Results [1]: [max#77] -(123) Exchange +(122) Exchange Input [1]: [max#77] Arguments: SinglePartition, true, [id=#78] -(124) HashAggregate [codegen id : 7] +(123) HashAggregate [codegen id : 7] Input [1]: [max#77] Keys: [] Functions [1]: [max(csales#75)] @@ -698,3 +693,4 @@ Results [1]: [max(csales#75)#79 AS tpcds_cmax#80] Subquery:2 Hosting operator id = 88 Hosting Expression = ReusedSubquery Subquery scalar-subquery#36, [id=#37] + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt index e4ea640826120..5bb8bc5b99d0c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt @@ -1,209 +1,208 @@ -CollectLimit - WholeStageCodegen (36) - HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] - InputAdapter - Exchange #1 - WholeStageCodegen (35) - HashAggregate [sales] [sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (17) - Project [cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] - InputAdapter - SortMergeJoin [cs_bill_customer_sk,c_customer_sk] - WholeStageCodegen (10) - Sort [cs_bill_customer_sk] - InputAdapter - Exchange [cs_bill_customer_sk] #2 - WholeStageCodegen (9) - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] - InputAdapter - SortMergeJoin [cs_item_sk,item_sk] - WholeStageCodegen (2) - Sort [cs_item_sk] - InputAdapter - Exchange [cs_item_sk] #3 - WholeStageCodegen (1) - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] - WholeStageCodegen (8) - Sort [item_sk] - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (5) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #4 - WholeStageCodegen (4) - Project [ss_item_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (3) - Project [d_date_sk,d_date] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_year] - InputAdapter - WholeStageCodegen (7) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #6 - WholeStageCodegen (6) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] - WholeStageCodegen (15) - Sort [c_customer_sk] - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - Subquery #1 - WholeStageCodegen (7) - HashAggregate [max] [max(csales),tpcds_cmax,max] - InputAdapter - Exchange #9 - WholeStageCodegen (6) - HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] +WholeStageCodegen (36) + HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] + InputAdapter + Exchange #1 + WholeStageCodegen (35) + HashAggregate [sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (17) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] + WholeStageCodegen (10) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #2 + WholeStageCodegen (9) + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] + InputAdapter + SortMergeJoin [cs_item_sk,item_sk] + WholeStageCodegen (2) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #3 + WholeStageCodegen (1) + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] + WholeStageCodegen (8) + Sort [item_sk] + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter - WholeStageCodegen (3) - Sort [ss_customer_sk] + WholeStageCodegen (5) + Sort [ss_item_sk] InputAdapter - Exchange [ss_customer_sk] #10 - WholeStageCodegen (2) - Project [ss_customer_sk,ss_quantity,ss_sales_price] + Exchange [ss_item_sk] #4 + WholeStageCodegen (4) + Project [ss_item_sk,d_date] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_item_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] InputAdapter - BroadcastExchange #11 - WholeStageCodegen (1) - Project [d_date_sk] + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_date] Filter [d_year,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_date,d_year] InputAdapter - WholeStageCodegen (5) - Sort [c_customer_sk] + WholeStageCodegen (7) + Sort [i_item_sk] InputAdapter - Exchange [c_customer_sk] #12 - WholeStageCodegen (4) - Filter [c_customer_sk] + Exchange [i_item_sk] #6 + WholeStageCodegen (6) + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.customer [c_customer_sk] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (12) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #7 - WholeStageCodegen (11) - Filter [ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - WholeStageCodegen (14) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #8 - WholeStageCodegen (13) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (16) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (34) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] - InputAdapter - SortMergeJoin [ws_bill_customer_sk,c_customer_sk] - WholeStageCodegen (27) - Sort [ws_bill_customer_sk] - InputAdapter - Exchange [ws_bill_customer_sk] #14 - WholeStageCodegen (26) - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + Scan parquet default.item [i_item_sk,i_item_desc] + WholeStageCodegen (15) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (7) + HashAggregate [max] [max(csales),tpcds_cmax,max] InputAdapter - SortMergeJoin [ws_item_sk,item_sk] - WholeStageCodegen (19) - Sort [ws_item_sk] + Exchange #9 + WholeStageCodegen (6) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #10 + WholeStageCodegen (2) + Project [ss_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #12 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (12) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #7 + WholeStageCodegen (11) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + WholeStageCodegen (14) + Sort [c_customer_sk] InputAdapter - Exchange [ws_item_sk] #15 - WholeStageCodegen (18) - Filter [ws_sold_date_sk] + Exchange [c_customer_sk] #8 + WholeStageCodegen (13) + Filter [c_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - WholeStageCodegen (25) - Sort [item_sk] - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (22) - Sort [ss_item_sk] - InputAdapter - ReusedExchange [ss_item_sk,d_date] #4 - InputAdapter - WholeStageCodegen (24) - Sort [i_item_sk] - InputAdapter - ReusedExchange [i_item_sk,i_item_desc] #6 - WholeStageCodegen (32) - Sort [c_customer_sk] - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [tpcds_cmax] #1 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (29) - Sort [ss_customer_sk] - InputAdapter - ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #7 - InputAdapter - WholeStageCodegen (31) - Sort [c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk] #8 + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (16) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (34) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter - ReusedExchange [d_date_sk] #13 \ No newline at end of file + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + WholeStageCodegen (27) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #14 + WholeStageCodegen (26) + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + InputAdapter + SortMergeJoin [ws_item_sk,item_sk] + WholeStageCodegen (19) + Sort [ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #15 + WholeStageCodegen (18) + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + WholeStageCodegen (25) + Sort [item_sk] + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (22) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,d_date] #4 + InputAdapter + WholeStageCodegen (24) + Sort [i_item_sk] + InputAdapter + ReusedExchange [i_item_sk,i_item_desc] #6 + WholeStageCodegen (32) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (29) + Sort [ss_customer_sk] + InputAdapter + ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #7 + InputAdapter + WholeStageCodegen (31) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk] #8 + InputAdapter + ReusedExchange [d_date_sk] #13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt index ed9d1d77fca16..547792f3d7ae4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (61) +* Sort (61) +- * HashAggregate (60) +- Exchange (59) +- * HashAggregate (58) @@ -331,6 +331,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#28, sum(UnscaledValue(ws_net_profit#6))#29, count(ws_order_number#4)#33] Results [3]: [count(ws_order_number#4)#33 AS order count #36, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#28,17,2) AS total shipping cost #37, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#29,17,2) AS total net profit #38] -(61) TakeOrderedAndProject +(61) Sort [codegen id : 23] Input [3]: [order count #36, total shipping cost #37, total net profit #38] -Arguments: 100, [order count #36 ASC NULLS FIRST], [order count #36, total shipping cost #37, total net profit #38] +Arguments: [order count #36 ASC NULLS FIRST], true, 0 + diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt index da48d34c72a04..7213a9f58d3f8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (23) +WholeStageCodegen (23) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 From 2f55ec034ade0d430bd7ab179f68addf0d65f84a Mon Sep 17 00:00:00 2001 From: ulysses Date: Tue, 17 Nov 2020 22:36:58 +0800 Subject: [PATCH 16/16] add comment --- .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 19cfb72896807..c4b9936fa4c4f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1451,6 +1451,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { } /** + * This rule optimizes Limit operators by: * 1. Eliminate [[Limit]] operators if it's child max row <= limit. * 2. Combines two adjacent [[Limit]] operators into one, merging the * expressions into one single expression.