From a6e93b1645ba062ca822dbc3cae1209a71d702dd Mon Sep 17 00:00:00 2001 From: allisonwang-db Date: Tue, 26 Apr 2022 22:39:44 -0700 Subject: [PATCH 1/3] [SPARK-38918][SQL] Nested column pruning should filter out attributes that do not belong to the current relation This PR updates `ProjectionOverSchema` to use the outputs of the data source relation to filter the attributes in the nested schema pruning. This is needed because the attributes in the schema do not necessarily belong to the current data source relation. For example, if a filter contains a correlated subquery, then the subquery's children can contain attributes from both the inner query and the outer query. Since the `RewriteSubquery` batch happens after early scan pushdown rules, nested schema pruning can wrongly use the inner query's attributes to prune the outer query data schema, thus causing wrong results and unexpected exceptions. To fix a bug in `SchemaPruning`. No Unit test Closes #36216 from allisonwang-db/spark-38918-nested-column-pruning. Authored-by: allisonwang-db Signed-off-by: Liang-Chi Hsieh (cherry picked from commit 150434b5d7909dcf8248ffa5ec3d937ea3da09fd) Signed-off-by: Liang-Chi Hsieh (cherry picked from commit 793ba608181b3eba8f1f57fcdd12dcd3fe035362) Signed-off-by: allisonwang-db --- .../expressions/ProjectionOverSchema.scala | 8 +++- .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../sql/catalyst/optimizer/objects.scala | 2 +- .../execution/datasources/SchemaPruning.scala | 2 +- .../v2/V2ScanRelationPushDown.scala | 5 ++- .../datasources/SchemaPruningSuite.scala | 45 ++++++++++++++++++- 6 files changed, 56 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala index a6be98c8a3aa..69d30dd5048d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala @@ -24,15 +24,19 @@ import org.apache.spark.sql.types._ * field indexes and field counts of complex type extractors and attributes * are adjusted to fit the schema. All other expressions are left as-is. This * class is motivated by columnar nested schema pruning. + * + * @param schema nested column schema + * @param output output attributes of the data source relation. They are used to filter out + * attributes in the schema that do not belong to the current relation. */ -case class ProjectionOverSchema(schema: StructType) { +case class ProjectionOverSchema(schema: StructType, output: AttributeSet) { private val fieldNames = schema.fieldNames.toSet def unapply(expr: Expression): Option[Expression] = getProjection(expr) private def getProjection(expr: Expression): Option[Expression] = expr match { - case a: AttributeReference if fieldNames.contains(a.name) => + case a: AttributeReference if fieldNames.contains(a.name) && output.contains(a) => Some(a.copy(dataType = schema(a.name).dataType)(a.exprId, a.qualifier)) case GetArrayItem(child, arrayItemOrdinal, failOnError) => getProjection(child).map { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 4fe00ef0eedf..80af6c037453 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -60,6 +60,7 @@ abstract class Optimizer(catalogManager: CatalogManager) override protected val excludedOnceBatches: Set[String] = Set( "PartitionPruning", + "RewriteSubquery", "Extract Python UDFs") protected def fixedPoint = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala index 52544ff3e241..ec64895fc302 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala @@ -229,7 +229,7 @@ object ObjectSerializerPruning extends Rule[LogicalPlan] { } // Builds new projection. - val projectionOverSchema = ProjectionOverSchema(prunedSchema) + val projectionOverSchema = ProjectionOverSchema(prunedSchema, AttributeSet(s.output)) val newProjects = p.projectList.map(_.transformDown { case projectionOverSchema(expr) => expr }).map { case expr: NamedExpression => expr } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala index 4f331c7bf487..bf3b54a297c4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala @@ -81,7 +81,7 @@ object SchemaPruning extends Rule[LogicalPlan] { // in dataSchema. if (countLeaves(dataSchema) > countLeaves(prunedDataSchema)) { val prunedRelation = leafNodeBuilder(prunedDataSchema) - val projectionOverSchema = ProjectionOverSchema(prunedDataSchema) + val projectionOverSchema = ProjectionOverSchema(prunedDataSchema, AttributeSet(output)) Some(buildNewProjection(projects, normalizedProjects, normalizedFilters, prunedRelation, projectionOverSchema)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala index 046155b55cc2..9178c840c20d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable -import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeReference, Expression, NamedExpression, PredicateHelper, ProjectionOverSchema, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeReference, AttributeSet, Expression, NamedExpression, PredicateHelper, ProjectionOverSchema, SubqueryExpression} import org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.planning.ScanOperation @@ -199,7 +199,8 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { val scanRelation = DataSourceV2ScanRelation(sHolder.relation, wrappedScan, output) - val projectionOverSchema = ProjectionOverSchema(output.toStructType) + val projectionOverSchema = + ProjectionOverSchema(output.toStructType, AttributeSet(output)) val projectionFunc = (expr: Expression) => expr transformDown { case projectionOverSchema(newExpr) => newExpr } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala index 0b745f18768d..3062c7e64809 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala @@ -57,11 +57,15 @@ abstract class SchemaPruningSuite contactId: Int, employer: Employer) + case class Employee(id: Int, name: FullName, employer: Company) + val janeDoe = FullName("Jane", "X.", "Doe") val johnDoe = FullName("John", "Y.", "Doe") val susanSmith = FullName("Susan", "Z.", "Smith") - val employer = Employer(0, Company("abc", "123 Business Street")) + val company = Company("abc", "123 Business Street") + + val employer = Employer(0, company) val employerWithNullCompany = Employer(1, null) val employerWithNullCompany2 = Employer(2, null) @@ -77,6 +81,8 @@ abstract class SchemaPruningSuite Department(1, "Marketing", 1, employerWithNullCompany) :: Department(2, "Operation", 4, employerWithNullCompany2) :: Nil + val employees = Employee(0, janeDoe, company) :: Employee(1, johnDoe, company) :: Nil + case class Name(first: String, last: String) case class BriefContact(id: Int, name: Name, address: String) @@ -617,6 +623,26 @@ abstract class SchemaPruningSuite } } + testSchemaPruning("SPARK-38918: nested schema pruning with correlated subqueries") { + withContacts { + withEmployees { + val query = sql( + """ + |select count(*) + |from contacts c + |where not exists (select null from employees e where e.name.first = c.name.first + | and e.employer.name = c.employer.company.name) + |""".stripMargin) + checkScan(query, + "struct," + + "employer:struct>>", + "struct," + + "employer:struct>") + checkAnswer(query, Row(3)) + } + } + } + protected def testSchemaPruning(testName: String)(testThunk: => Unit): Unit = { test(s"Spark vectorized reader - without partition data column - $testName") { withSQLConf(vectorizedReaderEnabledKey -> "true") { @@ -697,6 +723,23 @@ abstract class SchemaPruningSuite } } + private def withEmployees(testThunk: => Unit): Unit = { + withTempPath { dir => + val path = dir.getCanonicalPath + + makeDataSourceFile(employees, new File(path + "/employees")) + + // Providing user specified schema. Inferred schema from different data sources might + // be different. + val schema = "`id` INT,`name` STRUCT<`first`: STRING, `middle`: STRING, `last`: STRING>, " + + "`employer` STRUCT<`name`: STRING, `address`: STRING>" + spark.read.format(dataSourceName).schema(schema).load(path + "/employees") + .createOrReplaceTempView("employees") + + testThunk + } + } + case class MixedCaseColumn(a: String, B: Int) case class MixedCase(id: Int, CoL1: String, coL2: MixedCaseColumn) From ff8328537846dd52b95f1b5553a4ce4d0947d68c Mon Sep 17 00:00:00 2001 From: allisonwang-db Date: Mon, 9 May 2022 17:46:46 -0700 Subject: [PATCH 2/3] update explain.txt --- .../approved-plans-v1_4/q4/explain.txt | 270 ++++++++++++++++++ .../approved-plans-v1_4/q5/explain.txt | 244 ++++++++++++++++ 2 files changed, 514 insertions(+) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt index 88f9474b5c68..ec2bdfae937a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt @@ -362,13 +362,18 @@ Output [11]: [customer_id#23, year_total#24, customer_id#46, customer_first_name Input [12]: [customer_id#23, year_total#24, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#53, customer_id#77, year_total#78] (54) Scan parquet default.customer +<<<<<<< HEAD Output [8]: [c_customer_sk#79, c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86] +======= +Output [8]: [c_customer_sk#87, c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58] +>>>>>>> 965221296e (update explain.txt) Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (55) ColumnarToRow [codegen id : 14] +<<<<<<< HEAD Input [8]: [c_customer_sk#79, c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86] (56) Filter [codegen id : 14] @@ -380,10 +385,24 @@ Output [6]: [cs_bill_customer_sk#87, cs_ext_discount_amt#88, cs_ext_sales_price# Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(cs_sold_date_sk#92), dynamicpruningexpression(cs_sold_date_sk#92 IN dynamicpruning#39)] +======= +Input [8]: [c_customer_sk#87, c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58] + +(56) Filter [codegen id : 14] +Input [8]: [c_customer_sk#87, c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58] +Condition : (isnotnull(c_customer_sk#87) AND isnotnull(c_customer_id#88)) + +(57) Scan parquet default.catalog_sales +Output [6]: [cs_bill_customer_sk#94, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cs_sold_date_sk#98), dynamicpruningexpression(cs_sold_date_sk#98 IN dynamicpruning#41)] +>>>>>>> 965221296e (update explain.txt) PushedFilters: [IsNotNull(cs_bill_customer_sk)] ReadSchema: struct (58) ColumnarToRow [codegen id : 12] +<<<<<<< HEAD Input [6]: [cs_bill_customer_sk#87, cs_ext_discount_amt#88, cs_ext_sales_price#89, cs_ext_wholesale_cost#90, cs_ext_list_price#91, cs_sold_date_sk#92] (59) Filter [codegen id : 12] @@ -448,12 +467,79 @@ Input [13]: [customer_id#23, year_total#24, customer_id#46, customer_first_name# (72) Scan parquet default.customer Output [8]: [c_customer_sk#101, c_customer_id#102, c_first_name#103, c_last_name#104, c_preferred_cust_flag#105, c_birth_country#106, c_login#107, c_email_address#108] +======= +Input [6]: [cs_bill_customer_sk#94, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] + +(59) Filter [codegen id : 12] +Input [6]: [cs_bill_customer_sk#94, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] +Condition : isnotnull(cs_bill_customer_sk#94) + +(60) BroadcastExchange +Input [6]: [cs_bill_customer_sk#94, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#99] + +(61) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [c_customer_sk#87] +Right keys [1]: [cs_bill_customer_sk#94] +Join condition: None + +(62) Project [codegen id : 14] +Output [12]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] +Input [14]: [c_customer_sk#87, c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, cs_bill_customer_sk#94, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] + +(63) ReusedExchange [Reuses operator id: 118] +Output [2]: [d_date_sk#100, d_year#101] + +(64) BroadcastHashJoin [codegen id : 14] +Left keys [1]: [cs_sold_date_sk#98] +Right keys [1]: [d_date_sk#100] +Join condition: None + +(65) Project [codegen id : 14] +Output [12]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, d_year#101] +Input [14]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98, d_date_sk#100, d_year#101] + +(66) HashAggregate [codegen id : 14] +Input [12]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, d_year#101] +Keys [8]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, d_year#101] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#73 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#102, isEmpty#103] +Results [10]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, d_year#101, sum#104, isEmpty#105] + +(67) Exchange +Input [10]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, d_year#101, sum#104, isEmpty#105] +Arguments: hashpartitioning(c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, d_year#101, 5), ENSURE_REQUIREMENTS, [id=#106] + +(68) HashAggregate [codegen id : 15] +Input [10]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, d_year#101, sum#104, isEmpty#105] +Keys [8]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, d_year#101] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#73 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#73 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#81] +Results [2]: [c_customer_id#88 AS customer_id#107, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#73 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#81 AS year_total#108] + +(69) BroadcastExchange +Input [2]: [customer_id#107, year_total#108] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#109] + +(70) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#25] +Right keys [1]: [customer_id#107] +Join condition: (CASE WHEN (year_total#85 > 0.000000) THEN CheckOverflow((promote_precision(year_total#108) / promote_precision(year_total#85)), DecimalType(38,14), true) END > CASE WHEN (year_total#26 > 0.000000) THEN CheckOverflow((promote_precision(year_total#57) / promote_precision(year_total#26)), DecimalType(38,14), true) END) + +(71) Project [codegen id : 24] +Output [10]: [customer_id#25, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#85, year_total#108] +Input [13]: [customer_id#25, year_total#26, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#57, year_total#85, customer_id#107, year_total#108] + +(72) Scan parquet default.customer +Output [8]: [c_customer_sk#110, c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117] +>>>>>>> 965221296e (update explain.txt) Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (73) ColumnarToRow [codegen id : 18] +<<<<<<< HEAD Input [8]: [c_customer_sk#101, c_customer_id#102, c_first_name#103, c_last_name#104, c_preferred_cust_flag#105, c_birth_country#106, c_login#107, c_email_address#108] (74) Filter [codegen id : 18] @@ -465,10 +551,24 @@ Output [6]: [ws_bill_customer_sk#109, ws_ext_discount_amt#110, ws_ext_sales_pric Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(ws_sold_date_sk#114), dynamicpruningexpression(ws_sold_date_sk#114 IN dynamicpruning#15)] +======= +Input [8]: [c_customer_sk#110, c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117] + +(74) Filter [codegen id : 18] +Input [8]: [c_customer_sk#110, c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117] +Condition : (isnotnull(c_customer_sk#110) AND isnotnull(c_customer_id#111)) + +(75) Scan parquet default.web_sales +Output [6]: [ws_bill_customer_sk#118, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#123), dynamicpruningexpression(ws_sold_date_sk#123 IN dynamicpruning#15)] +>>>>>>> 965221296e (update explain.txt) PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (76) ColumnarToRow [codegen id : 16] +<<<<<<< HEAD Input [6]: [ws_bill_customer_sk#109, ws_ext_discount_amt#110, ws_ext_sales_price#111, ws_ext_wholesale_cost#112, ws_ext_list_price#113, ws_sold_date_sk#114] (77) Filter [codegen id : 16] @@ -541,12 +641,87 @@ Input [12]: [customer_id#23, customer_id#46, customer_first_name#47, customer_la (92) Scan parquet default.customer Output [8]: [c_customer_sk#126, c_customer_id#127, c_first_name#128, c_last_name#129, c_preferred_cust_flag#130, c_birth_country#131, c_login#132, c_email_address#133] +======= +Input [6]: [ws_bill_customer_sk#118, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] + +(77) Filter [codegen id : 16] +Input [6]: [ws_bill_customer_sk#118, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] +Condition : isnotnull(ws_bill_customer_sk#118) + +(78) BroadcastExchange +Input [6]: [ws_bill_customer_sk#118, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#124] + +(79) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [c_customer_sk#110] +Right keys [1]: [ws_bill_customer_sk#118] +Join condition: None + +(80) Project [codegen id : 18] +Output [12]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] +Input [14]: [c_customer_sk#110, c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_bill_customer_sk#118, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] + +(81) ReusedExchange [Reuses operator id: 114] +Output [2]: [d_date_sk#125, d_year#126] + +(82) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [ws_sold_date_sk#123] +Right keys [1]: [d_date_sk#125] +Join condition: None + +(83) Project [codegen id : 18] +Output [12]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, d_year#126] +Input [14]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123, d_date_sk#125, d_year#126] + +(84) HashAggregate [codegen id : 18] +Input [12]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, d_year#126] +Keys [8]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#127, isEmpty#128] +Results [10]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126, sum#129, isEmpty#130] + +(85) Exchange +Input [10]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126, sum#129, isEmpty#130] +Arguments: hashpartitioning(c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126, 5), ENSURE_REQUIREMENTS, [id=#131] + +(86) HashAggregate [codegen id : 19] +Input [10]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126, sum#129, isEmpty#130] +Keys [8]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#132] +Results [2]: [c_customer_id#111 AS customer_id#133, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#132 AS year_total#134] + +(87) Filter [codegen id : 19] +Input [2]: [customer_id#133, year_total#134] +Condition : (isnotnull(year_total#134) AND (year_total#134 > 0.000000)) + +(88) Project [codegen id : 19] +Output [2]: [customer_id#133 AS customer_id#135, year_total#134 AS year_total#136] +Input [2]: [customer_id#133, year_total#134] + +(89) BroadcastExchange +Input [2]: [customer_id#135, year_total#136] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#137] + +(90) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#25] +Right keys [1]: [customer_id#135] +Join condition: None + +(91) Project [codegen id : 24] +Output [11]: [customer_id#25, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#85, year_total#108, year_total#136] +Input [12]: [customer_id#25, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#85, year_total#108, customer_id#135, year_total#136] + +(92) Scan parquet default.customer +Output [8]: [c_customer_sk#138, c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145] +>>>>>>> 965221296e (update explain.txt) Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (93) ColumnarToRow [codegen id : 22] +<<<<<<< HEAD Input [8]: [c_customer_sk#126, c_customer_id#127, c_first_name#128, c_last_name#129, c_preferred_cust_flag#130, c_birth_country#131, c_login#132, c_email_address#133] (94) Filter [codegen id : 22] @@ -558,10 +733,24 @@ Output [6]: [ws_bill_customer_sk#134, ws_ext_discount_amt#135, ws_ext_sales_pric Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(ws_sold_date_sk#139), dynamicpruningexpression(ws_sold_date_sk#139 IN dynamicpruning#39)] +======= +Input [8]: [c_customer_sk#138, c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145] + +(94) Filter [codegen id : 22] +Input [8]: [c_customer_sk#138, c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145] +Condition : (isnotnull(c_customer_sk#138) AND isnotnull(c_customer_id#139)) + +(95) Scan parquet default.web_sales +Output [6]: [ws_bill_customer_sk#146, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, ws_sold_date_sk#151] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#151), dynamicpruningexpression(ws_sold_date_sk#151 IN dynamicpruning#41)] +>>>>>>> 965221296e (update explain.txt) PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (96) ColumnarToRow [codegen id : 20] +<<<<<<< HEAD Input [6]: [ws_bill_customer_sk#134, ws_ext_discount_amt#135, ws_ext_sales_price#136, ws_ext_wholesale_cost#137, ws_ext_list_price#138, ws_sold_date_sk#139] (97) Filter [codegen id : 20] @@ -623,6 +812,69 @@ Join condition: (CASE WHEN (year_total#78 > 0.000000) THEN CheckOverflow((promot (109) Project [codegen id : 24] Output [7]: [customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52] Input [13]: [customer_id#23, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#78, year_total#100, year_total#125, customer_id#146, year_total#147] +======= +Input [6]: [ws_bill_customer_sk#146, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, ws_sold_date_sk#151] + +(97) Filter [codegen id : 20] +Input [6]: [ws_bill_customer_sk#146, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, ws_sold_date_sk#151] +Condition : isnotnull(ws_bill_customer_sk#146) + +(98) BroadcastExchange +Input [6]: [ws_bill_customer_sk#146, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, ws_sold_date_sk#151] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#152] + +(99) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [c_customer_sk#138] +Right keys [1]: [ws_bill_customer_sk#146] +Join condition: None + +(100) Project [codegen id : 22] +Output [12]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, ws_sold_date_sk#151] +Input [14]: [c_customer_sk#138, c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, ws_bill_customer_sk#146, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, ws_sold_date_sk#151] + +(101) ReusedExchange [Reuses operator id: 118] +Output [2]: [d_date_sk#153, d_year#154] + +(102) BroadcastHashJoin [codegen id : 22] +Left keys [1]: [ws_sold_date_sk#151] +Right keys [1]: [d_date_sk#153] +Join condition: None + +(103) Project [codegen id : 22] +Output [12]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, d_year#154] +Input [14]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, ws_sold_date_sk#151, d_date_sk#153, d_year#154] + +(104) HashAggregate [codegen id : 22] +Input [12]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, d_year#154] +Keys [8]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, d_year#154] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#150 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#149 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#147 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#148 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [2]: [sum#155, isEmpty#156] +Results [10]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, d_year#154, sum#157, isEmpty#158] + +(105) Exchange +Input [10]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, d_year#154, sum#157, isEmpty#158] +Arguments: hashpartitioning(c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, d_year#154, 5), ENSURE_REQUIREMENTS, [id=#159] + +(106) HashAggregate [codegen id : 23] +Input [10]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, d_year#154, sum#157, isEmpty#158] +Keys [8]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, d_year#154] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#150 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#149 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#147 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#148 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#150 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#149 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#147 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#148 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#132] +Results [2]: [c_customer_id#139 AS customer_id#160, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#150 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#149 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#147 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#148 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#132 AS year_total#161] + +(107) BroadcastExchange +Input [2]: [customer_id#160, year_total#161] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#162] + +(108) BroadcastHashJoin [codegen id : 24] +Left keys [1]: [customer_id#25] +Right keys [1]: [customer_id#160] +Join condition: (CASE WHEN (year_total#85 > 0.000000) THEN CheckOverflow((promote_precision(year_total#108) / promote_precision(year_total#85)), DecimalType(38,14), true) END > CASE WHEN (year_total#136 > 0.000000) THEN CheckOverflow((promote_precision(year_total#161) / promote_precision(year_total#136)), DecimalType(38,14), true) END) + +(109) Project [codegen id : 24] +Output [7]: [customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56] +Input [13]: [customer_id#25, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#85, year_total#108, year_total#136, customer_id#160, year_total#161] +>>>>>>> 965221296e (update explain.txt) (110) TakeOrderedAndProject Input [7]: [customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52] @@ -652,8 +904,13 @@ Input [2]: [d_date_sk#16, d_year#17] Condition : ((isnotnull(d_year#17) AND (d_year#17 = 2001)) AND isnotnull(d_date_sk#16)) (114) BroadcastExchange +<<<<<<< HEAD Input [2]: [d_date_sk#16, d_year#17] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=18] +======= +Input [2]: [d_date_sk#17, d_year#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#163] +>>>>>>> 965221296e (update explain.txt) Subquery:2 Hosting operator id = 20 Hosting Expression = ss_sold_date_sk#38 IN dynamicpruning#39 BroadcastExchange (118) @@ -677,15 +934,28 @@ Input [2]: [d_date_sk#40, d_year#41] Condition : ((isnotnull(d_year#41) AND (d_year#41 = 2002)) AND isnotnull(d_date_sk#40)) (118) BroadcastExchange +<<<<<<< HEAD Input [2]: [d_date_sk#40, d_year#41] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=19] +======= +Input [2]: [d_date_sk#43, d_year#44] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#164] +>>>>>>> 965221296e (update explain.txt) Subquery:3 Hosting operator id = 37 Hosting Expression = cs_sold_date_sk#67 IN dynamicpruning#15 +<<<<<<< HEAD Subquery:4 Hosting operator id = 57 Hosting Expression = cs_sold_date_sk#92 IN dynamicpruning#39 Subquery:5 Hosting operator id = 75 Hosting Expression = ws_sold_date_sk#114 IN dynamicpruning#15 Subquery:6 Hosting operator id = 95 Hosting Expression = ws_sold_date_sk#139 IN dynamicpruning#39 +======= +Subquery:4 Hosting operator id = 57 Hosting Expression = cs_sold_date_sk#98 IN dynamicpruning#41 + +Subquery:5 Hosting operator id = 75 Hosting Expression = ws_sold_date_sk#123 IN dynamicpruning#15 + +Subquery:6 Hosting operator id = 95 Hosting Expression = ws_sold_date_sk#151 IN dynamicpruning#41 +>>>>>>> 965221296e (update explain.txt) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt index 1f39a204e7c8..d992d197487f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt @@ -188,6 +188,7 @@ Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_s Condition : isnotnull(cs_catalog_page_sk#42) (25) Project [codegen id : 7] +<<<<<<< HEAD Output [6]: [cs_catalog_page_sk#42 AS page_sk#46, cs_sold_date_sk#45 AS date_sk#47, cs_ext_sales_price#43 AS sales_price#48, cs_net_profit#44 AS profit#49, 0.00 AS return_amt#50, 0.00 AS net_loss#51] Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] @@ -196,10 +197,21 @@ Output [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_retu Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(cr_returned_date_sk#55), dynamicpruningexpression(cr_returned_date_sk#55 IN dynamicpruning#5)] +======= +Output [6]: [cs_catalog_page_sk#44 AS page_sk#25, cs_sold_date_sk#47 AS date_sk#48, cs_ext_sales_price#45 AS sales_price#49, cs_net_profit#46 AS profit#50, 0.00 AS return_amt#51, 0.00 AS net_loss#52] +Input [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] + +(26) Scan parquet default.catalog_returns +Output [4]: [cr_catalog_page_sk#53, cr_return_amount#54, cr_net_loss#55, cr_returned_date_sk#56] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(cr_returned_date_sk#56), dynamicpruningexpression(cr_returned_date_sk#56 IN dynamicpruning#5)] +>>>>>>> 965221296e (update explain.txt) PushedFilters: [IsNotNull(cr_catalog_page_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 8] +<<<<<<< HEAD Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] (28) Filter [codegen id : 8] @@ -209,10 +221,22 @@ Condition : isnotnull(cr_catalog_page_sk#52) (29) Project [codegen id : 8] Output [6]: [cr_catalog_page_sk#52 AS page_sk#56, cr_returned_date_sk#55 AS date_sk#57, 0.00 AS sales_price#58, 0.00 AS profit#59, cr_return_amount#53 AS return_amt#60, cr_net_loss#54 AS net_loss#61] Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +======= +Input [4]: [cr_catalog_page_sk#53, cr_return_amount#54, cr_net_loss#55, cr_returned_date_sk#56] + +(28) Filter [codegen id : 8] +Input [4]: [cr_catalog_page_sk#53, cr_return_amount#54, cr_net_loss#55, cr_returned_date_sk#56] +Condition : isnotnull(cr_catalog_page_sk#53) + +(29) Project [codegen id : 8] +Output [6]: [cr_catalog_page_sk#53 AS page_sk#57, cr_returned_date_sk#56 AS date_sk#58, 0.00 AS sales_price#59, 0.00 AS profit#60, cr_return_amount#54 AS return_amt#61, cr_net_loss#55 AS net_loss#62] +Input [4]: [cr_catalog_page_sk#53, cr_return_amount#54, cr_net_loss#55, cr_returned_date_sk#56] +>>>>>>> 965221296e (update explain.txt) (30) Union (31) ReusedExchange [Reuses operator id: 79] +<<<<<<< HEAD Output [1]: [d_date_sk#62] (32) BroadcastHashJoin [codegen id : 11] @@ -226,12 +250,28 @@ Input [7]: [page_sk#46, date_sk#47, sales_price#48, profit#49, return_amt#50, ne (34) Scan parquet default.catalog_page Output [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +======= +Output [1]: [d_date_sk#63] + +(32) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [date_sk#48] +Right keys [1]: [d_date_sk#63] +Join condition: None + +(33) Project [codegen id : 11] +Output [5]: [page_sk#25, sales_price#49, profit#50, return_amt#51, net_loss#52] +Input [7]: [page_sk#25, date_sk#48, sales_price#49, profit#50, return_amt#51, net_loss#52, d_date_sk#63] + +(34) Scan parquet default.catalog_page +Output [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] +>>>>>>> 965221296e (update explain.txt) Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 10] +<<<<<<< HEAD Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] (36) Filter [codegen id : 10] @@ -274,10 +314,55 @@ Output [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(ws_sold_date_sk#85), dynamicpruningexpression(ws_sold_date_sk#85 IN dynamicpruning#5)] +======= +Input [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] + +(36) Filter [codegen id : 10] +Input [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] +Condition : isnotnull(cp_catalog_page_sk#64) + +(37) BroadcastExchange +Input [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#66] + +(38) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [page_sk#25] +Right keys [1]: [cp_catalog_page_sk#64] +Join condition: None + +(39) Project [codegen id : 11] +Output [5]: [sales_price#49, profit#50, return_amt#51, net_loss#52, cp_catalog_page_id#65] +Input [7]: [page_sk#25, sales_price#49, profit#50, return_amt#51, net_loss#52, cp_catalog_page_sk#64, cp_catalog_page_id#65] + +(40) HashAggregate [codegen id : 11] +Input [5]: [sales_price#49, profit#50, return_amt#51, net_loss#52, cp_catalog_page_id#65] +Keys [1]: [cp_catalog_page_id#65] +Functions [4]: [partial_sum(UnscaledValue(sales_price#49)), partial_sum(UnscaledValue(return_amt#51)), partial_sum(UnscaledValue(profit#50)), partial_sum(UnscaledValue(net_loss#52))] +Aggregate Attributes [4]: [sum#67, sum#68, sum#69, sum#70] +Results [5]: [cp_catalog_page_id#65, sum#71, sum#72, sum#73, sum#74] + +(41) Exchange +Input [5]: [cp_catalog_page_id#65, sum#71, sum#72, sum#73, sum#74] +Arguments: hashpartitioning(cp_catalog_page_id#65, 5), ENSURE_REQUIREMENTS, [id=#75] + +(42) HashAggregate [codegen id : 12] +Input [5]: [cp_catalog_page_id#65, sum#71, sum#72, sum#73, sum#74] +Keys [1]: [cp_catalog_page_id#65] +Functions [4]: [sum(UnscaledValue(sales_price#49)), sum(UnscaledValue(return_amt#51)), sum(UnscaledValue(profit#50)), sum(UnscaledValue(net_loss#52))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#49))#76, sum(UnscaledValue(return_amt#51))#77, sum(UnscaledValue(profit#50))#78, sum(UnscaledValue(net_loss#52))#79] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#49))#76,17,2) AS sales#80, MakeDecimal(sum(UnscaledValue(return_amt#51))#77,17,2) AS returns#81, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#50))#78,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#52))#79,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#82, catalog channel AS channel#83, concat(catalog_page, cp_catalog_page_id#65) AS id#84] + +(43) Scan parquet default.web_sales +Output [4]: [ws_web_site_sk#85, ws_ext_sales_price#86, ws_net_profit#87, ws_sold_date_sk#88] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(ws_sold_date_sk#88), dynamicpruningexpression(ws_sold_date_sk#88 IN dynamicpruning#5)] +>>>>>>> 965221296e (update explain.txt) PushedFilters: [IsNotNull(ws_web_site_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 13] +<<<<<<< HEAD Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] (45) Filter [codegen id : 13] @@ -304,12 +389,41 @@ Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] (50) Scan parquet default.web_sales Output [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] +======= +Input [4]: [ws_web_site_sk#85, ws_ext_sales_price#86, ws_net_profit#87, ws_sold_date_sk#88] + +(45) Filter [codegen id : 13] +Input [4]: [ws_web_site_sk#85, ws_ext_sales_price#86, ws_net_profit#87, ws_sold_date_sk#88] +Condition : isnotnull(ws_web_site_sk#85) + +(46) Project [codegen id : 13] +Output [6]: [ws_web_site_sk#85 AS wsr_web_site_sk#89, ws_sold_date_sk#88 AS date_sk#90, ws_ext_sales_price#86 AS sales_price#91, ws_net_profit#87 AS profit#92, 0.00 AS return_amt#93, 0.00 AS net_loss#94] +Input [4]: [ws_web_site_sk#85, ws_ext_sales_price#86, ws_net_profit#87, ws_sold_date_sk#88] + +(47) Scan parquet default.web_returns +Output [5]: [wr_item_sk#95, wr_order_number#96, wr_return_amt#97, wr_net_loss#98, wr_returned_date_sk#99] +Batched: true +Location: InMemoryFileIndex [] +PartitionFilters: [isnotnull(wr_returned_date_sk#99), dynamicpruningexpression(wr_returned_date_sk#99 IN dynamicpruning#5)] +ReadSchema: struct + +(48) ColumnarToRow [codegen id : 14] +Input [5]: [wr_item_sk#95, wr_order_number#96, wr_return_amt#97, wr_net_loss#98, wr_returned_date_sk#99] + +(49) BroadcastExchange +Input [5]: [wr_item_sk#95, wr_order_number#96, wr_return_amt#97, wr_net_loss#98, wr_returned_date_sk#99] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295))),false), [id=#100] + +(50) Scan parquet default.web_sales +Output [4]: [ws_item_sk#34, ws_web_site_sk#101, ws_order_number#102, ws_sold_date_sk#103] +>>>>>>> 965221296e (update explain.txt) Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] ReadSchema: struct (51) ColumnarToRow +<<<<<<< HEAD Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] (52) Filter @@ -328,10 +442,31 @@ Join condition: None (55) Project [codegen id : 15] Output [6]: [ws_web_site_sk#98 AS wsr_web_site_sk#101, wr_returned_date_sk#96 AS date_sk#102, 0.00 AS sales_price#103, 0.00 AS profit#104, wr_return_amt#94 AS return_amt#105, wr_net_loss#95 AS net_loss#106] Input [8]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96, ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] +======= +Input [4]: [ws_item_sk#34, ws_web_site_sk#101, ws_order_number#102, ws_sold_date_sk#103] + +(52) Filter +Input [4]: [ws_item_sk#34, ws_web_site_sk#101, ws_order_number#102, ws_sold_date_sk#103] +Condition : ((isnotnull(ws_item_sk#34) AND isnotnull(ws_order_number#102)) AND isnotnull(ws_web_site_sk#101)) + +(53) Project +Output [3]: [ws_item_sk#34, ws_web_site_sk#101, ws_order_number#102] +Input [4]: [ws_item_sk#34, ws_web_site_sk#101, ws_order_number#102, ws_sold_date_sk#103] + +(54) BroadcastHashJoin [codegen id : 15] +Left keys [2]: [wr_item_sk#95, wr_order_number#96] +Right keys [2]: [ws_item_sk#34, ws_order_number#102] +Join condition: None + +(55) Project [codegen id : 15] +Output [6]: [ws_web_site_sk#101 AS wsr_web_site_sk#104, wr_returned_date_sk#99 AS date_sk#105, 0.00 AS sales_price#106, 0.00 AS profit#107, wr_return_amt#97 AS return_amt#108, wr_net_loss#98 AS net_loss#109] +Input [8]: [wr_item_sk#95, wr_order_number#96, wr_return_amt#97, wr_net_loss#98, wr_returned_date_sk#99, ws_item_sk#34, ws_web_site_sk#101, ws_order_number#102] +>>>>>>> 965221296e (update explain.txt) (56) Union (57) ReusedExchange [Reuses operator id: 79] +<<<<<<< HEAD Output [1]: [d_date_sk#107] (58) BroadcastHashJoin [codegen id : 18] @@ -345,12 +480,28 @@ Input [7]: [wsr_web_site_sk#86, date_sk#87, sales_price#88, profit#89, return_am (60) Scan parquet default.web_site Output [2]: [web_site_sk#108, web_site_id#109] +======= +Output [1]: [d_date_sk#110] + +(58) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [date_sk#90] +Right keys [1]: [d_date_sk#110] +Join condition: None + +(59) Project [codegen id : 18] +Output [5]: [wsr_web_site_sk#89, sales_price#91, profit#92, return_amt#93, net_loss#94] +Input [7]: [wsr_web_site_sk#89, date_sk#90, sales_price#91, profit#92, return_amt#93, net_loss#94, d_date_sk#110] + +(60) Scan parquet default.web_site +Output [2]: [web_site_sk#111, web_site_id#112] +>>>>>>> 965221296e (update explain.txt) Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct (61) ColumnarToRow [codegen id : 17] +<<<<<<< HEAD Input [2]: [web_site_sk#108, web_site_id#109] (62) Filter [codegen id : 17] @@ -387,10 +538,49 @@ Keys [1]: [web_site_id#109] Functions [4]: [sum(UnscaledValue(sales_price#88)), sum(UnscaledValue(return_amt#90)), sum(UnscaledValue(profit#89)), sum(UnscaledValue(net_loss#91))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#88))#118, sum(UnscaledValue(return_amt#90))#119, sum(UnscaledValue(profit#89))#120, sum(UnscaledValue(net_loss#91))#121] Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#88))#118,17,2) AS sales#122, MakeDecimal(sum(UnscaledValue(return_amt#90))#119,17,2) AS returns#123, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#89))#120,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#91))#121,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#124, web channel AS channel#125, concat(web_site, web_site_id#109) AS id#126] +======= +Input [2]: [web_site_sk#111, web_site_id#112] + +(62) Filter [codegen id : 17] +Input [2]: [web_site_sk#111, web_site_id#112] +Condition : isnotnull(web_site_sk#111) + +(63) BroadcastExchange +Input [2]: [web_site_sk#111, web_site_id#112] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#113] + +(64) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [wsr_web_site_sk#89] +Right keys [1]: [web_site_sk#111] +Join condition: None + +(65) Project [codegen id : 18] +Output [5]: [sales_price#91, profit#92, return_amt#93, net_loss#94, web_site_id#112] +Input [7]: [wsr_web_site_sk#89, sales_price#91, profit#92, return_amt#93, net_loss#94, web_site_sk#111, web_site_id#112] + +(66) HashAggregate [codegen id : 18] +Input [5]: [sales_price#91, profit#92, return_amt#93, net_loss#94, web_site_id#112] +Keys [1]: [web_site_id#112] +Functions [4]: [partial_sum(UnscaledValue(sales_price#91)), partial_sum(UnscaledValue(return_amt#93)), partial_sum(UnscaledValue(profit#92)), partial_sum(UnscaledValue(net_loss#94))] +Aggregate Attributes [4]: [sum#114, sum#115, sum#116, sum#117] +Results [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] + +(67) Exchange +Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] +Arguments: hashpartitioning(web_site_id#112, 5), ENSURE_REQUIREMENTS, [id=#122] + +(68) HashAggregate [codegen id : 19] +Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] +Keys [1]: [web_site_id#112] +Functions [4]: [sum(UnscaledValue(sales_price#91)), sum(UnscaledValue(return_amt#93)), sum(UnscaledValue(profit#92)), sum(UnscaledValue(net_loss#94))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#91))#123, sum(UnscaledValue(return_amt#93))#124, sum(UnscaledValue(profit#92))#125, sum(UnscaledValue(net_loss#94))#126] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#91))#123,17,2) AS sales#127, MakeDecimal(sum(UnscaledValue(return_amt#93))#124,17,2) AS returns#128, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#92))#125,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#94))#126,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#129, web channel AS channel#130, concat(web_site, web_site_id#112) AS id#131] +>>>>>>> 965221296e (update explain.txt) (69) Union (70) Expand [codegen id : 20] +<<<<<<< HEAD Input [5]: [sales#37, returns#38, profit#39, channel#40, id#41] Arguments: [[sales#37, returns#38, profit#39, channel#40, id#41, 0], [sales#37, returns#38, profit#39, channel#40, null, 1], [sales#37, returns#38, profit#39, null, null, 3]], [sales#37, returns#38, profit#39, channel#127, id#128, spark_grouping_id#129] @@ -415,6 +605,32 @@ Results [5]: [channel#127, id#128, sum(sales#37)#142 AS sales#145, sum(returns#3 (74) TakeOrderedAndProject Input [5]: [channel#127, id#128, sales#145, returns#146, profit#147] Arguments: 100, [channel#127 ASC NULLS FIRST, id#128 ASC NULLS FIRST], [channel#127, id#128, sales#145, returns#146, profit#147] +======= +Input [5]: [sales#39, returns#40, profit#41, channel#42, id#43] +Arguments: [[sales#39, returns#40, profit#41, channel#42, id#43, 0], [sales#39, returns#40, profit#41, channel#42, null, 1], [sales#39, returns#40, profit#41, null, null, 3]], [sales#39, returns#40, profit#41, channel#132, id#133, spark_grouping_id#134] + +(71) HashAggregate [codegen id : 20] +Input [6]: [sales#39, returns#40, profit#41, channel#132, id#133, spark_grouping_id#134] +Keys [3]: [channel#132, id#133, spark_grouping_id#134] +Functions [3]: [partial_sum(sales#39), partial_sum(returns#40), partial_sum(profit#41)] +Aggregate Attributes [6]: [sum#135, isEmpty#136, sum#137, isEmpty#138, sum#139, isEmpty#140] +Results [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#75, sum#142, isEmpty#143, sum#144, isEmpty#145] + +(72) Exchange +Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#75, sum#142, isEmpty#143, sum#144, isEmpty#145] +Arguments: hashpartitioning(channel#132, id#133, spark_grouping_id#134, 5), ENSURE_REQUIREMENTS, [id=#146] + +(73) HashAggregate [codegen id : 21] +Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#75, sum#142, isEmpty#143, sum#144, isEmpty#145] +Keys [3]: [channel#132, id#133, spark_grouping_id#134] +Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] +Aggregate Attributes [3]: [sum(sales#39)#147, sum(returns#40)#148, sum(profit#41)#149] +Results [5]: [channel#132, id#133, sum(sales#39)#147 AS sales#150, sum(returns#40)#148 AS returns#151, sum(profit#41)#149 AS profit#152] + +(74) TakeOrderedAndProject +Input [5]: [channel#132, id#133, sales#150, returns#151, profit#152] +Arguments: 100, [channel#132 ASC NULLS FIRST, id#133 ASC NULLS FIRST], [channel#132, id#133, sales#150, returns#151, profit#152] +>>>>>>> 965221296e (update explain.txt) ===== Subqueries ===== @@ -427,13 +643,18 @@ BroadcastExchange (79) (75) Scan parquet default.date_dim +<<<<<<< HEAD Output [2]: [d_date_sk#22, d_date#148] +======= +Output [2]: [d_date_sk#22, d_date#153] +>>>>>>> 965221296e (update explain.txt) Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] ReadSchema: struct (76) ColumnarToRow [codegen id : 1] +<<<<<<< HEAD Input [2]: [d_date_sk#22, d_date#148] (77) Filter [codegen id : 1] @@ -447,15 +668,38 @@ Input [2]: [d_date_sk#22, d_date#148] (79) BroadcastExchange Input [1]: [d_date_sk#22] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] +======= +Input [2]: [d_date_sk#22, d_date#153] + +(77) Filter [codegen id : 1] +Input [2]: [d_date_sk#22, d_date#153] +Condition : (((isnotnull(d_date#153) AND (d_date#153 >= 2000-08-23)) AND (d_date#153 <= 2000-09-06)) AND isnotnull(d_date_sk#22)) + +(78) Project [codegen id : 1] +Output [1]: [d_date_sk#22] +Input [2]: [d_date_sk#22, d_date#153] + +(79) BroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#154] +>>>>>>> 965221296e (update explain.txt) Subquery:2 Hosting operator id = 5 Hosting Expression = sr_returned_date_sk#15 IN dynamicpruning#5 Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#45 IN dynamicpruning#5 +<<<<<<< HEAD Subquery:4 Hosting operator id = 26 Hosting Expression = cr_returned_date_sk#55 IN dynamicpruning#5 Subquery:5 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#85 IN dynamicpruning#5 Subquery:6 Hosting operator id = 47 Hosting Expression = wr_returned_date_sk#96 IN dynamicpruning#5 +======= +Subquery:4 Hosting operator id = 26 Hosting Expression = cr_returned_date_sk#56 IN dynamicpruning#5 + +Subquery:5 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#88 IN dynamicpruning#5 + +Subquery:6 Hosting operator id = 47 Hosting Expression = wr_returned_date_sk#99 IN dynamicpruning#5 +>>>>>>> 965221296e (update explain.txt) From 83dba72b6589178000764e580d1c0b42eb4a787c Mon Sep 17 00:00:00 2001 From: allisonwang-db Date: Fri, 10 Jun 2022 13:52:45 -0700 Subject: [PATCH 3/3] update golden files --- .../approved-plans-v1_4/q4/explain.txt | 270 ------------------ .../approved-plans-v1_4/q5/explain.txt | 244 ---------------- 2 files changed, 514 deletions(-) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt index ec2bdfae937a..88f9474b5c68 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt @@ -362,18 +362,13 @@ Output [11]: [customer_id#23, year_total#24, customer_id#46, customer_first_name Input [12]: [customer_id#23, year_total#24, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#53, customer_id#77, year_total#78] (54) Scan parquet default.customer -<<<<<<< HEAD Output [8]: [c_customer_sk#79, c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86] -======= -Output [8]: [c_customer_sk#87, c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58] ->>>>>>> 965221296e (update explain.txt) Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (55) ColumnarToRow [codegen id : 14] -<<<<<<< HEAD Input [8]: [c_customer_sk#79, c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86] (56) Filter [codegen id : 14] @@ -385,24 +380,10 @@ Output [6]: [cs_bill_customer_sk#87, cs_ext_discount_amt#88, cs_ext_sales_price# Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(cs_sold_date_sk#92), dynamicpruningexpression(cs_sold_date_sk#92 IN dynamicpruning#39)] -======= -Input [8]: [c_customer_sk#87, c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58] - -(56) Filter [codegen id : 14] -Input [8]: [c_customer_sk#87, c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58] -Condition : (isnotnull(c_customer_sk#87) AND isnotnull(c_customer_id#88)) - -(57) Scan parquet default.catalog_sales -Output [6]: [cs_bill_customer_sk#94, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] -Batched: true -Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#98), dynamicpruningexpression(cs_sold_date_sk#98 IN dynamicpruning#41)] ->>>>>>> 965221296e (update explain.txt) PushedFilters: [IsNotNull(cs_bill_customer_sk)] ReadSchema: struct (58) ColumnarToRow [codegen id : 12] -<<<<<<< HEAD Input [6]: [cs_bill_customer_sk#87, cs_ext_discount_amt#88, cs_ext_sales_price#89, cs_ext_wholesale_cost#90, cs_ext_list_price#91, cs_sold_date_sk#92] (59) Filter [codegen id : 12] @@ -467,79 +448,12 @@ Input [13]: [customer_id#23, year_total#24, customer_id#46, customer_first_name# (72) Scan parquet default.customer Output [8]: [c_customer_sk#101, c_customer_id#102, c_first_name#103, c_last_name#104, c_preferred_cust_flag#105, c_birth_country#106, c_login#107, c_email_address#108] -======= -Input [6]: [cs_bill_customer_sk#94, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] - -(59) Filter [codegen id : 12] -Input [6]: [cs_bill_customer_sk#94, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] -Condition : isnotnull(cs_bill_customer_sk#94) - -(60) BroadcastExchange -Input [6]: [cs_bill_customer_sk#94, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#99] - -(61) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [c_customer_sk#87] -Right keys [1]: [cs_bill_customer_sk#94] -Join condition: None - -(62) Project [codegen id : 14] -Output [12]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] -Input [14]: [c_customer_sk#87, c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, cs_bill_customer_sk#94, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] - -(63) ReusedExchange [Reuses operator id: 118] -Output [2]: [d_date_sk#100, d_year#101] - -(64) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [cs_sold_date_sk#98] -Right keys [1]: [d_date_sk#100] -Join condition: None - -(65) Project [codegen id : 14] -Output [12]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, d_year#101] -Input [14]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98, d_date_sk#100, d_year#101] - -(66) HashAggregate [codegen id : 14] -Input [12]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, cs_ext_discount_amt#73, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, d_year#101] -Keys [8]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, d_year#101] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#73 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [2]: [sum#102, isEmpty#103] -Results [10]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, d_year#101, sum#104, isEmpty#105] - -(67) Exchange -Input [10]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, d_year#101, sum#104, isEmpty#105] -Arguments: hashpartitioning(c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, d_year#101, 5), ENSURE_REQUIREMENTS, [id=#106] - -(68) HashAggregate [codegen id : 15] -Input [10]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, d_year#101, sum#104, isEmpty#105] -Keys [8]: [c_customer_id#88, c_first_name#89, c_last_name#90, c_preferred_cust_flag#91, c_birth_country#92, c_login#93, c_email_address#58, d_year#101] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#73 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#73 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#81] -Results [2]: [c_customer_id#88 AS customer_id#107, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#73 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#81 AS year_total#108] - -(69) BroadcastExchange -Input [2]: [customer_id#107, year_total#108] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#109] - -(70) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [customer_id#25] -Right keys [1]: [customer_id#107] -Join condition: (CASE WHEN (year_total#85 > 0.000000) THEN CheckOverflow((promote_precision(year_total#108) / promote_precision(year_total#85)), DecimalType(38,14), true) END > CASE WHEN (year_total#26 > 0.000000) THEN CheckOverflow((promote_precision(year_total#57) / promote_precision(year_total#26)), DecimalType(38,14), true) END) - -(71) Project [codegen id : 24] -Output [10]: [customer_id#25, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#85, year_total#108] -Input [13]: [customer_id#25, year_total#26, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#57, year_total#85, customer_id#107, year_total#108] - -(72) Scan parquet default.customer -Output [8]: [c_customer_sk#110, c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117] ->>>>>>> 965221296e (update explain.txt) Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (73) ColumnarToRow [codegen id : 18] -<<<<<<< HEAD Input [8]: [c_customer_sk#101, c_customer_id#102, c_first_name#103, c_last_name#104, c_preferred_cust_flag#105, c_birth_country#106, c_login#107, c_email_address#108] (74) Filter [codegen id : 18] @@ -551,24 +465,10 @@ Output [6]: [ws_bill_customer_sk#109, ws_ext_discount_amt#110, ws_ext_sales_pric Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(ws_sold_date_sk#114), dynamicpruningexpression(ws_sold_date_sk#114 IN dynamicpruning#15)] -======= -Input [8]: [c_customer_sk#110, c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117] - -(74) Filter [codegen id : 18] -Input [8]: [c_customer_sk#110, c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117] -Condition : (isnotnull(c_customer_sk#110) AND isnotnull(c_customer_id#111)) - -(75) Scan parquet default.web_sales -Output [6]: [ws_bill_customer_sk#118, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] -Batched: true -Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#123), dynamicpruningexpression(ws_sold_date_sk#123 IN dynamicpruning#15)] ->>>>>>> 965221296e (update explain.txt) PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (76) ColumnarToRow [codegen id : 16] -<<<<<<< HEAD Input [6]: [ws_bill_customer_sk#109, ws_ext_discount_amt#110, ws_ext_sales_price#111, ws_ext_wholesale_cost#112, ws_ext_list_price#113, ws_sold_date_sk#114] (77) Filter [codegen id : 16] @@ -641,87 +541,12 @@ Input [12]: [customer_id#23, customer_id#46, customer_first_name#47, customer_la (92) Scan parquet default.customer Output [8]: [c_customer_sk#126, c_customer_id#127, c_first_name#128, c_last_name#129, c_preferred_cust_flag#130, c_birth_country#131, c_login#132, c_email_address#133] -======= -Input [6]: [ws_bill_customer_sk#118, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] - -(77) Filter [codegen id : 16] -Input [6]: [ws_bill_customer_sk#118, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] -Condition : isnotnull(ws_bill_customer_sk#118) - -(78) BroadcastExchange -Input [6]: [ws_bill_customer_sk#118, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#124] - -(79) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [c_customer_sk#110] -Right keys [1]: [ws_bill_customer_sk#118] -Join condition: None - -(80) Project [codegen id : 18] -Output [12]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] -Input [14]: [c_customer_sk#110, c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_bill_customer_sk#118, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] - -(81) ReusedExchange [Reuses operator id: 114] -Output [2]: [d_date_sk#125, d_year#126] - -(82) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ws_sold_date_sk#123] -Right keys [1]: [d_date_sk#125] -Join condition: None - -(83) Project [codegen id : 18] -Output [12]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, d_year#126] -Input [14]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123, d_date_sk#125, d_year#126] - -(84) HashAggregate [codegen id : 18] -Input [12]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, d_year#126] -Keys [8]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [2]: [sum#127, isEmpty#128] -Results [10]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126, sum#129, isEmpty#130] - -(85) Exchange -Input [10]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126, sum#129, isEmpty#130] -Arguments: hashpartitioning(c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126, 5), ENSURE_REQUIREMENTS, [id=#131] - -(86) HashAggregate [codegen id : 19] -Input [10]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126, sum#129, isEmpty#130] -Keys [8]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#132] -Results [2]: [c_customer_id#111 AS customer_id#133, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#132 AS year_total#134] - -(87) Filter [codegen id : 19] -Input [2]: [customer_id#133, year_total#134] -Condition : (isnotnull(year_total#134) AND (year_total#134 > 0.000000)) - -(88) Project [codegen id : 19] -Output [2]: [customer_id#133 AS customer_id#135, year_total#134 AS year_total#136] -Input [2]: [customer_id#133, year_total#134] - -(89) BroadcastExchange -Input [2]: [customer_id#135, year_total#136] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#137] - -(90) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [customer_id#25] -Right keys [1]: [customer_id#135] -Join condition: None - -(91) Project [codegen id : 24] -Output [11]: [customer_id#25, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#85, year_total#108, year_total#136] -Input [12]: [customer_id#25, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#85, year_total#108, customer_id#135, year_total#136] - -(92) Scan parquet default.customer -Output [8]: [c_customer_sk#138, c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145] ->>>>>>> 965221296e (update explain.txt) Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (93) ColumnarToRow [codegen id : 22] -<<<<<<< HEAD Input [8]: [c_customer_sk#126, c_customer_id#127, c_first_name#128, c_last_name#129, c_preferred_cust_flag#130, c_birth_country#131, c_login#132, c_email_address#133] (94) Filter [codegen id : 22] @@ -733,24 +558,10 @@ Output [6]: [ws_bill_customer_sk#134, ws_ext_discount_amt#135, ws_ext_sales_pric Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(ws_sold_date_sk#139), dynamicpruningexpression(ws_sold_date_sk#139 IN dynamicpruning#39)] -======= -Input [8]: [c_customer_sk#138, c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145] - -(94) Filter [codegen id : 22] -Input [8]: [c_customer_sk#138, c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145] -Condition : (isnotnull(c_customer_sk#138) AND isnotnull(c_customer_id#139)) - -(95) Scan parquet default.web_sales -Output [6]: [ws_bill_customer_sk#146, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, ws_sold_date_sk#151] -Batched: true -Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#151), dynamicpruningexpression(ws_sold_date_sk#151 IN dynamicpruning#41)] ->>>>>>> 965221296e (update explain.txt) PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (96) ColumnarToRow [codegen id : 20] -<<<<<<< HEAD Input [6]: [ws_bill_customer_sk#134, ws_ext_discount_amt#135, ws_ext_sales_price#136, ws_ext_wholesale_cost#137, ws_ext_list_price#138, ws_sold_date_sk#139] (97) Filter [codegen id : 20] @@ -812,69 +623,6 @@ Join condition: (CASE WHEN (year_total#78 > 0.000000) THEN CheckOverflow((promot (109) Project [codegen id : 24] Output [7]: [customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52] Input [13]: [customer_id#23, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#78, year_total#100, year_total#125, customer_id#146, year_total#147] -======= -Input [6]: [ws_bill_customer_sk#146, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, ws_sold_date_sk#151] - -(97) Filter [codegen id : 20] -Input [6]: [ws_bill_customer_sk#146, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, ws_sold_date_sk#151] -Condition : isnotnull(ws_bill_customer_sk#146) - -(98) BroadcastExchange -Input [6]: [ws_bill_customer_sk#146, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, ws_sold_date_sk#151] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#152] - -(99) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [c_customer_sk#138] -Right keys [1]: [ws_bill_customer_sk#146] -Join condition: None - -(100) Project [codegen id : 22] -Output [12]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, ws_sold_date_sk#151] -Input [14]: [c_customer_sk#138, c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, ws_bill_customer_sk#146, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, ws_sold_date_sk#151] - -(101) ReusedExchange [Reuses operator id: 118] -Output [2]: [d_date_sk#153, d_year#154] - -(102) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [ws_sold_date_sk#151] -Right keys [1]: [d_date_sk#153] -Join condition: None - -(103) Project [codegen id : 22] -Output [12]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, d_year#154] -Input [14]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, ws_sold_date_sk#151, d_date_sk#153, d_year#154] - -(104) HashAggregate [codegen id : 22] -Input [12]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, ws_ext_discount_amt#147, ws_ext_sales_price#148, ws_ext_wholesale_cost#149, ws_ext_list_price#150, d_year#154] -Keys [8]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, d_year#154] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#150 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#149 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#147 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#148 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [2]: [sum#155, isEmpty#156] -Results [10]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, d_year#154, sum#157, isEmpty#158] - -(105) Exchange -Input [10]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, d_year#154, sum#157, isEmpty#158] -Arguments: hashpartitioning(c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, d_year#154, 5), ENSURE_REQUIREMENTS, [id=#159] - -(106) HashAggregate [codegen id : 23] -Input [10]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, d_year#154, sum#157, isEmpty#158] -Keys [8]: [c_customer_id#139, c_first_name#140, c_last_name#141, c_preferred_cust_flag#142, c_birth_country#143, c_login#144, c_email_address#145, d_year#154] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#150 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#149 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#147 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#148 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#150 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#149 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#147 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#148 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#132] -Results [2]: [c_customer_id#139 AS customer_id#160, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#150 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#149 as decimal(8,2)))), DecimalType(8,2), true) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#147 as decimal(9,2)))), DecimalType(9,2), true) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#148 as decimal(10,2)))), DecimalType(10,2), true)) / 2.00), DecimalType(14,6), true))#132 AS year_total#161] - -(107) BroadcastExchange -Input [2]: [customer_id#160, year_total#161] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#162] - -(108) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [customer_id#25] -Right keys [1]: [customer_id#160] -Join condition: (CASE WHEN (year_total#85 > 0.000000) THEN CheckOverflow((promote_precision(year_total#108) / promote_precision(year_total#85)), DecimalType(38,14), true) END > CASE WHEN (year_total#136 > 0.000000) THEN CheckOverflow((promote_precision(year_total#161) / promote_precision(year_total#136)), DecimalType(38,14), true) END) - -(109) Project [codegen id : 24] -Output [7]: [customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56] -Input [13]: [customer_id#25, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#85, year_total#108, year_total#136, customer_id#160, year_total#161] ->>>>>>> 965221296e (update explain.txt) (110) TakeOrderedAndProject Input [7]: [customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52] @@ -904,13 +652,8 @@ Input [2]: [d_date_sk#16, d_year#17] Condition : ((isnotnull(d_year#17) AND (d_year#17 = 2001)) AND isnotnull(d_date_sk#16)) (114) BroadcastExchange -<<<<<<< HEAD Input [2]: [d_date_sk#16, d_year#17] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=18] -======= -Input [2]: [d_date_sk#17, d_year#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#163] ->>>>>>> 965221296e (update explain.txt) Subquery:2 Hosting operator id = 20 Hosting Expression = ss_sold_date_sk#38 IN dynamicpruning#39 BroadcastExchange (118) @@ -934,28 +677,15 @@ Input [2]: [d_date_sk#40, d_year#41] Condition : ((isnotnull(d_year#41) AND (d_year#41 = 2002)) AND isnotnull(d_date_sk#40)) (118) BroadcastExchange -<<<<<<< HEAD Input [2]: [d_date_sk#40, d_year#41] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=19] -======= -Input [2]: [d_date_sk#43, d_year#44] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#164] ->>>>>>> 965221296e (update explain.txt) Subquery:3 Hosting operator id = 37 Hosting Expression = cs_sold_date_sk#67 IN dynamicpruning#15 -<<<<<<< HEAD Subquery:4 Hosting operator id = 57 Hosting Expression = cs_sold_date_sk#92 IN dynamicpruning#39 Subquery:5 Hosting operator id = 75 Hosting Expression = ws_sold_date_sk#114 IN dynamicpruning#15 Subquery:6 Hosting operator id = 95 Hosting Expression = ws_sold_date_sk#139 IN dynamicpruning#39 -======= -Subquery:4 Hosting operator id = 57 Hosting Expression = cs_sold_date_sk#98 IN dynamicpruning#41 - -Subquery:5 Hosting operator id = 75 Hosting Expression = ws_sold_date_sk#123 IN dynamicpruning#15 - -Subquery:6 Hosting operator id = 95 Hosting Expression = ws_sold_date_sk#151 IN dynamicpruning#41 ->>>>>>> 965221296e (update explain.txt) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt index d992d197487f..1f39a204e7c8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt @@ -188,7 +188,6 @@ Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_s Condition : isnotnull(cs_catalog_page_sk#42) (25) Project [codegen id : 7] -<<<<<<< HEAD Output [6]: [cs_catalog_page_sk#42 AS page_sk#46, cs_sold_date_sk#45 AS date_sk#47, cs_ext_sales_price#43 AS sales_price#48, cs_net_profit#44 AS profit#49, 0.00 AS return_amt#50, 0.00 AS net_loss#51] Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] @@ -197,21 +196,10 @@ Output [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_retu Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(cr_returned_date_sk#55), dynamicpruningexpression(cr_returned_date_sk#55 IN dynamicpruning#5)] -======= -Output [6]: [cs_catalog_page_sk#44 AS page_sk#25, cs_sold_date_sk#47 AS date_sk#48, cs_ext_sales_price#45 AS sales_price#49, cs_net_profit#46 AS profit#50, 0.00 AS return_amt#51, 0.00 AS net_loss#52] -Input [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] - -(26) Scan parquet default.catalog_returns -Output [4]: [cr_catalog_page_sk#53, cr_return_amount#54, cr_net_loss#55, cr_returned_date_sk#56] -Batched: true -Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#56), dynamicpruningexpression(cr_returned_date_sk#56 IN dynamicpruning#5)] ->>>>>>> 965221296e (update explain.txt) PushedFilters: [IsNotNull(cr_catalog_page_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 8] -<<<<<<< HEAD Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] (28) Filter [codegen id : 8] @@ -221,22 +209,10 @@ Condition : isnotnull(cr_catalog_page_sk#52) (29) Project [codegen id : 8] Output [6]: [cr_catalog_page_sk#52 AS page_sk#56, cr_returned_date_sk#55 AS date_sk#57, 0.00 AS sales_price#58, 0.00 AS profit#59, cr_return_amount#53 AS return_amt#60, cr_net_loss#54 AS net_loss#61] Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] -======= -Input [4]: [cr_catalog_page_sk#53, cr_return_amount#54, cr_net_loss#55, cr_returned_date_sk#56] - -(28) Filter [codegen id : 8] -Input [4]: [cr_catalog_page_sk#53, cr_return_amount#54, cr_net_loss#55, cr_returned_date_sk#56] -Condition : isnotnull(cr_catalog_page_sk#53) - -(29) Project [codegen id : 8] -Output [6]: [cr_catalog_page_sk#53 AS page_sk#57, cr_returned_date_sk#56 AS date_sk#58, 0.00 AS sales_price#59, 0.00 AS profit#60, cr_return_amount#54 AS return_amt#61, cr_net_loss#55 AS net_loss#62] -Input [4]: [cr_catalog_page_sk#53, cr_return_amount#54, cr_net_loss#55, cr_returned_date_sk#56] ->>>>>>> 965221296e (update explain.txt) (30) Union (31) ReusedExchange [Reuses operator id: 79] -<<<<<<< HEAD Output [1]: [d_date_sk#62] (32) BroadcastHashJoin [codegen id : 11] @@ -250,28 +226,12 @@ Input [7]: [page_sk#46, date_sk#47, sales_price#48, profit#49, return_amt#50, ne (34) Scan parquet default.catalog_page Output [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] -======= -Output [1]: [d_date_sk#63] - -(32) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [date_sk#48] -Right keys [1]: [d_date_sk#63] -Join condition: None - -(33) Project [codegen id : 11] -Output [5]: [page_sk#25, sales_price#49, profit#50, return_amt#51, net_loss#52] -Input [7]: [page_sk#25, date_sk#48, sales_price#49, profit#50, return_amt#51, net_loss#52, d_date_sk#63] - -(34) Scan parquet default.catalog_page -Output [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] ->>>>>>> 965221296e (update explain.txt) Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 10] -<<<<<<< HEAD Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] (36) Filter [codegen id : 10] @@ -314,55 +274,10 @@ Output [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold Batched: true Location: InMemoryFileIndex [] PartitionFilters: [isnotnull(ws_sold_date_sk#85), dynamicpruningexpression(ws_sold_date_sk#85 IN dynamicpruning#5)] -======= -Input [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] - -(36) Filter [codegen id : 10] -Input [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] -Condition : isnotnull(cp_catalog_page_sk#64) - -(37) BroadcastExchange -Input [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#66] - -(38) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [page_sk#25] -Right keys [1]: [cp_catalog_page_sk#64] -Join condition: None - -(39) Project [codegen id : 11] -Output [5]: [sales_price#49, profit#50, return_amt#51, net_loss#52, cp_catalog_page_id#65] -Input [7]: [page_sk#25, sales_price#49, profit#50, return_amt#51, net_loss#52, cp_catalog_page_sk#64, cp_catalog_page_id#65] - -(40) HashAggregate [codegen id : 11] -Input [5]: [sales_price#49, profit#50, return_amt#51, net_loss#52, cp_catalog_page_id#65] -Keys [1]: [cp_catalog_page_id#65] -Functions [4]: [partial_sum(UnscaledValue(sales_price#49)), partial_sum(UnscaledValue(return_amt#51)), partial_sum(UnscaledValue(profit#50)), partial_sum(UnscaledValue(net_loss#52))] -Aggregate Attributes [4]: [sum#67, sum#68, sum#69, sum#70] -Results [5]: [cp_catalog_page_id#65, sum#71, sum#72, sum#73, sum#74] - -(41) Exchange -Input [5]: [cp_catalog_page_id#65, sum#71, sum#72, sum#73, sum#74] -Arguments: hashpartitioning(cp_catalog_page_id#65, 5), ENSURE_REQUIREMENTS, [id=#75] - -(42) HashAggregate [codegen id : 12] -Input [5]: [cp_catalog_page_id#65, sum#71, sum#72, sum#73, sum#74] -Keys [1]: [cp_catalog_page_id#65] -Functions [4]: [sum(UnscaledValue(sales_price#49)), sum(UnscaledValue(return_amt#51)), sum(UnscaledValue(profit#50)), sum(UnscaledValue(net_loss#52))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#49))#76, sum(UnscaledValue(return_amt#51))#77, sum(UnscaledValue(profit#50))#78, sum(UnscaledValue(net_loss#52))#79] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#49))#76,17,2) AS sales#80, MakeDecimal(sum(UnscaledValue(return_amt#51))#77,17,2) AS returns#81, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#50))#78,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#52))#79,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#82, catalog channel AS channel#83, concat(catalog_page, cp_catalog_page_id#65) AS id#84] - -(43) Scan parquet default.web_sales -Output [4]: [ws_web_site_sk#85, ws_ext_sales_price#86, ws_net_profit#87, ws_sold_date_sk#88] -Batched: true -Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#88), dynamicpruningexpression(ws_sold_date_sk#88 IN dynamicpruning#5)] ->>>>>>> 965221296e (update explain.txt) PushedFilters: [IsNotNull(ws_web_site_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 13] -<<<<<<< HEAD Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] (45) Filter [codegen id : 13] @@ -389,41 +304,12 @@ Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] (50) Scan parquet default.web_sales Output [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] -======= -Input [4]: [ws_web_site_sk#85, ws_ext_sales_price#86, ws_net_profit#87, ws_sold_date_sk#88] - -(45) Filter [codegen id : 13] -Input [4]: [ws_web_site_sk#85, ws_ext_sales_price#86, ws_net_profit#87, ws_sold_date_sk#88] -Condition : isnotnull(ws_web_site_sk#85) - -(46) Project [codegen id : 13] -Output [6]: [ws_web_site_sk#85 AS wsr_web_site_sk#89, ws_sold_date_sk#88 AS date_sk#90, ws_ext_sales_price#86 AS sales_price#91, ws_net_profit#87 AS profit#92, 0.00 AS return_amt#93, 0.00 AS net_loss#94] -Input [4]: [ws_web_site_sk#85, ws_ext_sales_price#86, ws_net_profit#87, ws_sold_date_sk#88] - -(47) Scan parquet default.web_returns -Output [5]: [wr_item_sk#95, wr_order_number#96, wr_return_amt#97, wr_net_loss#98, wr_returned_date_sk#99] -Batched: true -Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(wr_returned_date_sk#99), dynamicpruningexpression(wr_returned_date_sk#99 IN dynamicpruning#5)] -ReadSchema: struct - -(48) ColumnarToRow [codegen id : 14] -Input [5]: [wr_item_sk#95, wr_order_number#96, wr_return_amt#97, wr_net_loss#98, wr_returned_date_sk#99] - -(49) BroadcastExchange -Input [5]: [wr_item_sk#95, wr_order_number#96, wr_return_amt#97, wr_net_loss#98, wr_returned_date_sk#99] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295))),false), [id=#100] - -(50) Scan parquet default.web_sales -Output [4]: [ws_item_sk#34, ws_web_site_sk#101, ws_order_number#102, ws_sold_date_sk#103] ->>>>>>> 965221296e (update explain.txt) Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] ReadSchema: struct (51) ColumnarToRow -<<<<<<< HEAD Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] (52) Filter @@ -442,31 +328,10 @@ Join condition: None (55) Project [codegen id : 15] Output [6]: [ws_web_site_sk#98 AS wsr_web_site_sk#101, wr_returned_date_sk#96 AS date_sk#102, 0.00 AS sales_price#103, 0.00 AS profit#104, wr_return_amt#94 AS return_amt#105, wr_net_loss#95 AS net_loss#106] Input [8]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96, ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] -======= -Input [4]: [ws_item_sk#34, ws_web_site_sk#101, ws_order_number#102, ws_sold_date_sk#103] - -(52) Filter -Input [4]: [ws_item_sk#34, ws_web_site_sk#101, ws_order_number#102, ws_sold_date_sk#103] -Condition : ((isnotnull(ws_item_sk#34) AND isnotnull(ws_order_number#102)) AND isnotnull(ws_web_site_sk#101)) - -(53) Project -Output [3]: [ws_item_sk#34, ws_web_site_sk#101, ws_order_number#102] -Input [4]: [ws_item_sk#34, ws_web_site_sk#101, ws_order_number#102, ws_sold_date_sk#103] - -(54) BroadcastHashJoin [codegen id : 15] -Left keys [2]: [wr_item_sk#95, wr_order_number#96] -Right keys [2]: [ws_item_sk#34, ws_order_number#102] -Join condition: None - -(55) Project [codegen id : 15] -Output [6]: [ws_web_site_sk#101 AS wsr_web_site_sk#104, wr_returned_date_sk#99 AS date_sk#105, 0.00 AS sales_price#106, 0.00 AS profit#107, wr_return_amt#97 AS return_amt#108, wr_net_loss#98 AS net_loss#109] -Input [8]: [wr_item_sk#95, wr_order_number#96, wr_return_amt#97, wr_net_loss#98, wr_returned_date_sk#99, ws_item_sk#34, ws_web_site_sk#101, ws_order_number#102] ->>>>>>> 965221296e (update explain.txt) (56) Union (57) ReusedExchange [Reuses operator id: 79] -<<<<<<< HEAD Output [1]: [d_date_sk#107] (58) BroadcastHashJoin [codegen id : 18] @@ -480,28 +345,12 @@ Input [7]: [wsr_web_site_sk#86, date_sk#87, sales_price#88, profit#89, return_am (60) Scan parquet default.web_site Output [2]: [web_site_sk#108, web_site_id#109] -======= -Output [1]: [d_date_sk#110] - -(58) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [date_sk#90] -Right keys [1]: [d_date_sk#110] -Join condition: None - -(59) Project [codegen id : 18] -Output [5]: [wsr_web_site_sk#89, sales_price#91, profit#92, return_amt#93, net_loss#94] -Input [7]: [wsr_web_site_sk#89, date_sk#90, sales_price#91, profit#92, return_amt#93, net_loss#94, d_date_sk#110] - -(60) Scan parquet default.web_site -Output [2]: [web_site_sk#111, web_site_id#112] ->>>>>>> 965221296e (update explain.txt) Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct (61) ColumnarToRow [codegen id : 17] -<<<<<<< HEAD Input [2]: [web_site_sk#108, web_site_id#109] (62) Filter [codegen id : 17] @@ -538,49 +387,10 @@ Keys [1]: [web_site_id#109] Functions [4]: [sum(UnscaledValue(sales_price#88)), sum(UnscaledValue(return_amt#90)), sum(UnscaledValue(profit#89)), sum(UnscaledValue(net_loss#91))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#88))#118, sum(UnscaledValue(return_amt#90))#119, sum(UnscaledValue(profit#89))#120, sum(UnscaledValue(net_loss#91))#121] Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#88))#118,17,2) AS sales#122, MakeDecimal(sum(UnscaledValue(return_amt#90))#119,17,2) AS returns#123, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#89))#120,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#91))#121,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#124, web channel AS channel#125, concat(web_site, web_site_id#109) AS id#126] -======= -Input [2]: [web_site_sk#111, web_site_id#112] - -(62) Filter [codegen id : 17] -Input [2]: [web_site_sk#111, web_site_id#112] -Condition : isnotnull(web_site_sk#111) - -(63) BroadcastExchange -Input [2]: [web_site_sk#111, web_site_id#112] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#113] - -(64) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [wsr_web_site_sk#89] -Right keys [1]: [web_site_sk#111] -Join condition: None - -(65) Project [codegen id : 18] -Output [5]: [sales_price#91, profit#92, return_amt#93, net_loss#94, web_site_id#112] -Input [7]: [wsr_web_site_sk#89, sales_price#91, profit#92, return_amt#93, net_loss#94, web_site_sk#111, web_site_id#112] - -(66) HashAggregate [codegen id : 18] -Input [5]: [sales_price#91, profit#92, return_amt#93, net_loss#94, web_site_id#112] -Keys [1]: [web_site_id#112] -Functions [4]: [partial_sum(UnscaledValue(sales_price#91)), partial_sum(UnscaledValue(return_amt#93)), partial_sum(UnscaledValue(profit#92)), partial_sum(UnscaledValue(net_loss#94))] -Aggregate Attributes [4]: [sum#114, sum#115, sum#116, sum#117] -Results [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] - -(67) Exchange -Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] -Arguments: hashpartitioning(web_site_id#112, 5), ENSURE_REQUIREMENTS, [id=#122] - -(68) HashAggregate [codegen id : 19] -Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] -Keys [1]: [web_site_id#112] -Functions [4]: [sum(UnscaledValue(sales_price#91)), sum(UnscaledValue(return_amt#93)), sum(UnscaledValue(profit#92)), sum(UnscaledValue(net_loss#94))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#91))#123, sum(UnscaledValue(return_amt#93))#124, sum(UnscaledValue(profit#92))#125, sum(UnscaledValue(net_loss#94))#126] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#91))#123,17,2) AS sales#127, MakeDecimal(sum(UnscaledValue(return_amt#93))#124,17,2) AS returns#128, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#92))#125,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#94))#126,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#129, web channel AS channel#130, concat(web_site, web_site_id#112) AS id#131] ->>>>>>> 965221296e (update explain.txt) (69) Union (70) Expand [codegen id : 20] -<<<<<<< HEAD Input [5]: [sales#37, returns#38, profit#39, channel#40, id#41] Arguments: [[sales#37, returns#38, profit#39, channel#40, id#41, 0], [sales#37, returns#38, profit#39, channel#40, null, 1], [sales#37, returns#38, profit#39, null, null, 3]], [sales#37, returns#38, profit#39, channel#127, id#128, spark_grouping_id#129] @@ -605,32 +415,6 @@ Results [5]: [channel#127, id#128, sum(sales#37)#142 AS sales#145, sum(returns#3 (74) TakeOrderedAndProject Input [5]: [channel#127, id#128, sales#145, returns#146, profit#147] Arguments: 100, [channel#127 ASC NULLS FIRST, id#128 ASC NULLS FIRST], [channel#127, id#128, sales#145, returns#146, profit#147] -======= -Input [5]: [sales#39, returns#40, profit#41, channel#42, id#43] -Arguments: [[sales#39, returns#40, profit#41, channel#42, id#43, 0], [sales#39, returns#40, profit#41, channel#42, null, 1], [sales#39, returns#40, profit#41, null, null, 3]], [sales#39, returns#40, profit#41, channel#132, id#133, spark_grouping_id#134] - -(71) HashAggregate [codegen id : 20] -Input [6]: [sales#39, returns#40, profit#41, channel#132, id#133, spark_grouping_id#134] -Keys [3]: [channel#132, id#133, spark_grouping_id#134] -Functions [3]: [partial_sum(sales#39), partial_sum(returns#40), partial_sum(profit#41)] -Aggregate Attributes [6]: [sum#135, isEmpty#136, sum#137, isEmpty#138, sum#139, isEmpty#140] -Results [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#75, sum#142, isEmpty#143, sum#144, isEmpty#145] - -(72) Exchange -Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#75, sum#142, isEmpty#143, sum#144, isEmpty#145] -Arguments: hashpartitioning(channel#132, id#133, spark_grouping_id#134, 5), ENSURE_REQUIREMENTS, [id=#146] - -(73) HashAggregate [codegen id : 21] -Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#75, sum#142, isEmpty#143, sum#144, isEmpty#145] -Keys [3]: [channel#132, id#133, spark_grouping_id#134] -Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] -Aggregate Attributes [3]: [sum(sales#39)#147, sum(returns#40)#148, sum(profit#41)#149] -Results [5]: [channel#132, id#133, sum(sales#39)#147 AS sales#150, sum(returns#40)#148 AS returns#151, sum(profit#41)#149 AS profit#152] - -(74) TakeOrderedAndProject -Input [5]: [channel#132, id#133, sales#150, returns#151, profit#152] -Arguments: 100, [channel#132 ASC NULLS FIRST, id#133 ASC NULLS FIRST], [channel#132, id#133, sales#150, returns#151, profit#152] ->>>>>>> 965221296e (update explain.txt) ===== Subqueries ===== @@ -643,18 +427,13 @@ BroadcastExchange (79) (75) Scan parquet default.date_dim -<<<<<<< HEAD Output [2]: [d_date_sk#22, d_date#148] -======= -Output [2]: [d_date_sk#22, d_date#153] ->>>>>>> 965221296e (update explain.txt) Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] ReadSchema: struct (76) ColumnarToRow [codegen id : 1] -<<<<<<< HEAD Input [2]: [d_date_sk#22, d_date#148] (77) Filter [codegen id : 1] @@ -668,38 +447,15 @@ Input [2]: [d_date_sk#22, d_date#148] (79) BroadcastExchange Input [1]: [d_date_sk#22] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] -======= -Input [2]: [d_date_sk#22, d_date#153] - -(77) Filter [codegen id : 1] -Input [2]: [d_date_sk#22, d_date#153] -Condition : (((isnotnull(d_date#153) AND (d_date#153 >= 2000-08-23)) AND (d_date#153 <= 2000-09-06)) AND isnotnull(d_date_sk#22)) - -(78) Project [codegen id : 1] -Output [1]: [d_date_sk#22] -Input [2]: [d_date_sk#22, d_date#153] - -(79) BroadcastExchange -Input [1]: [d_date_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#154] ->>>>>>> 965221296e (update explain.txt) Subquery:2 Hosting operator id = 5 Hosting Expression = sr_returned_date_sk#15 IN dynamicpruning#5 Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#45 IN dynamicpruning#5 -<<<<<<< HEAD Subquery:4 Hosting operator id = 26 Hosting Expression = cr_returned_date_sk#55 IN dynamicpruning#5 Subquery:5 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#85 IN dynamicpruning#5 Subquery:6 Hosting operator id = 47 Hosting Expression = wr_returned_date_sk#96 IN dynamicpruning#5 -======= -Subquery:4 Hosting operator id = 26 Hosting Expression = cr_returned_date_sk#56 IN dynamicpruning#5 - -Subquery:5 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#88 IN dynamicpruning#5 - -Subquery:6 Hosting operator id = 47 Hosting Expression = wr_returned_date_sk#99 IN dynamicpruning#5 ->>>>>>> 965221296e (update explain.txt)