From 8a22e1d1705f0aa546b04dca0f5ffc60394b0f24 Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Tue, 16 Jan 2018 22:24:59 +0800 Subject: [PATCH 1/2] Fix bug of Constraint --- .../catalyst/plans/logical/QueryPlanConstraints.scala | 11 ++++++++--- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 11 +++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala index 9c0a30a47f839..5895c85cd5fb7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala @@ -96,10 +96,15 @@ trait QueryPlanConstraints { self: LogicalPlan => // Collect aliases from expressions of the whole tree rooted by the current QueryPlan node, so // we may avoid producing recursive constraints. - private lazy val aliasMap: AttributeMap[Expression] = AttributeMap( - expressions.collect { + private lazy val aliasMap: AttributeMap[Expression] = { + val aliases = expressions.collect { case a: Alias if !a.child.isInstanceOf[Literal] => (a.toAttribute, a.child) - } ++ children.flatMap(_.asInstanceOf[QueryPlanConstraints].aliasMap)) + } ++ children.flatMap(_.asInstanceOf[QueryPlanConstraints].aliasMap) + AttributeMap(aliases.filter { + case (_, child) => child.references.nonEmpty && child.references.subsetOf(outputSet) + }) + } + // Note: the explicit cast is necessary, since Scala compiler fails to infer the type. /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 96bf65fce9c4a..8d4f9b2f89781 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2717,6 +2717,17 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } + test("SPARK-23079: constraints should be inferred correctly with aliases") { + withTable("t") { + spark.range(5).write.saveAsTable("t") + val t = spark.read.table("t") + val left = t.withColumn("xid", $"id" + lit(1)).as("x") + val right = t.withColumnRenamed("id", "xid").as("y") + val df = left.join(right, "xid").filter("id = 3").toDF() + checkAnswer(df, Row(4, 3)) + } + } + test("SRARK-22266: the same aggregate function was calculated multiple times") { val query = "SELECT a, max(b+1), max(b+1) + 1 FROM testData2 GROUP BY a" val df = sql(query) From aaad66aef2c1b8349660211a8589e906795ff0e8 Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Tue, 16 Jan 2018 23:12:44 +0800 Subject: [PATCH 2/2] improve --- .../plans/logical/QueryPlanConstraints.scala | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala index 5895c85cd5fb7..2fadeb8f8a11a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/QueryPlanConstraints.scala @@ -97,12 +97,15 @@ trait QueryPlanConstraints { self: LogicalPlan => // Collect aliases from expressions of the whole tree rooted by the current QueryPlan node, so // we may avoid producing recursive constraints. private lazy val aliasMap: AttributeMap[Expression] = { - val aliases = expressions.collect { + val childrenAliases = children.flatMap { child => + val childOutputSet = child.outputSet + child.asInstanceOf[QueryPlanConstraints].aliasMap.filter { + case (_, c) => c.references.nonEmpty && c.references.subsetOf(childOutputSet) + } + } + AttributeMap(expressions.collect { case a: Alias if !a.child.isInstanceOf[Literal] => (a.toAttribute, a.child) - } ++ children.flatMap(_.asInstanceOf[QueryPlanConstraints].aliasMap) - AttributeMap(aliases.filter { - case (_, child) => child.references.nonEmpty && child.references.subsetOf(outputSet) - }) + } ++ childrenAliases) } // Note: the explicit cast is necessary, since Scala compiler fails to infer the type.