From b38a21ef6146784e4b93ef4ce8c899f1eee14572 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Mon, 16 Nov 2015 18:30:26 -0800
Subject: [PATCH 1/5] SPARK-11633

---
 .../spark/sql/catalyst/analysis/Analyzer.scala   |  3 ++-
 .../spark/sql/hive/execution/SQLQuerySuite.scala | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 2f4670b55bdba..5a5b71e52dd79 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -425,7 +425,8 @@ class Analyzer(
            */
           j
         case Some((oldRelation, newRelation)) =>
-          val attributeRewrites = AttributeMap(oldRelation.output.zip(newRelation.output))
+          val attributeRewrites =
+            AttributeMap(oldRelation.output.zip(newRelation.output).filter(x => x._1 != x._2))
           val newRight = right transformUp {
            case r if r == oldRelation => newRelation
          } transformUp {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 3427152b2da02..5e00546a74c00 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -51,6 +51,8 @@ case class Order(
     state: String,
     month: Int)
 
+case class Individual(F1: Integer, F2: Integer)
+
 case class WindowData(
     month: Int,
     area: String,
@@ -1479,4 +1481,18 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
         |FROM (SELECT '{"f1": "value1", "f2": 12}' json, 'hello' as str) test
       """.stripMargin), Row("value1", "12", 3.14, "hello"))
   }
+
+  test("SPARK-11633: HiveContext throws TreeNode Exception: Failed to Copy Node") {
+    val rdd1 = sparkContext.parallelize(Seq(Individual(1, 3), Individual(2, 1)))
+    val df = hiveContext.createDataFrame(rdd1)
+    df.registerTempTable("foo")
+    val df2 = sql("select f1, F2 as F2 from foo")
+    df2.registerTempTable("foo2")
+    df2.registerTempTable("foo3")
+
+    checkAnswer(sql(
+      """
+        SELECT a.F1 FROM foo2 a INNER JOIN foo3 b ON a.F2=b.F2
+      """.stripMargin), Row(2) :: Row(1) :: Nil)
+  }
 }

From 0546772f151f83d6d3cf4d000cbe341f52545007 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Fri, 20 Nov 2015 10:56:45 -0800
Subject: [PATCH 2/5] converge

---
 .../spark/sql/catalyst/analysis/Analyzer.scala   |  3 +--
 .../spark/sql/hive/execution/SQLQuerySuite.scala | 15 ---------------
 2 files changed, 1 insertion(+), 17 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 7c9512fbd00aa..47962ebe6ef82 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -417,8 +417,7 @@ class Analyzer(
            */
           j
         case Some((oldRelation, newRelation)) =>
-          val attributeRewrites =
-            AttributeMap(oldRelation.output.zip(newRelation.output).filter(x => x._1 != x._2))
+          val attributeRewrites = AttributeMap(oldRelation.output.zip(newRelation.output))
           val newRight = right transformUp {
            case r if r == oldRelation => newRelation
          } transformUp {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 5e00546a74c00..61d9dcd37572b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -51,8 +51,6 @@ case class Order(
     state: String,
     month: Int)
 
-case class Individual(F1: Integer, F2: Integer)
-
 case class WindowData(
     month: Int,
     area: String,
@@ -1481,18 +1479,5 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
         |FROM (SELECT '{"f1": "value1", "f2": 12}' json, 'hello' as str) test
       """.stripMargin), Row("value1", "12", 3.14, "hello"))
   }
-
-  test("SPARK-11633: HiveContext throws TreeNode Exception: Failed to Copy Node") {
-    val rdd1 = sparkContext.parallelize(Seq(Individual(1, 3), Individual(2, 1)))
-    val df = hiveContext.createDataFrame(rdd1)
-    df.registerTempTable("foo")
-    val df2 = sql("select f1, F2 as F2 from foo")
-    df2.registerTempTable("foo2")
-    df2.registerTempTable("foo3")
-
-    checkAnswer(sql(
-      """
-        SELECT a.F1 FROM foo2 a INNER JOIN foo3 b ON a.F2=b.F2
-      """.stripMargin), Row(2) :: Row(1) :: Nil)
   }
 }

From b37a64f13956b6ddd0e38ddfd9fe1caee611f1a8 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Fri, 20 Nov 2015 10:58:37 -0800
Subject: [PATCH 3/5] converge

---
 .../org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 61d9dcd37572b..3427152b2da02 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -1479,5 +1479,4 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
         |FROM (SELECT '{"f1": "value1", "f2": 12}' json, 'hello' as str) test
       """.stripMargin), Row("value1", "12", 3.14, "hello"))
   }
-  }
 }

From d6a6e9cc31b0f7547b35cf25884135ea65b03676 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Sun, 3 Jan 2016 18:40:26 -0800
Subject: [PATCH 4/5] outer join elimination by parent join.
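
The idea in DataFrame terms, as a minimal sketch (illustration only, not part of
the patch; it reuses the data from the tests added below and assumes a
spark-shell style session with the SQL implicits in scope):

    val a = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str").as("a")
    val b = Seq((1, 3, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("b")
    val c = Seq((1, 3, "1"), (3, 6, "5")).toDF("int", "int2", "str").as("c")

    // Child join: LEFT OUTER, so b's columns are null-supplying.
    val child = a.join(b, $"a.int" === $"b.int", "left")
    // Parent join: INNER, and its condition c.int = b.int can never evaluate
    // to true when b.int is null, so the null-extended rows from the child
    // join are discarded anyway and the child join can safely become INNER.
    val query = c.join(child, $"c.int" === $"b.int", "inner")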
---
 .../sql/catalyst/optimizer/Optimizer.scala    | 104 +++++++++++-
 .../apache/spark/sql/DataFrameJoinSuite.scala | 155 ++++++++++++++++++
 2 files changed, 258 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 0b1c74293bb8b..dace0fc208586 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.analysis.{CleanupAliases, EliminateSubQueri
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.catalyst.planning.ExtractFiltersAndInnerJoins
-import org.apache.spark.sql.catalyst.plans.{FullOuter, Inner, LeftOuter, LeftSemi, RightOuter}
+import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules._
 import org.apache.spark.sql.types._
@@ -44,6 +44,7 @@ abstract class Optimizer extends RuleExecutor[LogicalPlan] {
       // Operator push down
       SetOperationPushDown,
       SamplePushDown,
+      OuterJoinElimination,
       ReorderJoin,
       PushPredicateThroughJoin,
       PushPredicateThroughProject,
@@ -768,6 +769,107 @@ object ReorderJoin extends Rule[LogicalPlan] with PredicateHelper {
   }
 }
 
+/**
+ * Elimination of Outer Join by Parent Join Condition
+ *
+ * When an outer join is involved in another join (the parent join) whose join type is inner,
+ * left-semi, left-outer, or right-outer, check whether the parent join's condition satisfies
+ * both of the following:
+ *
+ * 1) it contains null-filtering predicates against columns in the null-supplying side of the
+ *    parent join, and
+ * 2) those columns come from the child join.
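+ *
+ * For example (illustration only, with hypothetical tables t1, t2, t3): in
+ *   SELECT ... FROM t3 JOIN (t1 LEFT OUTER JOIN t2 ON t1.k = t2.k) ON t3.k = t2.k
+ * the parent join's condition t3.k = t2.k can never evaluate to true when t2.k is null, so
+ * every null-extended row produced by the child outer join is discarded and the child join
+ * may safely run as an inner join.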
+ *
+ * If such join predicates exist, apply the following elimination rules:
+ * - full outer -> inner if both sides of the child join have such predicates
+ * - left outer -> inner if the right side of the child join has such predicates
+ * - right outer -> inner if the left side of the child join has such predicates
+ * - full outer -> left outer if only the left side of the child join has such predicates
+ * - full outer -> right outer if only the right side of the child join has such predicates
+ */
+object OuterJoinElimination extends Rule[LogicalPlan] with PredicateHelper {
+
+  private def containsAttr(plan: LogicalPlan, attr: Attribute): Boolean =
+    plan.outputSet.exists(_.semanticEquals(attr))
+
+  private def hasNullFilteringPredicate(predicate: Expression, plan: LogicalPlan): Boolean = {
+    predicate match {
+      case EqualTo(ar: AttributeReference, _) if containsAttr(plan, ar) => true
+      case EqualTo(_, ar: AttributeReference) if containsAttr(plan, ar) => true
+      case EqualNullSafe(ar: AttributeReference, l)
+        if !l.nullable && containsAttr(plan, ar) => true
+      case EqualNullSafe(l, ar: AttributeReference)
+        if !l.nullable && containsAttr(plan, ar) => true
+      case GreaterThan(ar: AttributeReference, _) if containsAttr(plan, ar) => true
+      case GreaterThan(_, ar: AttributeReference) if containsAttr(plan, ar) => true
+      case GreaterThanOrEqual(ar: AttributeReference, _) if containsAttr(plan, ar) => true
+      case GreaterThanOrEqual(_, ar: AttributeReference) if containsAttr(plan, ar) => true
+      case LessThan(ar: AttributeReference, _) if containsAttr(plan, ar) => true
+      case LessThan(_, ar: AttributeReference) if containsAttr(plan, ar) => true
+      case LessThanOrEqual(ar: AttributeReference, _) if containsAttr(plan, ar) => true
+      case LessThanOrEqual(_, ar: AttributeReference) if containsAttr(plan, ar) => true
+      case In(ar: AttributeReference, _) if containsAttr(plan, ar) => true
+      case IsNotNull(ar: AttributeReference) if containsAttr(plan, ar) => true
+      case And(l, r) => hasNullFilteringPredicate(l, plan) || hasNullFilteringPredicate(r, plan)
+      case Or(l, r) => hasNullFilteringPredicate(l, plan) && hasNullFilteringPredicate(r, plan)
+      case Not(e) => !hasNullFilteringPredicate(e, plan)
+      case _ => false
+    }
+  }
+
+  private def buildNewJoin(
+      otherCondition: Expression,
+      left: LogicalPlan,
+      right: LogicalPlan,
+      joinType: JoinType,
+      condition: Option[Expression]): Join = {
+    val leftHasNonNullPredicate = hasNullFilteringPredicate(otherCondition, left)
+    val rightHasNonNullPredicate = hasNullFilteringPredicate(otherCondition, right)
+
+    joinType match {
+      case RightOuter if leftHasNonNullPredicate =>
+        Join(left, right, Inner, condition)
+      case LeftOuter if rightHasNonNullPredicate =>
+        Join(left, right, Inner, condition)
+      case FullOuter if leftHasNonNullPredicate && rightHasNonNullPredicate =>
+        Join(left, right, Inner, condition)
+      case FullOuter if leftHasNonNullPredicate =>
+        Join(left, right, LeftOuter, condition)
+      case FullOuter if rightHasNonNullPredicate =>
+        Join(left, right, RightOuter, condition)
+      case _ => Join(left, right, joinType, condition)
+    }
+  }
+
+  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+
+    // Case 1: the parent join is Inner|LeftSemi|LeftOuter and the child join is on the right side
+    case pj @ Join(
+        pLeft,
+        j @ Join(left, right, RightOuter | LeftOuter | FullOuter, condition),
+        Inner | LeftSemi | LeftOuter,
+        Some(pJoinCond)) =>
+      Join(
+        pLeft,
+        buildNewJoin(pJoinCond, left, right, j.joinType, condition),
+        pj.joinType,
+        Some(pJoinCond))
+
+    // Case 2: the parent join is Inner|LeftSemi|RightOuter and the child join is on the left side
+    case pj @ Join(
+        j @ Join(left, right, RightOuter | LeftOuter | FullOuter, condition),
+        pRight,
+        Inner | LeftSemi | RightOuter,
+        Some(pJoinCond)) =>
+      Join(
+        buildNewJoin(pJoinCond, left, right, j.joinType, condition),
+        pRight,
+        pj.joinType,
+        Some(pJoinCond))
+  }
+}
+
 /**
  * Pushes down [[Filter]] operators where the `condition` can be
  * evaluated using only the attributes of the left or right side of a join. Other
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
index 39a65413bd592..0a1d28ab0278a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql
 
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical.{Project, Join}
 import org.apache.spark.sql.execution.joins.BroadcastHashJoin
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.test.SharedSQLContext
@@ -140,4 +142,157 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext {
       assert(df1.join(broadcast(pf1)).count() === 4)
     }
   }
+
+  test("join - left outer to inner by the parent join's join condition") {
+    val df = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str").as("a")
+    val df2 = Seq((1, 3, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("b")
+    val df3 = Seq((1, 3, "1"), (3, 6, "5")).toDF("int", "int2", "str").as("c")
+
+    // Left -> Inner
+    val right = df.join(df2, $"a.int" === $"b.int", "left")
+    val left2Inner =
+      df3.join(right, $"c.int" === $"b.int", "inner").select($"a.*", $"b.*", $"c.*")
+
+    left2Inner.explain(true)
+
+    // The order before conversion: Left Then Inner
+    assert(left2Inner.queryExecution.analyzed.collect {
+      case j @ Join(_, Join(_, _, LeftOuter, _), Inner, _) => j
+    }.size === 1)
+
+    // The order after conversion: Inner Then Inner
+    assert(left2Inner.queryExecution.optimizedPlan.collect {
+      case j @ Join(_, Join(_, _, Inner, _), Inner, _) => j
+    }.size === 1)
+
+    checkAnswer(
+      left2Inner,
+      Row(1, 2, "1", 1, 3, "1", 1, 3, "1") :: Nil)
+  }
+
+  test("join - right outer to inner by the parent join's join condition") {
+    val df = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str").as("a")
+    val df2 = Seq((1, 3, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("b")
+    val df3 = Seq((1, 9, "8"), (5, 0, "4")).toDF("int", "int2", "str").as("c")
+
+    // Right -> Inner
+    val right2Inner = df.join(df2, $"a.int" === $"b.int", "right")
+      .join(df3, $"a.int" === $"b.int", "inner").select($"a.*", $"b.*", $"c.*")
+
+    // The order before conversion: Right Then Inner
+    assert(right2Inner.queryExecution.analyzed.collect {
+      case j @ Join(Join(_, _, RightOuter, _), _, Inner, _) => j
+    }.size === 1)
+
+    // The order after conversion: Inner Then Inner
+    assert(right2Inner.queryExecution.optimizedPlan.collect {
+      case j @ Join(Join(_, _, Inner, _), _, Inner, _) => j
+    }.size === 1)
+
+    checkAnswer(
+      right2Inner,
+      Row(1, 2, "1", 1, 3, "1", 1, 9, "8") ::
+      Row(1, 2, "1", 1, 3, "1", 5, 0, "4") :: Nil)
+  }
+
+  test("join - full outer to inner by the parent join's join condition") {
+    val df = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str").as("a")
+    val df2 = Seq((1, 2, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("b")
+    val df3 = Seq((1, 3, "1"), (3, 6, "5")).toDF("int", "int2", "str").as("c")
"str").as("c") + + // Full -> Inner + val right = df.join(df2, $"a.int" === $"b.int", "full") + val full2Inner = df3.join(right, $"c.int" === $"a.int" && $"b.int" === 1, "inner") + .select($"a.*", $"b.*", $"c.*") + + // The order before conversion: Left Then Inner + assert(full2Inner.queryExecution.analyzed.collect { + case j@Join(_, Join(_, _, FullOuter, _), Inner, _) => j + }.size === 1) + + // The order after conversion: Inner Then Inner + assert(full2Inner.queryExecution.optimizedPlan.collect { + case j@Join(_, Join(_, _, Inner, _), Inner, _) => j + }.size === 1) + + checkAnswer( + full2Inner, + Row(1, 2, "1", 1, 2, "1", 1, 3, "1") :: Nil) + } + + test("join - full outer to right by the parent join's join condition") { + val df = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str").as("a") + val df2 = Seq((1, 2, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("b") + val df3 = Seq((1, 3, "1"), (3, 6, "5")).toDF("int", "int2", "str").as("c") + + // Full -> Right + val right = df.join(df2, $"a.int" === $"b.int", "full") + val full2Right = df3.join(right, $"b.int" === 1, "leftsemi") + + // The order before conversion: Left Then Inner + assert(full2Right.queryExecution.analyzed.collect { + case j@Join(_, Join(_, _, FullOuter, _), LeftSemi, _) => j + }.size === 1) + + // The order after conversion: Inner Then Inner + assert(full2Right.queryExecution.optimizedPlan.collect { + case j@Join(_, Project(_, Join(_, _, RightOuter, _)), LeftSemi, _) => j + }.size === 1) + + checkAnswer( + full2Right, + Row(1, 3, "1") :: Row(3, 6, "5") :: Nil) + } + + + test("join - full outer to left by the parent join's join condition #1") { + val df = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str").as("a") + val df2 = Seq((1, 2, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("b") + val df3 = Seq((1, 3, "1"), (4, 6, "5")).toDF("int", "int2", "str").as("c") + + // Full -> Left + val right = df.join(df2, $"a.int" === $"b.int", "full") + val full2Left = df3.join(right, $"c.int" === $"a.int", "left") + .select($"a.*", $"b.*", $"c.*") + + // The order before conversion: Full Then Left + assert(full2Left.queryExecution.analyzed.collect { + case j@Join(_, Join(_, _, FullOuter, _), LeftOuter, _) => j + }.size === 1) + + // The order after conversion: Left Then Left + assert(full2Left.queryExecution.optimizedPlan.collect { + case j@Join(_, Join(_, _, LeftOuter, _), LeftOuter, _) => j + }.size === 1) + + checkAnswer( + full2Left, + Row(1, 2, "1", 1, 2, "1", 1, 3, "1") :: + Row(null, null, null, null, null, null, 4, 6, "5") :: Nil) + } + + test("join - full outer to left by the parent join's join condition #2") { + val df = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str").as("a") + val df2 = Seq((1, 2, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("b") + val df3 = Seq((1, 3, "1"), (4, 6, "5")).toDF("int", "int2", "str").as("c") + + // Full -> Left + val full2Left = df.join(df2, $"a.int" === $"b.int", "full") + .join(df3, $"c.int" === $"a.int", "right").select($"a.*", $"b.*", $"c.*") + + // The order before conversion: Full Then Right + assert(full2Left.queryExecution.analyzed.collect { + case j@Join(Join(_, _, FullOuter, _), _, RightOuter, _) => j + }.size === 1) + + // The order after conversion: Left Then Right + assert(full2Left.queryExecution.optimizedPlan.collect { + case j@Join(Join(_, _, LeftOuter, _), _, RightOuter, _) => j + }.size === 1) + + checkAnswer( + full2Left, + Row(1, 2, "1", 1, 2, "1", 1, 3, "1") :: + Row(null, null, null, null, null, null, 4, 6, "5") :: Nil) + } } From 
From 1a9ebdff1da8b0661738db1c7cf466344261ed33 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Wed, 24 Feb 2016 23:36:57 -0800
Subject: [PATCH 5/5] integrate it into the existing outer-join elimination.

---
 .../sql/catalyst/optimizer/Optimizer.scala    | 24 ++++++++++++++++---
 .../apache/spark/sql/DataFrameJoinSuite.scala |  2 +-
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index ec4bf08544dd8..6165663cd0436 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -945,8 +945,10 @@ object ReorderJoin extends Rule[LogicalPlan] with PredicateHelper {
 }
 
 /**
- * Elimination of outer joins, if the predicates can restrict the result sets so that
- * all null-supplying rows are eliminated
+ * Elimination of Outer Joins
+ *
+ * Rule Set 1: check whether the Filter condition's predicates restrict the result set
+ * so that all null-supplying rows are eliminated:
  *
  * - full outer -> inner if both sides have such predicates
  * - left outer -> inner if the right side has such predicates
@@ -954,7 +956,21 @@ object ReorderJoin extends Rule[LogicalPlan] with PredicateHelper {
  * - full outer -> left outer if only the left side has such predicates
  * - full outer -> right outer if only the right side has such predicates
  *
- * This rule should be executed before pushing down the Filter
+ * This rule set should be executed before pushing down the Filter.
+ *
+ * Rule Set 2: when an outer join is involved in another join (the parent join) whose join
+ * type is inner, left-semi, left-outer, or right-outer, check whether the parent join's
+ * condition satisfies both of the following:
+ * 1) it contains null-filtering predicates against columns in the null-supplying side of
+ *    the parent join, and
+ * 2) those columns come from the child join.
+ *
+ * If such join predicates exist, apply the following elimination rules:
+ * - full outer -> inner if both sides of the child join have such predicates
+ * - left outer -> inner if the right side of the child join has such predicates
+ * - right outer -> inner if the left side of the child join has such predicates
+ * - full outer -> left outer if only the left side of the child join has such predicates
+ * - full outer -> right outer if only the right side of the child join has such predicates
  */
 object OuterJoinElimination extends Rule[LogicalPlan] with PredicateHelper {
@@ -997,10 +1013,12 @@ object OuterJoinElimination extends Rule[LogicalPlan] with PredicateHelper {
   }
 
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+    // Rule Set 1: elimination using Filter conditions/constraints
     case f @ Filter(condition, j @ Join(_, _, RightOuter | LeftOuter | FullOuter, _)) =>
       val newJoinType = buildNewJoinType(f.condition, f.constraints, j)
       if (j.joinType == newJoinType) f else Filter(condition, j.copy(joinType = newJoinType))
 
+    // Rule Set 2: elimination using the parent join's conditions/constraints
     // Case 1: when parent join is Inner|LeftSemi|LeftOuter and the child join is on the right side
     case pj @ Join(
         _,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
index 77cf015e9cdaf..514d53fb5e020 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.sql
 
 import org.apache.spark.sql.catalyst.plans._
-import org.apache.spark.sql.catalyst.plans.logical.{Project, Join}
+import org.apache.spark.sql.catalyst.plans.logical.{Join, Project}
 import org.apache.spark.sql.execution.joins.BroadcastHashJoin
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.test.SharedSQLContext
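
For completeness, a Filter-based (Rule Set 1) example in the same spirit as the
tests above (a sketch, not part of the patch; assumes spark-shell style implicits):

    val l = Seq((1, "x"), (2, "y")).toDF("k", "v").as("l")
    val r = Seq((1, "z")).toDF("k", "w").as("r")
    // The WHERE predicate r.w = 'z' can never be true on a null-extended row
    // from r, the null-supplying side, so the LEFT OUTER join can be rewritten
    // to INNER before the Filter is pushed down.
    val q = l.join(r, $"l.k" === $"r.k", "left").where($"r.w" === "z")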