From e30ba2be3eb0a6be0a6abd9287492a1ac0ab6063 Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Wed, 27 Sep 2017 12:44:10 +0200 Subject: [PATCH] [SPARK-22141][SQL] Propagate empty relation before checking Cartesian products When inferring constraints from children, Join's condition can be simplified as None. For example, ``` val testRelation = LocalRelation('a.int) val x = testRelation.as("x") val y = testRelation.where($"a" === 2 && !($"a" === 2)).as("y") x.join.where($"x.a" === $"y.a") ``` The plan will become ``` Join Inner :- LocalRelation , [a#23] +- LocalRelation , [a#224] ``` And the Cartesian products check will throw exception for above plan. Propagate empty relation before checking Cartesian products, and the issue is resolved. Unit test Author: Wang Gengliang Closes #19362 from gengliangwang/MoveCheckCartesianProducts. --- .../apache/spark/sql/catalyst/optimizer/Optimizer.scala | 4 ++-- .../src/test/scala/org/apache/spark/sql/JoinSuite.scala | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index f67daa55c04ff..71e03ee829710 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -113,8 +113,6 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) SimplifyCreateArrayOps, SimplifyCreateMapOps) ++ extendedOperatorOptimizationRules: _*) :: - Batch("Check Cartesian Products", Once, - CheckCartesianProducts(conf)) :: Batch("Join Reorder", Once, CostBasedJoinReorder(conf)) :: Batch("Decimal Optimizations", fixedPoint, @@ -125,6 +123,8 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) Batch("LocalRelation", fixedPoint, ConvertToLocalRelation, PropagateEmptyRelation) :: + Batch("Check Cartesian Products", Once, + CheckCartesianProducts(conf)) :: Batch("OptimizeCodegen", Once, OptimizeCodegen(conf)) :: Batch("RewriteSubquery", Once, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 95dc1478896f3..cdfd33dfb91a3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -200,6 +200,14 @@ class JoinSuite extends QueryTest with SharedSQLContext { Nil) } + test("SPARK-22141: Propagate empty relation before checking Cartesian products") { + Seq("inner", "left", "right", "left_outer", "right_outer", "full_outer").foreach { joinType => + val x = testData2.where($"a" === 2 && !($"a" === 2)).as("x") + val y = testData2.where($"a" === 1 && !($"a" === 1)).as("y") + checkAnswer(x.join(y, Seq.empty, joinType), Nil) + } + } + test("big inner join, 4 matches per row") { val bigData = testData.union(testData).union(testData).union(testData) val bigDataX = bigData.as("x")