Skip to content

Commit cd91f96

Browse files
viirya authored and cloud-fan committed
[SPARK-20175][SQL] Exists should not be evaluated in Join operator
## What changes were proposed in this pull request?

Similar to `ListQuery`, `Exists` should not be evaluated in a `Join` operator either.

## How was this patch tested?

Jenkins tests.

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: Liang-Chi Hsieh <[email protected]>

Closes #17491 from viirya/dont-push-exists-to-join.
1 parent c870698 commit cd91f96

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,12 @@ trait PredicateHelper {
9292
protected def canEvaluateWithinJoin(expr: Expression): Boolean = expr match {
9393
// Non-deterministic expressions are not allowed as join conditions.
9494
case e if !e.deterministic => false
95-
case l: ListQuery =>
95+
case _: ListQuery | _: Exists =>
9696
// A ListQuery defines the query which we want to search in an IN subquery expression.
9797
// Currently the only way to evaluate an IN subquery is to convert it to a
9898
// LeftSemi/LeftAnti/ExistenceJoin by `RewritePredicateSubquery` rule.
9999
// It cannot be evaluated as part of a Join operator.
100+
// An Exists shouldn't be pushed into a Join operator either.
100101
false
101102
case e: SubqueryExpression =>
102103
// non-correlated subquery will be replaced as literal

sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -844,4 +844,14 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
844844
Row(0) :: Row(1) :: Nil)
845845
}
846846
}
847+
848+
test("ListQuery and Exists should work even no correlated references") {
849+
checkAnswer(
850+
sql("select * from l, r where l.a = r.c AND (r.d in (select d from r) OR l.a >= 1)"),
851+
Row(2, 1.0, 2, 3.0) :: Row(2, 1.0, 2, 3.0) :: Row(2, 1.0, 2, 3.0) ::
852+
Row(2, 1.0, 2, 3.0) :: Row(3.0, 3.0, 3, 2.0) :: Row(6, null, 6, null) :: Nil)
853+
checkAnswer(
854+
sql("select * from l, r where l.a = r.c + 1 AND (exists (select * from r) OR l.a = r.c)"),
855+
Row(3, 3.0, 2, 3.0) :: Row(3, 3.0, 2, 3.0) :: Nil)
856+
}
847857
}

0 commit comments

Comments
 (0)