Skip to content

Commit c2b50d6

Browse files
JoshRosen authored and marmbrus committed
[SPARK-9292] Analysis should check that join conditions' data types are BooleanType
This patch adds an analysis check to ensure that join conditions' data types are BooleanType. This check is necessary in order to report proper errors for non-boolean DataFrame join conditions.

Author: Josh Rosen <[email protected]>

Closes #7630 from JoshRosen/SPARK-9292 and squashes the following commits:

aec6c7b [Josh Rosen] Check condition type in resolved()
75a3ea6 [Josh Rosen] Fix SPARK-9292.
1 parent c8d71a4 commit c2b50d6

File tree

3 files changed

+14
-1
lines changed

3 files changed

+14
-1
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,11 @@ trait CheckAnalysis {
8383
s"filter expression '${f.condition.prettyString}' " +
8484
s"of type ${f.condition.dataType.simpleString} is not a boolean.")
8585

86+
case j @ Join(_, _, _, Some(condition)) if condition.dataType != BooleanType =>
87+
failAnalysis(
88+
s"join condition '${condition.prettyString}' " +
89+
s"of type ${condition.dataType.simpleString} is not a boolean.")
90+
8691
case Aggregate(groupingExprs, aggregateExprs, child) =>
8792
def checkValidAggregateExpression(expr: Expression): Unit = expr match {
8893
case _: AggregateExpression => // OK

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,10 @@ case class Join(
128128

129129
// Joins are only resolved if they don't introduce ambiguous expression ids.
130130
override lazy val resolved: Boolean = {
131-
childrenResolved && expressions.forall(_.resolved) && selfJoinResolved
131+
childrenResolved &&
132+
expressions.forall(_.resolved) &&
133+
selfJoinResolved &&
134+
condition.forall(_.dataType == BooleanType)
132135
}
133136
}
134137

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,11 @@ class AnalysisErrorSuite extends SparkFunSuite with BeforeAndAfter {
118118
testRelation.where(Literal(1)),
119119
"filter" :: "'1'" :: "not a boolean" :: Literal(1).dataType.simpleString :: Nil)
120120

121+
errorTest(
122+
"non-boolean join conditions",
123+
testRelation.join(testRelation, condition = Some(Literal(1))),
124+
"condition" :: "'1'" :: "not a boolean" :: Literal(1).dataType.simpleString :: Nil)
125+
121126
errorTest(
122127
"missing group by",
123128
testRelation2.groupBy('a)('b),

0 commit comments

Comments
 (0)