Skip to content

Commit 184aa1c

Browse files
chenghao-intel authored and marmbrus committed
[SPARK-2665] [SQL] Add EqualNS & Unit Tests
Hive supports the operator "<=>", which returns the same result as the EQUAL (=) operator for non-null operands, but returns TRUE if both operands are NULL and FALSE if only one of them is NULL. Author: Cheng Hao <[email protected]> Closes apache#1570 from chenghao-intel/equalns and squashes the following commits: 8d6c789 [Cheng Hao] Remove the test case orc_predicate_pushdown 5b2ca88 [Cheng Hao] Add cases into whitelist 8e66cdd [Cheng Hao] Rename the EqualNSTo ==> EqualNullSafe 7af4b0b [Cheng Hao] Add EqualNS & Unit Tests
1 parent eb82abd commit 184aa1c

File tree

58 files changed

+683
-3
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+683
-3
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,8 @@ trait HiveTypeCoercion {
246246

247247
// No need to change other EqualTo operators as that actually makes sense for boolean types.
248248
case e: EqualTo => e
249+
// No need to change the EqualNullSafe operators either.
250+
case e: EqualNullSafe => e
249251
// Otherwise turn them to Byte types so that there exists an ordering.
250252
case p: BinaryComparison
251253
if p.left.dataType == BooleanType && p.right.dataType == BooleanType =>

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ package object dsl {
7777
def > (other: Expression) = GreaterThan(expr, other)
7878
def >= (other: Expression) = GreaterThanOrEqual(expr, other)
7979
def === (other: Expression) = EqualTo(expr, other)
80+
def <=> (other: Expression) = EqualNullSafe(expr, other)
8081
def !== (other: Expression) = Not(EqualTo(expr, other))
8182

8283
def in(list: Expression*) = In(expr, list)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,22 @@ case class EqualTo(left: Expression, right: Expression) extends BinaryComparison
153153
}
154154
}
155155

156+
/**
 * Null-safe equality comparison, written `<=>`.
 *
 * Evaluates both operands against the input row and returns:
 *  - `true` when both operands evaluate to null,
 *  - `false` when exactly one operand evaluates to null,
 *  - the result of `l == r` when neither operand is null.
 *
 * The result is always a Boolean and never null, which is why `nullable` is overridden to false.
 *
 * @param left  expression producing the left-hand operand
 * @param right expression producing the right-hand operand
 */
case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComparison {
157+
def symbol = "<=>"
158+
override def nullable = false // eval below always yields true or false, never null
159+
override def eval(input: Row): Any = {
160+
val l = left.eval(input)
161+
val r = right.eval(input)
162+
if (l == null && r == null) {
163+
true
164+
} else if (l == null || r == null) {
165+
false
166+
} else {
167+
l == r // both operands are non-null here, so compare them with ==
168+
}
169+
}
170+
}
171+
156172
case class LessThan(left: Expression, right: Expression) extends BinaryComparison {
157173
def symbol = "<"
158174
override def eval(input: Row): Any = c2(input, left, right, _.lt(_, _))

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,8 @@ object NullPropagation extends Rule[LogicalPlan] {
153153
case e @ GetItem(Literal(null, _), _) => Literal(null, e.dataType)
154154
case e @ GetItem(_, Literal(null, _)) => Literal(null, e.dataType)
155155
case e @ GetField(Literal(null, _), _) => Literal(null, e.dataType)
156+
case e @ EqualNullSafe(Literal(null, _), r) => IsNull(r)
157+
case e @ EqualNullSafe(l, Literal(null, _)) => IsNull(l)
156158

157159
// For Coalesce, remove null literals.
158160
case e @ Coalesce(children) =>

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -451,11 +451,13 @@ class ExpressionEvaluationSuite extends FunSuite {
451451
}
452452

453453
test("BinaryComparison") {
454-
val row = new GenericRow(Array[Any](1, 2, 3, null))
454+
val row = new GenericRow(Array[Any](1, 2, 3, null, 3, null))
455455
val c1 = 'a.int.at(0)
456456
val c2 = 'a.int.at(1)
457457
val c3 = 'a.int.at(2)
458458
val c4 = 'a.int.at(3)
459+
val c5 = 'a.int.at(4)
460+
val c6 = 'a.int.at(5)
459461

460462
checkEvaluation(LessThan(c1, c4), null, row)
461463
checkEvaluation(LessThan(c1, c2), true, row)
@@ -469,6 +471,12 @@ class ExpressionEvaluationSuite extends FunSuite {
469471
checkEvaluation(c1 >= c2, false, row)
470472
checkEvaluation(c1 === c2, false, row)
471473
checkEvaluation(c1 !== c2, true, row)
474+
checkEvaluation(c4 <=> c1, false, row)
475+
checkEvaluation(c1 <=> c4, false, row)
476+
checkEvaluation(c4 <=> c6, true, row)
477+
checkEvaluation(c3 <=> c5, true, row)
478+
checkEvaluation(Literal(true) <=> Literal(null, BooleanType), false, row)
479+
checkEvaluation(Literal(null, BooleanType) <=> Literal(true), false, row)
472480
}
473481

474482
test("StringComparison") {

sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
196196

197197
// Hive returns the results of describe as plain text. Comments with multiple lines
198198
// introduce extra lines in the Hive results, which make the result comparison fail.
199-
"describe_comment_indent"
199+
"describe_comment_indent",
200+
201+
// Limit clause without an ordering, which causes failure.
202+
"orc_predicate_pushdown"
200203
)
201204

202205
/**
@@ -503,6 +506,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
503506
"join_hive_626",
504507
"join_map_ppr",
505508
"join_nulls",
509+
"join_nullsafe",
506510
"join_rc",
507511
"join_reorder2",
508512
"join_reorder3",
@@ -734,6 +738,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
734738
"udf_double",
735739
"udf_E",
736740
"udf_elt",
741+
"udf_equal",
737742
"udf_exp",
738743
"udf_field",
739744
"udf_find_in_set",

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
253253

254254
protected val primitiveTypes =
255255
Seq(StringType, IntegerType, LongType, DoubleType, FloatType, BooleanType, ByteType,
256-
ShortType, DecimalType, TimestampType)
256+
ShortType, DecimalType, TimestampType, BinaryType)
257257

258258
protected def toHiveString(a: (Any, DataType)): String = a match {
259259
case (struct: Row, StructType(fields)) =>
@@ -269,6 +269,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
269269
}.toSeq.sorted.mkString("{", ",", "}")
270270
case (null, _) => "NULL"
271271
case (t: Timestamp, TimestampType) => new TimestampWritable(t).toString
272+
case (bin: Array[Byte], BinaryType) => new String(bin, "UTF-8")
272273
case (other, tpe) if primitiveTypes contains tpe => other.toString
273274
}
274275

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,7 @@ private[hive] object HiveQl {
931931
/* Comparisons */
932932
case Token("=", left :: right:: Nil) => EqualTo(nodeToExpr(left), nodeToExpr(right))
933933
case Token("==", left :: right:: Nil) => EqualTo(nodeToExpr(left), nodeToExpr(right))
934+
case Token("<=>", left :: right:: Nil) => EqualNullSafe(nodeToExpr(left), nodeToExpr(right))
934935
case Token("!=", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right)))
935936
case Token("<>", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right)))
936937
case Token(">", left :: right:: Nil) => GreaterThan(nodeToExpr(left), nodeToExpr(right))
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
0

sql/hive/src/test/resources/golden/join_nullsafe-1-5644ab44e5ba9f2941216b8d5dc33a99

Whitespace-only changes.

0 commit comments

Comments
 (0)