Skip to content

Commit 9f30d92

Browse files
viirya authored and
gatorsmile committed
[SPARK-21654][SQL] Complement SQL predicates expression description
## What changes were proposed in this pull request? SQL predicate expressions do not have complete expression descriptions. This patch completes the descriptions by adding argument documentation and examples. It also adds related test cases for the SQL predicate expressions. ## How was this patch tested? Existing tests, plus newly added predicate tests. Author: Liang-Chi Hsieh <[email protected]> Closes #18869 from viirya/SPARK-21654.
1 parent 07fd68a commit 9f30d92

File tree

4 files changed

+460
-13
lines changed

4 files changed

+460
-13
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala

Lines changed: 137 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,25 @@ case class Not(child: Expression)
133133
/**
134134
* Evaluates to `true` if `list` contains `value`.
135135
*/
136+
// scalastyle:off line.size.limit
136137
@ExpressionDescription(
137-
usage = "expr1 _FUNC_(expr2, expr3, ...) - Returns true if `expr` equals to any valN.")
138+
usage = "expr1 _FUNC_(expr2, expr3, ...) - Returns true if `expr` equals to any valN.",
139+
arguments = """
140+
Arguments:
141+
* expr1, expr2, expr3, ... - the arguments must be same type.
142+
""",
143+
examples = """
144+
Examples:
145+
> SELECT 1 _FUNC_(1, 2, 3);
146+
true
147+
> SELECT 1 _FUNC_(2, 3, 4);
148+
false
149+
> SELECT named_struct('a', 1, 'b', 2) _FUNC_(named_struct('a', 1, 'b', 1), named_struct('a', 1, 'b', 3));
150+
false
151+
> SELECT named_struct('a', 1, 'b', 2) _FUNC_(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3));
152+
true
153+
""")
154+
// scalastyle:on line.size.limit
138155
case class In(value: Expression, list: Seq[Expression]) extends Predicate {
139156

140157
require(list != null, "list should not be null")
@@ -491,7 +508,24 @@ object Equality {
491508
// TODO: although map type is not orderable, technically map type should be able to be used
492509
// in equality comparison
493510
@ExpressionDescription(
494-
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` equals `expr2`, or false otherwise.")
511+
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` equals `expr2`, or false otherwise.",
512+
arguments = """
513+
Arguments:
514+
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
515+
and must be a type that can be used in equality comparison. Map type is not supported.
516+
For complex types such array/struct, the data types of fields must be orderable.
517+
""",
518+
examples = """
519+
Examples:
520+
> SELECT 2 _FUNC_ 2;
521+
true
522+
> SELECT 1 _FUNC_ '1';
523+
true
524+
> SELECT true _FUNC_ NULL;
525+
NULL
526+
> SELECT NULL _FUNC_ NULL;
527+
NULL
528+
""")
495529
case class EqualTo(left: Expression, right: Expression)
496530
extends BinaryComparison with NullIntolerant {
497531

@@ -510,6 +544,23 @@ case class EqualTo(left: Expression, right: Expression)
510544
usage = """
511545
expr1 _FUNC_ expr2 - Returns same result as the EQUAL(=) operator for non-null operands,
512546
but returns true if both are null, false if one of the them is null.
547+
""",
548+
arguments = """
549+
Arguments:
550+
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
551+
and must be a type that can be used in equality comparison. Map type is not supported.
552+
For complex types such array/struct, the data types of fields must be orderable.
553+
""",
554+
examples = """
555+
Examples:
556+
> SELECT 2 _FUNC_ 2;
557+
true
558+
> SELECT 1 _FUNC_ '1';
559+
true
560+
> SELECT true _FUNC_ NULL;
561+
false
562+
> SELECT NULL _FUNC_ NULL;
563+
true
513564
""")
514565
case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComparison {
515566

@@ -540,7 +591,27 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp
540591
}
541592

542593
@ExpressionDescription(
543-
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than `expr2`.")
594+
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than `expr2`.",
595+
arguments = """
596+
Arguments:
597+
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
598+
and must be a type that can be ordered. For example, map type is not orderable, so it
599+
is not supported. For complex types such array/struct, the data types of fields must
600+
be orderable.
601+
""",
602+
examples = """
603+
Examples:
604+
> SELECT 1 _FUNC_ 2;
605+
true
606+
> SELECT 1.1 _FUNC_ '1';
607+
false
608+
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
609+
false
610+
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
611+
true
612+
> SELECT 1 _FUNC_ NULL;
613+
NULL
614+
""")
544615
case class LessThan(left: Expression, right: Expression)
545616
extends BinaryComparison with NullIntolerant {
546617

@@ -550,7 +621,27 @@ case class LessThan(left: Expression, right: Expression)
550621
}
551622

552623
@ExpressionDescription(
553-
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than or equal to `expr2`.")
624+
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than or equal to `expr2`.",
625+
arguments = """
626+
Arguments:
627+
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
628+
and must be a type that can be ordered. For example, map type is not orderable, so it
629+
is not supported. For complex types such array/struct, the data types of fields must
630+
be orderable.
631+
""",
632+
examples = """
633+
Examples:
634+
> SELECT 2 _FUNC_ 2;
635+
true
636+
> SELECT 1.0 _FUNC_ '1';
637+
true
638+
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
639+
true
640+
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
641+
true
642+
> SELECT 1 _FUNC_ NULL;
643+
NULL
644+
""")
554645
case class LessThanOrEqual(left: Expression, right: Expression)
555646
extends BinaryComparison with NullIntolerant {
556647

@@ -560,7 +651,27 @@ case class LessThanOrEqual(left: Expression, right: Expression)
560651
}
561652

562653
@ExpressionDescription(
563-
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than `expr2`.")
654+
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than `expr2`.",
655+
arguments = """
656+
Arguments:
657+
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
658+
and must be a type that can be ordered. For example, map type is not orderable, so it
659+
is not supported. For complex types such array/struct, the data types of fields must
660+
be orderable.
661+
""",
662+
examples = """
663+
Examples:
664+
> SELECT 2 _FUNC_ 1;
665+
true
666+
> SELECT 2 _FUNC_ '1.1';
667+
true
668+
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
669+
false
670+
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
671+
false
672+
> SELECT 1 _FUNC_ NULL;
673+
NULL
674+
""")
564675
case class GreaterThan(left: Expression, right: Expression)
565676
extends BinaryComparison with NullIntolerant {
566677

@@ -570,7 +681,27 @@ case class GreaterThan(left: Expression, right: Expression)
570681
}
571682

572683
@ExpressionDescription(
573-
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than or equal to `expr2`.")
684+
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than or equal to `expr2`.",
685+
arguments = """
686+
Arguments:
687+
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
688+
and must be a type that can be ordered. For example, map type is not orderable, so it
689+
is not supported. For complex types such array/struct, the data types of fields must
690+
be orderable.
691+
""",
692+
examples = """
693+
Examples:
694+
> SELECT 2 _FUNC_ 1;
695+
true
696+
> SELECT 2.0 _FUNC_ '2.1';
697+
false
698+
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
699+
true
700+
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
701+
false
702+
> SELECT 1 _FUNC_ NULL;
703+
NULL
704+
""")
574705
case class GreaterThanOrEqual(left: Expression, right: Expression)
575706
extends BinaryComparison with NullIntolerant {
576707

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala

Lines changed: 69 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper {
123123
(null, false, null) ::
124124
(null, null, null) :: Nil)
125125

126-
test("IN") {
126+
test("basic IN predicate test") {
127127
checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), Seq(Literal(1),
128128
Literal(2))), null)
129129
checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType),
@@ -151,19 +151,32 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper {
151151
checkEvaluation(In(Literal("^Ba*n"), Seq(Literal("aa"), Literal("^Ba*n"))), true)
152152
checkEvaluation(In(Literal("^Ba*n"), Seq(Literal("aa"), Literal("^n"))), false)
153153

154-
val primitiveTypes = Seq(IntegerType, FloatType, DoubleType, StringType, ByteType, ShortType,
155-
LongType, BinaryType, BooleanType, DecimalType.USER_DEFAULT, TimestampType)
156-
primitiveTypes.foreach { t =>
157-
val dataGen = RandomDataGenerator.forType(t, nullable = true).get
154+
}
155+
156+
test("IN with different types") {
157+
def testWithRandomDataGeneration(dataType: DataType, nullable: Boolean): Unit = {
158+
val maybeDataGen = RandomDataGenerator.forType(dataType, nullable = nullable)
159+
// Actually we won't pass in unsupported data types, this is a safety check.
160+
val dataGen = maybeDataGen.getOrElse(
161+
fail(s"Failed to create data generator for type $dataType"))
158162
val inputData = Seq.fill(10) {
159163
val value = dataGen.apply()
160-
value match {
164+
def cleanData(value: Any) = value match {
161165
case d: Double if d.isNaN => 0.0d
162166
case f: Float if f.isNaN => 0.0f
163167
case _ => value
164168
}
169+
value match {
170+
case s: Seq[_] => s.map(cleanData(_))
171+
case m: Map[_, _] =>
172+
val pair = m.unzip
173+
val newKeys = pair._1.map(cleanData(_))
174+
val newValues = pair._2.map(cleanData(_))
175+
newKeys.zip(newValues).toMap
176+
case _ => cleanData(value)
177+
}
165178
}
166-
val input = inputData.map(NonFoldableLiteral.create(_, t))
179+
val input = inputData.map(NonFoldableLiteral.create(_, dataType))
167180
val expected = if (inputData(0) == null) {
168181
null
169182
} else if (inputData.slice(1, 10).contains(inputData(0))) {
@@ -175,6 +188,55 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper {
175188
}
176189
checkEvaluation(In(input(0), input.slice(1, 10)), expected)
177190
}
191+
192+
val atomicTypes = DataTypeTestUtils.atomicTypes.filter { t =>
193+
RandomDataGenerator.forType(t).isDefined && !t.isInstanceOf[DecimalType]
194+
} ++ Seq(DecimalType.USER_DEFAULT)
195+
196+
val atomicArrayTypes = atomicTypes.map(ArrayType(_, containsNull = true))
197+
198+
// Basic types:
199+
for (
200+
dataType <- atomicTypes;
201+
nullable <- Seq(true, false)) {
202+
testWithRandomDataGeneration(dataType, nullable)
203+
}
204+
205+
// Array types:
206+
for (
207+
arrayType <- atomicArrayTypes;
208+
nullable <- Seq(true, false)
209+
if RandomDataGenerator.forType(arrayType.elementType, arrayType.containsNull).isDefined) {
210+
testWithRandomDataGeneration(arrayType, nullable)
211+
}
212+
213+
// Struct types:
214+
for (
215+
colOneType <- atomicTypes;
216+
colTwoType <- atomicTypes;
217+
nullable <- Seq(true, false)) {
218+
val structType = StructType(
219+
StructField("a", colOneType) :: StructField("b", colTwoType) :: Nil)
220+
testWithRandomDataGeneration(structType, nullable)
221+
}
222+
223+
// Map types: not supported
224+
for (
225+
keyType <- atomicTypes;
226+
valueType <- atomicTypes;
227+
nullable <- Seq(true, false)) {
228+
val mapType = MapType(keyType, valueType)
229+
val e = intercept[Exception] {
230+
testWithRandomDataGeneration(mapType, nullable)
231+
}
232+
if (e.getMessage.contains("Code generation of")) {
233+
// If the `value` expression is null, `eval` will be short-circuited.
234+
// Codegen version evaluation will be run then.
235+
assert(e.getMessage.contains("cannot generate equality code for un-comparable type"))
236+
} else {
237+
assert(e.getMessage.contains("Exception evaluating"))
238+
}
239+
}
178240
}
179241

180242
test("INSET") {
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
-- EqualTo
2+
select 1 = 1;
3+
select 1 = '1';
4+
select 1.0 = '1';
5+
6+
-- GreaterThan
7+
select 1 > '1';
8+
select 2 > '1.0';
9+
select 2 > '2.0';
10+
select 2 > '2.2';
11+
select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52');
12+
select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52';
13+
14+
-- GreaterThanOrEqual
15+
select 1 >= '1';
16+
select 2 >= '1.0';
17+
select 2 >= '2.0';
18+
select 2.0 >= '2.2';
19+
select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52');
20+
select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52';
21+
22+
-- LessThan
23+
select 1 < '1';
24+
select 2 < '1.0';
25+
select 2 < '2.0';
26+
select 2.0 < '2.2';
27+
select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52');
28+
select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52';
29+
30+
-- LessThanOrEqual
31+
select 1 <= '1';
32+
select 2 <= '1.0';
33+
select 2 <= '2.0';
34+
select 2.0 <= '2.2';
35+
select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52');
36+
select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52';

0 commit comments

Comments
 (0)