Skip to content

Commit a4bf13a

Browse files
committed
[SPARK-16584][SQL] Move regexp unit tests to RegexpExpressionsSuite
## What changes were proposed in this pull request?

This patch moves regexp-related unit tests from StringExpressionsSuite to RegexpExpressionsSuite to match the file name for regexp expressions.

## How was this patch tested?

This is a test-only change.

Author: Reynold Xin <[email protected]>

Closes #14230 from rxin/SPARK-16584.

(cherry picked from commit 7b84758)
Signed-off-by: Reynold Xin <[email protected]>
1 parent c527e9e commit a4bf13a

File tree

2 files changed

+194
-164
lines changed

2 files changed

+194
-164
lines changed
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.catalyst.expressions
19+
20+
import org.apache.spark.SparkFunSuite
21+
import org.apache.spark.sql.catalyst.dsl.expressions._
22+
import org.apache.spark.sql.types.StringType
23+
24+
/**
25+
* Unit tests for regular expression (regexp) related SQL expressions.
26+
*/
27+
class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
28+
29+
// Checks LIKE when the pattern is written directly into the expression:
// first every null / non-null operand combination, then concrete patterns.
test("LIKE literal Regular Expression") {
30+
// Null handling: every case with a null operand (Literal or NonFoldableLiteral)
// expects null; the single all-non-null case ("a" like "a") expects true.
checkEvaluation(Literal.create(null, StringType).like("a"), null)
31+
checkEvaluation(Literal.create("a", StringType).like(Literal.create(null, StringType)), null)
32+
checkEvaluation(Literal.create(null, StringType).like(Literal.create(null, StringType)), null)
33+
checkEvaluation(
34+
Literal.create("a", StringType).like(NonFoldableLiteral.create("a", StringType)), true)
35+
checkEvaluation(
36+
Literal.create("a", StringType).like(NonFoldableLiteral.create(null, StringType)), null)
37+
checkEvaluation(
38+
Literal.create(null, StringType).like(NonFoldableLiteral.create("a", StringType)), null)
39+
checkEvaluation(
40+
Literal.create(null, StringType).like(NonFoldableLiteral.create(null, StringType)), null)
41+
42+
// Concrete patterns exercising `_`, `%` and their backslash-escaped forms.
checkEvaluation("abdef" like "abdef", true)
43+
checkEvaluation("a_%b" like "a\\__b", true)
44+
checkEvaluation("addb" like "a_%b", true)
45+
checkEvaluation("addb" like "a\\__b", false)
46+
checkEvaluation("addb" like "a%\\%b", false)
47+
checkEvaluation("a_%b" like "a%\\%b", true)
48+
checkEvaluation("addb" like "a%", true)
49+
// "**" is expected to simply not match (result false).
checkEvaluation("addb" like "**", false)
50+
checkEvaluation("abc" like "a%", true)
51+
checkEvaluation("abc" like "b%", false)
52+
checkEvaluation("abc" like "bc%", false)
53+
// Inputs containing a newline are expected to match `_` and `%` as well.
checkEvaluation("a\nb" like "a_b", true)
54+
checkEvaluation("ab" like "a%b", true)
55+
checkEvaluation("a\nb" like "a%b", true)
56+
}
57+
58+
// Checks LIKE when the pattern is not a literal: each case passes the pattern
// in the single-field row handed to checkEvaluation.
test("LIKE Non-literal Regular Expression") {
59+
// NOTE(review): presumably a string attribute bound to field 0 of the input row —
// confirm against the catalyst dsl.
val regEx = 'a.string.at(0)
60+
// A null pattern in the row expects a null result.
checkEvaluation("abcd" like regEx, null, create_row(null))
61+
// The remaining input/pattern pairs and expected results match the literal LIKE cases.
checkEvaluation("abdef" like regEx, true, create_row("abdef"))
62+
checkEvaluation("a_%b" like regEx, true, create_row("a\\__b"))
63+
checkEvaluation("addb" like regEx, true, create_row("a_%b"))
64+
checkEvaluation("addb" like regEx, false, create_row("a\\__b"))
65+
checkEvaluation("addb" like regEx, false, create_row("a%\\%b"))
66+
checkEvaluation("a_%b" like regEx, true, create_row("a%\\%b"))
67+
checkEvaluation("addb" like regEx, true, create_row("a%"))
68+
checkEvaluation("addb" like regEx, false, create_row("**"))
69+
checkEvaluation("abc" like regEx, true, create_row("a%"))
70+
checkEvaluation("abc" like regEx, false, create_row("b%"))
71+
checkEvaluation("abc" like regEx, false, create_row("bc%"))
72+
checkEvaluation("a\nb" like regEx, true, create_row("a_b"))
73+
checkEvaluation("ab" like regEx, true, create_row("a%b"))
74+
checkEvaluation("a\nb" like regEx, true, create_row("a%b"))
75+
76+
// A null left-hand side expects null even though the row holds a non-null pattern.
checkEvaluation(Literal.create(null, StringType) like regEx, null, create_row("bc%"))
77+
}
78+
79+
// Checks RLIKE when the regex is written directly into the expression:
// null / non-null operand combinations, concrete regexes, and an invalid regex.
test("RLIKE literal Regular Expression") {
80+
// Null handling: every case with a null operand (Literal or NonFoldableLiteral)
// expects null; the all-non-null case ("abdef" rlike "abdef") expects true.
checkEvaluation(Literal.create(null, StringType) rlike "abdef", null)
81+
checkEvaluation("abdef" rlike Literal.create(null, StringType), null)
82+
checkEvaluation(Literal.create(null, StringType) rlike Literal.create(null, StringType), null)
83+
checkEvaluation("abdef" rlike NonFoldableLiteral.create("abdef", StringType), true)
84+
checkEvaluation("abdef" rlike NonFoldableLiteral.create(null, StringType), null)
85+
checkEvaluation(
86+
Literal.create(null, StringType) rlike NonFoldableLiteral.create("abdef", StringType), null)
87+
checkEvaluation(
88+
Literal.create(null, StringType) rlike NonFoldableLiteral.create(null, StringType), null)
89+
90+
checkEvaluation("abdef" rlike "abdef", true)
91+
checkEvaluation("abbbbc" rlike "a.*c", true)
92+
93+
// An unanchored pattern is expected to match anywhere in the input
// ("afofo" rlike "fo" is true), while `^` pins the match to the start.
checkEvaluation("fofo" rlike "^fo", true)
94+
checkEvaluation("fo\no" rlike "^fo\no$", true)
95+
checkEvaluation("Bn" rlike "^Ba*n", true)
96+
checkEvaluation("afofo" rlike "fo", true)
97+
checkEvaluation("afofo" rlike "^fo", false)
98+
checkEvaluation("Baan" rlike "^Ba?n", false)
99+
checkEvaluation("axe" rlike "pi|apa", false)
100+
checkEvaluation("pip" rlike "^(pi)*$", false)
101+
102+
// NOTE(review): the last two assertions in this group repeat the first two verbatim.
checkEvaluation("abc" rlike "^ab", true)
103+
checkEvaluation("abc" rlike "^bc", false)
104+
checkEvaluation("abc" rlike "^ab", true)
105+
checkEvaluation("abc" rlike "^bc", false)
106+
107+
// An invalid regex ("**") is expected to raise PatternSyntaxException from evaluate.
intercept[java.util.regex.PatternSyntaxException] {
108+
evaluate("abbbbc" rlike "**")
109+
}
110+
}
111+
112+
// Checks RLIKE when the regex is not a literal: each case passes the regex
// in the single-field row handed to checkEvaluation / evaluate.
test("RLIKE Non-literal Regular Expression") {
113+
// NOTE(review): presumably a string attribute bound to field 0 of the input row —
// confirm against the catalyst dsl.
val regEx = 'a.string.at(0)
114+
checkEvaluation("abdef" rlike regEx, true, create_row("abdef"))
115+
checkEvaluation("abbbbc" rlike regEx, true, create_row("a.*c"))
116+
checkEvaluation("fofo" rlike regEx, true, create_row("^fo"))
117+
checkEvaluation("fo\no" rlike regEx, true, create_row("^fo\no$"))
118+
checkEvaluation("Bn" rlike regEx, true, create_row("^Ba*n"))
119+
120+
// An invalid regex supplied through the row is expected to raise PatternSyntaxException.
intercept[java.util.regex.PatternSyntaxException] {
121+
evaluate("abbbbc" rlike regEx, create_row("**"))
122+
}
123+
}
124+
125+
126+
// Checks RegExpReplace with operands taken from a row and with all-literal operands.
test("RegexReplace") {
127+
// Row layout: (input string, regex pattern, replacement).
val row1 = create_row("100-200", "(\\d+)", "num")
128+
val row2 = create_row("100-200", "(\\d+)", "###")
129+
val row3 = create_row("100-200", "(-)", "###")
130+
// row4-row6 each put a null in one of the three positions.
val row4 = create_row(null, "(\\d+)", "###")
131+
val row5 = create_row("100-200", null, "###")
132+
val row6 = create_row("100-200", "(-)", null)
133+
134+
// NOTE(review): presumably string attributes bound to row fields 0, 1 and 2 —
// confirm against the catalyst dsl.
val s = 's.string.at(0)
135+
val p = 'p.string.at(1)
136+
val r = 'r.string.at(2)
137+
138+
val expr = RegExpReplace(s, p, r)
139+
// Every match of the pattern is expected to be replaced ("100-200" -> "num-num").
checkEvaluation(expr, "num-num", row1)
140+
checkEvaluation(expr, "###-###", row2)
141+
checkEvaluation(expr, "100###200", row3)
142+
// A null in any of the three operands expects a null result.
checkEvaluation(expr, null, row4)
143+
checkEvaluation(expr, null, row5)
144+
checkEvaluation(expr, null, row6)
145+
146+
// Same replacement built purely from literals; the expression has no reference to the row.
val nonNullExpr = RegExpReplace(Literal("100-200"), Literal("(\\d+)"), Literal("num"))
147+
checkEvaluation(nonNullExpr, "num-num", row1)
148+
}
149+
150+
// Checks RegExpExtract with operands taken from a row, with the two-argument
// constructor, and with all-literal operands.
test("RegexExtract") {
151+
// Row layout: (input string, regex pattern, group index).
val row1 = create_row("100-200", "(\\d+)-(\\d+)", 1)
152+
val row2 = create_row("100-200", "(\\d+)-(\\d+)", 2)
153+
val row3 = create_row("100-200", "(\\d+).*", 1)
154+
val row4 = create_row("100-200", "([a-z])", 1)
155+
// row5-row7 each put a null in one of the three positions.
val row5 = create_row(null, "([a-z])", 1)
156+
val row6 = create_row("100-200", null, 1)
157+
val row7 = create_row("100-200", "([a-z])", null)
158+
159+
// NOTE(review): presumably attributes bound to row fields 0, 1 (strings) and 2 (int) —
// confirm against the catalyst dsl.
val s = 's.string.at(0)
160+
val p = 'p.string.at(1)
161+
val r = 'r.int.at(2)
162+
163+
val expr = RegExpExtract(s, p, r)
164+
checkEvaluation(expr, "100", row1)
165+
checkEvaluation(expr, "200", row2)
166+
checkEvaluation(expr, "100", row3)
167+
checkEvaluation(expr, "", row4) // the pattern matches nothing in the input, so the empty string is expected
168+
// A null in any of the three operands expects a null result.
checkEvaluation(expr, null, row5)
169+
checkEvaluation(expr, null, row6)
170+
checkEvaluation(expr, null, row7)
171+
172+
// Two-argument constructor: expects "100" for row1, the same result as group index 1.
// NOTE(review): presumably the index defaults to 1 — confirm in RegExpExtract.
val expr1 = new RegExpExtract(s, p)
173+
checkEvaluation(expr1, "100", row1)
174+
175+
// Same extraction built purely from literals; the expression has no reference to the row.
val nonNullExpr = RegExpExtract(Literal("100-200"), Literal("(\\d+)-(\\d+)"), Literal(1))
176+
checkEvaluation(nonNullExpr, "100", row1)
177+
}
178+
179+
// Checks StringSplit with a regex delimiter, using literal operands and operands taken from a row.
test("SPLIT") {
180+
// NOTE(review): presumably string attributes bound to row fields 0 and 1 —
// confirm against the catalyst dsl.
val s1 = 'a.string.at(0)
181+
val s2 = 'b.string.at(1)
182+
// Row layout: (input string, regex delimiter); row2 and row3 each null out one field.
val row1 = create_row("aa2bb3cc", "[1-9]+")
183+
val row2 = create_row(null, "[1-9]+")
184+
val row3 = create_row("aa2bb3cc", null)
185+
186+
// Splitting on runs of digits 1-9 expects the three letter groups, for both operand forms.
checkEvaluation(
187+
StringSplit(Literal("aa2bb3cc"), Literal("[1-9]+")), Seq("aa", "bb", "cc"), row1)
188+
checkEvaluation(
189+
StringSplit(s1, s2), Seq("aa", "bb", "cc"), row1)
190+
// A null input string or a null delimiter expects a null result.
checkEvaluation(StringSplit(s1, s2), null, row2)
191+
checkEvaluation(StringSplit(s1, s2), null, row3)
192+
}
193+
194+
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala

Lines changed: 0 additions & 164 deletions
Original file line numberDiff line numberDiff line change
@@ -254,102 +254,6 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
254254
SubstringIndex(Literal("www||apache||org"), Literal( "||"), Literal(2)), "www||apache")
255255
}
256256

257-
test("LIKE literal Regular Expression") {
258-
checkEvaluation(Literal.create(null, StringType).like("a"), null)
259-
checkEvaluation(Literal.create("a", StringType).like(Literal.create(null, StringType)), null)
260-
checkEvaluation(Literal.create(null, StringType).like(Literal.create(null, StringType)), null)
261-
checkEvaluation(
262-
Literal.create("a", StringType).like(NonFoldableLiteral.create("a", StringType)), true)
263-
checkEvaluation(
264-
Literal.create("a", StringType).like(NonFoldableLiteral.create(null, StringType)), null)
265-
checkEvaluation(
266-
Literal.create(null, StringType).like(NonFoldableLiteral.create("a", StringType)), null)
267-
checkEvaluation(
268-
Literal.create(null, StringType).like(NonFoldableLiteral.create(null, StringType)), null)
269-
270-
checkEvaluation("abdef" like "abdef", true)
271-
checkEvaluation("a_%b" like "a\\__b", true)
272-
checkEvaluation("addb" like "a_%b", true)
273-
checkEvaluation("addb" like "a\\__b", false)
274-
checkEvaluation("addb" like "a%\\%b", false)
275-
checkEvaluation("a_%b" like "a%\\%b", true)
276-
checkEvaluation("addb" like "a%", true)
277-
checkEvaluation("addb" like "**", false)
278-
checkEvaluation("abc" like "a%", true)
279-
checkEvaluation("abc" like "b%", false)
280-
checkEvaluation("abc" like "bc%", false)
281-
checkEvaluation("a\nb" like "a_b", true)
282-
checkEvaluation("ab" like "a%b", true)
283-
checkEvaluation("a\nb" like "a%b", true)
284-
}
285-
286-
test("LIKE Non-literal Regular Expression") {
287-
val regEx = 'a.string.at(0)
288-
checkEvaluation("abcd" like regEx, null, create_row(null))
289-
checkEvaluation("abdef" like regEx, true, create_row("abdef"))
290-
checkEvaluation("a_%b" like regEx, true, create_row("a\\__b"))
291-
checkEvaluation("addb" like regEx, true, create_row("a_%b"))
292-
checkEvaluation("addb" like regEx, false, create_row("a\\__b"))
293-
checkEvaluation("addb" like regEx, false, create_row("a%\\%b"))
294-
checkEvaluation("a_%b" like regEx, true, create_row("a%\\%b"))
295-
checkEvaluation("addb" like regEx, true, create_row("a%"))
296-
checkEvaluation("addb" like regEx, false, create_row("**"))
297-
checkEvaluation("abc" like regEx, true, create_row("a%"))
298-
checkEvaluation("abc" like regEx, false, create_row("b%"))
299-
checkEvaluation("abc" like regEx, false, create_row("bc%"))
300-
checkEvaluation("a\nb" like regEx, true, create_row("a_b"))
301-
checkEvaluation("ab" like regEx, true, create_row("a%b"))
302-
checkEvaluation("a\nb" like regEx, true, create_row("a%b"))
303-
304-
checkEvaluation(Literal.create(null, StringType) like regEx, null, create_row("bc%"))
305-
}
306-
307-
test("RLIKE literal Regular Expression") {
308-
checkEvaluation(Literal.create(null, StringType) rlike "abdef", null)
309-
checkEvaluation("abdef" rlike Literal.create(null, StringType), null)
310-
checkEvaluation(Literal.create(null, StringType) rlike Literal.create(null, StringType), null)
311-
checkEvaluation("abdef" rlike NonFoldableLiteral.create("abdef", StringType), true)
312-
checkEvaluation("abdef" rlike NonFoldableLiteral.create(null, StringType), null)
313-
checkEvaluation(
314-
Literal.create(null, StringType) rlike NonFoldableLiteral.create("abdef", StringType), null)
315-
checkEvaluation(
316-
Literal.create(null, StringType) rlike NonFoldableLiteral.create(null, StringType), null)
317-
318-
checkEvaluation("abdef" rlike "abdef", true)
319-
checkEvaluation("abbbbc" rlike "a.*c", true)
320-
321-
checkEvaluation("fofo" rlike "^fo", true)
322-
checkEvaluation("fo\no" rlike "^fo\no$", true)
323-
checkEvaluation("Bn" rlike "^Ba*n", true)
324-
checkEvaluation("afofo" rlike "fo", true)
325-
checkEvaluation("afofo" rlike "^fo", false)
326-
checkEvaluation("Baan" rlike "^Ba?n", false)
327-
checkEvaluation("axe" rlike "pi|apa", false)
328-
checkEvaluation("pip" rlike "^(pi)*$", false)
329-
330-
checkEvaluation("abc" rlike "^ab", true)
331-
checkEvaluation("abc" rlike "^bc", false)
332-
checkEvaluation("abc" rlike "^ab", true)
333-
checkEvaluation("abc" rlike "^bc", false)
334-
335-
intercept[java.util.regex.PatternSyntaxException] {
336-
evaluate("abbbbc" rlike "**")
337-
}
338-
}
339-
340-
test("RLIKE Non-literal Regular Expression") {
341-
val regEx = 'a.string.at(0)
342-
checkEvaluation("abdef" rlike regEx, true, create_row("abdef"))
343-
checkEvaluation("abbbbc" rlike regEx, true, create_row("a.*c"))
344-
checkEvaluation("fofo" rlike regEx, true, create_row("^fo"))
345-
checkEvaluation("fo\no" rlike regEx, true, create_row("^fo\no$"))
346-
checkEvaluation("Bn" rlike regEx, true, create_row("^Ba*n"))
347-
348-
intercept[java.util.regex.PatternSyntaxException] {
349-
evaluate("abbbbc" rlike regEx, create_row("**"))
350-
}
351-
}
352-
353257
test("ascii for string") {
354258
val a = 'a.string.at(0)
355259
checkEvaluation(Ascii(Literal("efg")), 101, create_row("abdef"))
@@ -612,74 +516,6 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
612516
checkEvaluation(StringSpace(s1), null, row2)
613517
}
614518

615-
test("RegexReplace") {
616-
val row1 = create_row("100-200", "(\\d+)", "num")
617-
val row2 = create_row("100-200", "(\\d+)", "###")
618-
val row3 = create_row("100-200", "(-)", "###")
619-
val row4 = create_row(null, "(\\d+)", "###")
620-
val row5 = create_row("100-200", null, "###")
621-
val row6 = create_row("100-200", "(-)", null)
622-
623-
val s = 's.string.at(0)
624-
val p = 'p.string.at(1)
625-
val r = 'r.string.at(2)
626-
627-
val expr = RegExpReplace(s, p, r)
628-
checkEvaluation(expr, "num-num", row1)
629-
checkEvaluation(expr, "###-###", row2)
630-
checkEvaluation(expr, "100###200", row3)
631-
checkEvaluation(expr, null, row4)
632-
checkEvaluation(expr, null, row5)
633-
checkEvaluation(expr, null, row6)
634-
635-
val nonNullExpr = RegExpReplace(Literal("100-200"), Literal("(\\d+)"), Literal("num"))
636-
checkEvaluation(nonNullExpr, "num-num", row1)
637-
}
638-
639-
test("RegexExtract") {
640-
val row1 = create_row("100-200", "(\\d+)-(\\d+)", 1)
641-
val row2 = create_row("100-200", "(\\d+)-(\\d+)", 2)
642-
val row3 = create_row("100-200", "(\\d+).*", 1)
643-
val row4 = create_row("100-200", "([a-z])", 1)
644-
val row5 = create_row(null, "([a-z])", 1)
645-
val row6 = create_row("100-200", null, 1)
646-
val row7 = create_row("100-200", "([a-z])", null)
647-
648-
val s = 's.string.at(0)
649-
val p = 'p.string.at(1)
650-
val r = 'r.int.at(2)
651-
652-
val expr = RegExpExtract(s, p, r)
653-
checkEvaluation(expr, "100", row1)
654-
checkEvaluation(expr, "200", row2)
655-
checkEvaluation(expr, "100", row3)
656-
checkEvaluation(expr, "", row4) // will not match anything, empty string get
657-
checkEvaluation(expr, null, row5)
658-
checkEvaluation(expr, null, row6)
659-
checkEvaluation(expr, null, row7)
660-
661-
val expr1 = new RegExpExtract(s, p)
662-
checkEvaluation(expr1, "100", row1)
663-
664-
val nonNullExpr = RegExpExtract(Literal("100-200"), Literal("(\\d+)-(\\d+)"), Literal(1))
665-
checkEvaluation(nonNullExpr, "100", row1)
666-
}
667-
668-
test("SPLIT") {
669-
val s1 = 'a.string.at(0)
670-
val s2 = 'b.string.at(1)
671-
val row1 = create_row("aa2bb3cc", "[1-9]+")
672-
val row2 = create_row(null, "[1-9]+")
673-
val row3 = create_row("aa2bb3cc", null)
674-
675-
checkEvaluation(
676-
StringSplit(Literal("aa2bb3cc"), Literal("[1-9]+")), Seq("aa", "bb", "cc"), row1)
677-
checkEvaluation(
678-
StringSplit(s1, s2), Seq("aa", "bb", "cc"), row1)
679-
checkEvaluation(StringSplit(s1, s2), null, row2)
680-
checkEvaluation(StringSplit(s1, s2), null, row3)
681-
}
682-
683519
test("length for string / binary") {
684520
val a = 'a.string.at(0)
685521
val b = 'b.binary.at(0)

0 commit comments

Comments
 (0)