Skip to content

Commit f99a2f4

Browse files
committed
Fixed LIKE predicate so that an EOL character can be used in an operand
1 parent bf589fc commit f99a2f4

File tree

2 files changed

+25
-23
lines changed

2 files changed

+25
-23
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -102,31 +102,27 @@ case class Like(left: Expression, right: Expression)
102102

103103
// replace the _ with .{1}, which matches exactly one occurrence of any character
104104
// replace the % with .*, which matches any character zero or more times
105-
override def escape(v: String) = {
106-
val sb = new StringBuilder()
107-
var i = 0;
108-
while (i < v.length) {
109-
// Make a special case for "\\_" and "\\%"
110-
val n = v.charAt(i);
111-
if (n == '\\' && i + 1 < v.length && (v.charAt(i + 1) == '_' || v.charAt(i + 1) == '%')) {
112-
sb.append(v.charAt(i + 1))
113-
i += 1
114-
} else {
115-
if (n == '_') {
116-
sb.append(".");
117-
} else if (n == '%') {
118-
sb.append(".*");
119-
} else {
120-
sb.append(Pattern.quote(Character.toString(n)));
121-
}
122-
}
123-
124-
i += 1
105+
override def escape(v: String) =
106+
if (!v.isEmpty) {
107+
"(?s)" + (' ' +: v.init).zip(v).flatMap {
108+
case (prev, '\\') => ""
109+
case ('\\', c) =>
110+
c match {
111+
case '_' => "_"
112+
case '%' => "%"
113+
case _ => Pattern.quote("\\" + c)
114+
}
115+
case (prev, c) =>
116+
c match {
117+
case '_' => "."
118+
case '%' => ".*"
119+
case _ => Pattern.quote(Character.toString(c))
120+
}
121+
}.mkString
122+
} else {
123+
v
125124
}
126125

127-
sb.toString()
128-
}
129-
130126
override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).matches()
131127
}
132128

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,9 @@ class ExpressionEvaluationSuite extends FunSuite {
191191
checkEvaluation("abc" like "a%", true)
192192
checkEvaluation("abc" like "b%", false)
193193
checkEvaluation("abc" like "bc%", false)
194+
checkEvaluation("a\nb" like "a_b", true)
195+
checkEvaluation("ab" like "a%b", true)
196+
checkEvaluation("a\nb" like "a%b", true)
194197
}
195198

196199
test("LIKE Non-literal Regular Expression") {
@@ -207,6 +210,9 @@ class ExpressionEvaluationSuite extends FunSuite {
207210
checkEvaluation("abc" like regEx, true, new GenericRow(Array[Any]("a%")))
208211
checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("b%")))
209212
checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("bc%")))
213+
checkEvaluation("a\nb" like regEx, true, new GenericRow(Array[Any]("a_b")))
214+
checkEvaluation("ab" like regEx, true, new GenericRow(Array[Any]("a%b")))
215+
checkEvaluation("a\nb" like regEx, true, new GenericRow(Array[Any]("a%b")))
210216

211217
checkEvaluation(Literal(null, StringType) like regEx, null, new GenericRow(Array[Any]("bc%")))
212218
}

0 commit comments

Comments
 (0)