From f99a2f4ff1d5464d37687264280bf8d1bf775e7e Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 27 Oct 2014 05:41:38 +0900 Subject: [PATCH 1/2] Fixed LIKE predicate so that we can use an EOL character in an operand --- .../expressions/stringOperations.scala | 42 +++++++++---------- .../ExpressionEvaluationSuite.scala | 6 +++ 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index c2a3a5ca3ca8b..f6349767764a3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -102,31 +102,27 @@ case class Like(left: Expression, right: Expression) // replace the _ with .{1} exactly match 1 time of any character // replace the % with .*, match 0 or more times with any character - override def escape(v: String) = { - val sb = new StringBuilder() - var i = 0; - while (i < v.length) { - // Make a special case for "\\_" and "\\%" - val n = v.charAt(i); - if (n == '\\' && i + 1 < v.length && (v.charAt(i + 1) == '_' || v.charAt(i + 1) == '%')) { - sb.append(v.charAt(i + 1)) - i += 1 - } else { - if (n == '_') { - sb.append("."); - } else if (n == '%') { - sb.append(".*"); - } else { - sb.append(Pattern.quote(Character.toString(n))); - } - } - - i += 1 + override def escape(v: String) = + if (!v.isEmpty) { + "(?s)" + (' ' +: v.init).zip(v).flatMap { + case (prev, '\\') => "" + case ('\\', c) => + c match { + case '_' => "_" + case '%' => "%" + case _ => Pattern.quote("\\" + c) + } + case (prev, c) => + c match { + case '_' => "." 
+ case '%' => ".*" + case _ => Pattern.quote(Character.toString(c)) + } + }.mkString + } else { + v } - sb.toString() - } - override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).matches() } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index f134d73450515..53c53481f984e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -191,6 +191,9 @@ class ExpressionEvaluationSuite extends FunSuite { checkEvaluation("abc" like "a%", true) checkEvaluation("abc" like "b%", false) checkEvaluation("abc" like "bc%", false) + checkEvaluation("a\nb" like "a_b", true) + checkEvaluation("ab" like "a%b", true) + checkEvaluation("a\nb" like "a%b", true) } test("LIKE Non-literal Regular Expression") { @@ -207,6 +210,9 @@ class ExpressionEvaluationSuite extends FunSuite { checkEvaluation("abc" like regEx, true, new GenericRow(Array[Any]("a%"))) checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("b%"))) checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("bc%"))) + checkEvaluation("a\nb" like regEx, true, new GenericRow(Array[Any]("a_b"))) + checkEvaluation("ab" like regEx, true, new GenericRow(Array[Any]("a%b"))) + checkEvaluation("a\nb" like regEx, true, new GenericRow(Array[Any]("a%b"))) checkEvaluation(Literal(null, StringType) like regEx, null, new GenericRow(Array[Any]("bc%"))) } From d15798bc99f816d7c07da9f4c57ff53cb7ce3111 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 27 Oct 2014 06:42:12 +0900 Subject: [PATCH 2/2] Remove test setting for thriftserver --- dev/run-tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/run-tests b/dev/run-tests index 
f55497ae2bfbd..972c8c8a21567 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -173,7 +173,7 @@ CURRENT_BLOCK=$BLOCK_SPARK_UNIT_TESTS if [ -n "$_SQL_TESTS_ONLY" ]; then # This must be an array of individual arguments. Otherwise, having one long string #+ will be interpreted as a single test, which doesn't work. - SBT_MAVEN_TEST_ARGS=("catalyst/test" "sql/test" "hive/test" "hive-thriftserver/test") + SBT_MAVEN_TEST_ARGS=("catalyst/test" "sql/test" "hive/test") else SBT_MAVEN_TEST_ARGS=("test") fi