@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, _}
2323import org .apache .spark .sql .catalyst .expressions ._
2424import org .apache .spark .sql .catalyst .expressions .aggregate .{First , Last }
2525import org .apache .spark .sql .catalyst .plans .PlanTest
26+ import org .apache .spark .sql .internal .SQLConf
2627import org .apache .spark .sql .types ._
2728import org .apache .spark .unsafe .types .CalendarInterval
2829
@@ -39,12 +40,17 @@ class ExpressionParserSuite extends PlanTest {
3940 import org .apache .spark .sql .catalyst .dsl .expressions ._
4041 import org .apache .spark .sql .catalyst .dsl .plans ._
4142
// Test helper: parses `sqlCommand` as an expression and checks the result against the expected
// expression `e` using compareExpressions.
// In the added ('+') version the parser is an explicit third parameter that defaults to
// `defaultParser`, so existing two-argument calls keep working while a test may pass its own
// parser instance.
42- def assertEqual (sqlCommand : String , e : Expression ): Unit = {
43- compareExpressions(parseExpression(sqlCommand), e)
// Parser used by the helpers whenever a caller does not supply one.
43+ val defaultParser = CatalystSqlParser
44+
45+ def assertEqual (
46+ sqlCommand : String ,
47+ e : Expression ,
48+ parser : ParserInterface = defaultParser): Unit = {
49+ compareExpressions(parser.parseExpression(sqlCommand), e)
4450 }
4551
4652 def intercept (sqlCommand : String , messages : String * ): Unit = {
47- val e = intercept[ParseException ](parseExpression(sqlCommand))
53+ val e = intercept[ParseException ](defaultParser. parseExpression(sqlCommand))
4854 messages.foreach { message =>
4955 assert(e.message.contains(message))
5056 }
@@ -101,7 +107,7 @@ class ExpressionParserSuite extends PlanTest {
101107 test(" long binary logical expressions" ) {
// Local helper: builds a SQL string of 1000 `x == x` comparisons joined by `op`, parses it with
// `defaultParser` (added '+' line), and checks the shape of the resulting expression tree:
//   - exactly 1000 EqualTo nodes, one per comparison;
//   - exactly 999 nodes that are instances of `clazz`, i.e. one per `op` between the
//     1000 comparisons (presumably `clazz` is the expression class produced by `op` —
//     confirm against the callers, which are outside this view).
102108 def testVeryBinaryExpression (op : String , clazz : Class [_]): Unit = {
103109 val sql = (1 to 1000 ).map(x => s " $x == $x" ).mkString(op)
104- val e = parseExpression(sql)
110+ val e = defaultParser. parseExpression(sql)
105111 assert(e.collect { case _ : EqualTo => true }.size === 1000 )
106112 assert(e.collect { case x if clazz.isInstance(x) => true }.size === 999 )
107113 }
@@ -160,6 +166,15 @@ class ExpressionParserSuite extends PlanTest {
160166 assertEqual(" a not regexp 'pattern%'" , ! (' a rlike " pattern%" ))
161167 }
162168
// Checks `rlike` parsing with a parser built from a SQLConf in which ESCAPED_STRING_LITERALS is
// set to "true". In every assertion the expected pattern literal is spelled with the same
// backslash sequences as the pattern inside the SQL text, i.e. the test expects the parser to
// hand the pattern through without unescaping it.
// The dedicated parser is passed as the third argument of assertEqual so the default parser is
// not used for these cases.
169+ test(" like expressions with ESCAPED_STRING_LITERALS = true" ) {
170+ val conf = new SQLConf ()
171+ conf.setConfString(SQLConf .ESCAPED_STRING_LITERALS .key, " true" )
172+ val parser = new CatalystSqlParser (conf)
173+ assertEqual(" a rlike '^\\ x20[\\ x20-\\ x23]+$'" , ' a rlike " ^\\ x20[\\ x20-\\ x23]+$" , parser)
174+ assertEqual(" a rlike 'pattern\\\\ '" , ' a rlike " pattern\\\\ " , parser)
175+ assertEqual(" a rlike 'pattern\\ t\\ n'" , ' a rlike " pattern\\ t\\ n" , parser)
176+ }
177+
163178 test(" is null expressions" ) {
164179 assertEqual(" a is null" , ' a .isNull)
165180 assertEqual(" a is not null" , ' a .isNotNull)
@@ -418,38 +433,79 @@ class ExpressionParserSuite extends PlanTest {
418433 }
419434
// String-literal parsing test.
// Removed ('-') version: a flat list of assertions run against the default parser.
// Added ('+') version: the assertions run twice, once per value of ESCAPED_STRING_LITERALS
// (true, then false), each time with a fresh SQLConf and a parser built from it. Assertions
// placed before `if (escape)` are expected to hold under both settings; the branch then pins
// the results that differ between the two settings.
420435 test(" strings" ) {
421- // Single Strings.
422- assertEqual(" \" hello\" " , " hello" )
423- assertEqual(" 'hello'" , " hello" )
424-
425- // Multi-Strings.
426- assertEqual(" \" hello\" 'world'" , " helloworld" )
427- assertEqual(" 'hello' \" \" 'world'" , " hello world" )
428-
429- // 'LIKE' string literals. Notice that an escaped '%' is the same as an escaped '\' and a
430- // regular '%'; to get the correct result you need to add another escaped '\'.
431- // TODO figure out if we shouldn't change the ParseUtils.unescapeSQLString method?
432- assertEqual(" 'pattern%'" , " pattern%" )
433- assertEqual(" 'no-pattern\\ %'" , " no-pattern\\ %" )
434- assertEqual(" 'pattern\\\\ %'" , " pattern\\ %" )
435- assertEqual(" 'pattern\\\\\\ %'" , " pattern\\\\ %" )
436-
437- // Escaped characters.
438- // See: http://dev.mysql.com/doc/refman/5.7/en/string-literals.html
439- assertEqual(" '\\ 0'" , " \u0000 " ) // ASCII NUL (X'00')
440- assertEqual(" '\\ ''" , " \' " ) // Single quote
441- assertEqual(" '\\\" '" , " \" " ) // Double quote
442- assertEqual(" '\\ b'" , " \b " ) // Backspace
443- assertEqual(" '\\ n'" , " \n " ) // Newline
444- assertEqual(" '\\ r'" , " \r " ) // Carriage return
445- assertEqual(" '\\ t'" , " \t " ) // Tab character
446- assertEqual(" '\\ Z'" , " \u001A " ) // ASCII 26 - CTRL + Z (EOF on windows)
447-
448- // Octals
449- assertEqual(" '\\ 110\\ 145\\ 154\\ 154\\ 157\\ 041'" , " Hello!" )
450-
451- // Unicode
452- assertEqual(" '\\ u0057\\ u006F\\ u0072\\ u006C\\ u0064\\ u0020\\ u003A\\ u0029'" , " World :)" )
// One pass per setting; `escape` is the value written into ESCAPED_STRING_LITERALS.
436+ Seq (true , false ).foreach { escape =>
437+ val conf = new SQLConf ()
438+ conf.setConfString(SQLConf .ESCAPED_STRING_LITERALS .key, escape.toString)
439+ val parser = new CatalystSqlParser (conf)
440+
441+ // tests that have the same result regardless of the conf
442+ // Single Strings.
443+ assertEqual(" \" hello\" " , " hello" , parser)
444+ assertEqual(" 'hello'" , " hello" , parser)
445+
446+ // Multi-Strings.
447+ assertEqual(" \" hello\" 'world'" , " helloworld" , parser)
448+ assertEqual(" 'hello' \" \" 'world'" , " hello world" , parser)
449+
450+ // 'LIKE' string literals. Notice that an escaped '%' is the same as an escaped '\' and a
451+ // regular '%'; to get the correct result you need to add another escaped '\'.
452+ // TODO figure out if we shouldn't change the ParseUtils.unescapeSQLString method?
453+ assertEqual(" 'pattern%'" , " pattern%" , parser)
454+ assertEqual(" 'no-pattern\\ %'" , " no-pattern\\ %" , parser)
455+
456+ // tests that have different results depending on the conf
457+ if (escape) {
458+ // When SQLConf.ESCAPED_STRING_LITERALS is enabled, string literal parsing falls back to
459+ // Spark 1.6 behavior.
460+
461+ // 'LIKE' string literals.
// Expected values keep every backslash of the SQL text in this branch.
462+ assertEqual(" 'pattern\\\\ %'" , " pattern\\\\ %" , parser)
463+ assertEqual(" 'pattern\\\\\\ %'" , " pattern\\\\\\ %" , parser)
464+
465+ // Escaped characters.
// The Scala literals below use single-backslash escapes, so the Scala compiler resolves them
// and the SQL text handed to the parser already contains the raw character (NUL, newline,
// ...) rather than a backslash sequence for the SQL parser to interpret.
466+ assertEqual(" '\0 '" , " \u0000 " , parser) // ASCII NUL (X'00')
467+
468+ // Note: Single quote follows 1.6 parsing behavior when ESCAPED_STRING_LITERALS is enabled.
469+ val e = intercept[ParseException ](parser.parseExpression(" '\' '" ))
470+ assert(e.message.contains(" extraneous input '''" ))
471+
472+ assertEqual(" '\" '" , " \" " , parser) // Double quote
473+ assertEqual(" '\b '" , " \b " , parser) // Backspace
474+ assertEqual(" '\n '" , " \n " , parser) // Newline
475+ assertEqual(" '\r '" , " \r " , parser) // Carriage return
476+ assertEqual(" '\t '" , " \t " , parser) // Tab character
477+
478+ // Octals
479+ assertEqual(" '\110\145\154\154\157\041 '" , " Hello!" , parser)
480+ // Unicode
481+ assertEqual(" '\u0057\u006F\u0072\u006C\u0064\u0020\u003A\u0029 '" , " World :)" , parser)
482+ } else {
483+ // Default behavior
// Here the Scala literals use doubled backslashes, so the SQL text contains real backslash
// sequences and the expected values are their unescaped forms.
484+
485+ // 'LIKE' string literals.
486+ assertEqual(" 'pattern\\\\ %'" , " pattern\\ %" , parser)
487+ assertEqual(" 'pattern\\\\\\ %'" , " pattern\\\\ %" , parser)
488+
489+ // Escaped characters.
490+ // See: http://dev.mysql.com/doc/refman/5.7/en/string-literals.html
491+ assertEqual(" '\\ 0'" , " \u0000 " , parser) // ASCII NUL (X'00')
492+ assertEqual(" '\\ ''" , " \' " , parser) // Single quote
493+ assertEqual(" '\\\" '" , " \" " , parser) // Double quote
494+ assertEqual(" '\\ b'" , " \b " , parser) // Backspace
495+ assertEqual(" '\\ n'" , " \n " , parser) // Newline
496+ assertEqual(" '\\ r'" , " \r " , parser) // Carriage return
497+ assertEqual(" '\\ t'" , " \t " , parser) // Tab character
498+ assertEqual(" '\\ Z'" , " \u001A " , parser) // ASCII 26 - CTRL + Z (EOF on windows)
499+
500+ // Octals
501+ assertEqual(" '\\ 110\\ 145\\ 154\\ 154\\ 157\\ 041'" , " Hello!" , parser)
502+
503+ // Unicode
504+ assertEqual(" '\\ u0057\\ u006F\\ u0072\\ u006C\\ u0064\\ u0020\\ u003A\\ u0029'" , " World :)" ,
505+ parser)
506+ }
507+
508+ }
453509 }
454510
455511 test(" intervals" ) {
0 commit comments