From 49fa2a87254238eb42e2801321dc50fb94cc50bb Mon Sep 17 00:00:00 2001 From: Javier Fuentes Date: Tue, 12 May 2020 13:46:24 +0000 Subject: [PATCH 1/2] [SPARK-31102][SQL] Spark-sql fails to parse when contains comment This PR introduces a change to false for the insideComment flag on a newline. Fixing the issue introduced by SPARK-30049. Previously on SPARK-30049 a comment containing an unclosed quote produced the following issue: ``` spark-sql> SELECT 1 -- someone's comment here > ; Error in query: extraneous input ';' expecting (line 2, pos 0) == SQL == SELECT 1 -- someone's comment here ; ^^^ ``` This was caused because there was no flag for comment sections inside the splitSemiColon method to ignore quotes. SPARK-30049 added that flag and fixed the issue, but introduced the follwoing problem: ``` spark-sql> select > 1, > -- two > 2; Error in query: mismatched input '' expecting {'(', 'ADD', 'AFTER', 'ALL', 'ALTER', ...}(line 3, pos 2) == SQL == select 1, --^^^ ``` This issue is generated by a missing turn-off for the insideComment flag with a newline. No - For previous tests using line-continuity(`\`) it was added a line-continuity rule in the SqlBase.g4 file to add the functionality to the SQL context. - A new test for inline comments was added. Closes #27920 from javierivanov/SPARK-31102. Authored-by: Javier Fuentes Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../sql/catalyst/parser/PlanParserSuite.scala | 103 ++++++++++++++++++ .../hive/thriftserver/SparkSQLCLIDriver.scala | 12 +- .../sql/hive/thriftserver/CliSuite.scala | 20 ++-- 4 files changed, 121 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 613582fa0a99e..2adaa9f46ceba 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -1784,7 +1784,7 @@ fragment LETTER ; SIMPLE_COMMENT - : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN) + : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN) ; BRACKETED_EMPTY_COMMENT diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 714f8c53e1d35..88afcb10d9c20 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -55,6 +55,109 @@ class PlanParserSuite extends AnalysisTest { With(plan, ctes) } + test("single comment case one") { + val plan = table("a").select(star()) + assertEqual("-- single comment\nSELECT * FROM a", plan) + } + + test("single comment case two") { + val plan = table("a").select(star()) + assertEqual("-- single comment\\\nwith line continuity\nSELECT * FROM a", plan) + } + + test("bracketed comment case one") { + val plan = table("a").select(star()) + assertEqual( + """ + |/* This is an example of SQL which should not execute: + | * select 'multi-line'; + | */ + |SELECT * FROM a + """.stripMargin, plan) + } + + test("bracketed comment case two") { + val plan = table("a").select(star()) + assertEqual( + """ + |/* + |SELECT 'trailing' as x1; -- inside block comment + |*/ + |SELECT * FROM a + """.stripMargin, plan) + } + + test("nested bracketed comment case one") { + val plan = table("a").select(star()) + assertEqual( + """ + |/* This block comment surrounds a query which itself has a block comment... + |SELECT /* embedded single line */ 'embedded' AS x2; + |*/ + |SELECT * FROM a + """.stripMargin, plan) + } + + test("nested bracketed comment case two") { + val plan = table("a").select(star()) + assertEqual( + """ + |SELECT -- continued after the following block comments... + |/* Deeply nested comment. + | This includes a single apostrophe to make sure we aren't decoding this part as a string. + |SELECT 'deep nest' AS n1; + |/* Second level of nesting... + |SELECT 'deeper nest' as n2; + |/* Third level of nesting... + |SELECT 'deepest nest' as n3; + |*/ + |Hoo boy. Still two deep... + |*/ + |Now just one deep... + |*/ + |* FROM a + """.stripMargin, plan) + } + + test("nested bracketed comment case three") { + val plan = table("a").select(star()) + assertEqual( + """ + |/* This block comment surrounds a query which itself has a block comment... + |//* I am a nested bracketed comment. + |*/ + |*/ + |SELECT * FROM a + """.stripMargin, plan) + } + + test("nested bracketed comment case four") { + val plan = table("a").select(star()) + assertEqual( + """ + |/*/**/*/ + |SELECT * FROM a + """.stripMargin, plan) + } + + test("nested bracketed comment case five") { + val plan = table("a").select(star()) + assertEqual( + """ + |/*/*abc*/*/ + |SELECT * FROM a + """.stripMargin, plan) + } + + test("nested bracketed comment case six") { + val plan = table("a").select(star()) + assertEqual( + """ + |/*/*foo*//*bar*/*/ + |SELECT * FROM a + """.stripMargin, plan) + } + test("case insensitive") { val plan = table("a").select(star()) assertEqual("sELEct * FroM a", plan) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 50b5eb43bd1ad..c7848afd822d5 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -524,7 +524,6 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { var insideComment = false var escape = false var beginIndex = 0 - var endIndex = line.length val ret = new JArrayList[String] for (index <- 0 until line.length) { @@ -552,8 +551,6 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { } else if (hasNext && line.charAt(index + 1) == '-') { // ignore quotes and ; insideComment = true - // ignore eol - endIndex = index } } else if (line.charAt(index) == ';') { if (insideSingleQuote || insideDoubleQuote || insideComment) { @@ -563,8 +560,11 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { ret.add(line.substring(beginIndex, index)) beginIndex = index + 1 } - } else { - // nothing to do + } else if (line.charAt(index) == '\n') { + // with a new line the inline comment should end. + if (!escape) { + insideComment = false + } } // set the escape if (escape) { @@ -573,7 +573,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { escape = true } } - ret.add(line.substring(beginIndex, endIndex)) + ret.add(line.substring(beginIndex)) ret } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index b41a4b7f891db..ea1a371151c36 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -508,18 +508,20 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { ) } - test("SPARK-30049 Should not complain for quotes in commented with multi-lines") { + test("SPARK-31102 spark-sql fails to parse when contains comment") { runCliWithin(1.minute)( - """SELECT concat('test', 'comment') -- someone's comment here \\ - | comment continues here with single ' quote \\ - | extra ' \\ - |;""".stripMargin -> "testcomment" + """SELECT concat('test', 'comment'), + | -- someone's comment here + | 2;""".stripMargin -> "testcomment" ) + } + + test("SPARK-30049 Should not complain for quotes in commented with multi-lines") { runCliWithin(1.minute)( - """SELECT concat('test', 'comment') -- someone's comment here \\ - | comment continues here with single ' quote \\ - | extra ' \\ - | ;""".stripMargin -> "testcomment" + """SELECT concat('test', 'comment') -- someone's comment here \ + | comment continues here with single ' quote \ + | extra ' \ + |;""".stripMargin -> "testcomment" ) } From a31812733731b13f68cb1a677cd06f92a6286c48 Mon Sep 17 00:00:00 2001 From: Javier Fuentes Date: Mon, 18 May 2020 08:11:40 -0400 Subject: [PATCH 2/2] removed tests carried from master --- .../sql/catalyst/parser/PlanParserSuite.scala | 93 ------------------- 1 file changed, 93 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 88afcb10d9c20..515ba2a076fa8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -65,99 +65,6 @@ class PlanParserSuite extends AnalysisTest { assertEqual("-- single comment\\\nwith line continuity\nSELECT * FROM a", plan) } - test("bracketed comment case one") { - val plan = table("a").select(star()) - assertEqual( - """ - |/* This is an example of SQL which should not execute: - | * select 'multi-line'; - | */ - |SELECT * FROM a - """.stripMargin, plan) - } - - test("bracketed comment case two") { - val plan = table("a").select(star()) - assertEqual( - """ - |/* - |SELECT 'trailing' as x1; -- inside block comment - |*/ - |SELECT * FROM a - """.stripMargin, plan) - } - - test("nested bracketed comment case one") { - val plan = table("a").select(star()) - assertEqual( - """ - |/* This block comment surrounds a query which itself has a block comment... - |SELECT /* embedded single line */ 'embedded' AS x2; - |*/ - |SELECT * FROM a - """.stripMargin, plan) - } - - test("nested bracketed comment case two") { - val plan = table("a").select(star()) - assertEqual( - """ - |SELECT -- continued after the following block comments... - |/* Deeply nested comment. - | This includes a single apostrophe to make sure we aren't decoding this part as a string. - |SELECT 'deep nest' AS n1; - |/* Second level of nesting... - |SELECT 'deeper nest' as n2; - |/* Third level of nesting... - |SELECT 'deepest nest' as n3; - |*/ - |Hoo boy. Still two deep... - |*/ - |Now just one deep... - |*/ - |* FROM a - """.stripMargin, plan) - } - - test("nested bracketed comment case three") { - val plan = table("a").select(star()) - assertEqual( - """ - |/* This block comment surrounds a query which itself has a block comment... - |//* I am a nested bracketed comment. - |*/ - |*/ - |SELECT * FROM a - """.stripMargin, plan) - } - - test("nested bracketed comment case four") { - val plan = table("a").select(star()) - assertEqual( - """ - |/*/**/*/ - |SELECT * FROM a - """.stripMargin, plan) - } - - test("nested bracketed comment case five") { - val plan = table("a").select(star()) - assertEqual( - """ - |/*/*abc*/*/ - |SELECT * FROM a - """.stripMargin, plan) - } - - test("nested bracketed comment case six") { - val plan = table("a").select(star()) - assertEqual( - """ - |/*/*foo*//*bar*/*/ - |SELECT * FROM a - """.stripMargin, plan) - } - test("case insensitive") { val plan = table("a").select(star()) assertEqual("sELEct * FroM a", plan)