Skip to content

Commit f702a95

Browse files
turboFeimaropu
authored andcommitted
[SPARK-33100][SQL] Ignore a semicolon inside a bracketed comment in spark-sql
### What changes were proposed in this pull request? Now the spark-sql does not support parse the sql statements with bracketed comments. For the sql statements: ``` /* SELECT 'test'; */ SELECT 'test'; ``` Would be split to two statements: The first one: `/* SELECT 'test'` The second one: `*/ SELECT 'test'` Then it would throw an exception because the first one is illegal. In this PR, we ignore the content in bracketed comments while splitting the sql statements. Besides, we ignore the comment without any content. ### Why are the changes needed? Spark-sql might split the statements inside bracketed comments and it is not correct. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added UT. Closes #29982 from turboFei/SPARK-33110. Lead-authored-by: fwang12 <[email protected]> Co-authored-by: turbofei <[email protected]> Signed-off-by: Takeshi Yamamuro <[email protected]> (cherry picked from commit a071826) Signed-off-by: Takeshi Yamamuro <[email protected]>
1 parent b3cd86d commit f702a95

File tree

2 files changed

+55
-8
lines changed

2 files changed

+55
-8
lines changed

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -522,14 +522,22 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
522522
// Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted
523523
// string, the origin implementation from Hive will not drop the trailing semicolon as expected,
524524
// hence we refined this function a little bit.
525+
// Note: [SPARK-33100] Ignore a semicolon inside a bracketed comment in spark-sql.
525526
private def splitSemiColon(line: String): JList[String] = {
526527
var insideSingleQuote = false
527528
var insideDoubleQuote = false
528-
var insideComment = false
529+
var insideSimpleComment = false
530+
var bracketedCommentLevel = 0
529531
var escape = false
530532
var beginIndex = 0
533+
var includingStatement = false
531534
val ret = new JArrayList[String]
532535

536+
def insideBracketedComment: Boolean = bracketedCommentLevel > 0
537+
def insideComment: Boolean = insideSimpleComment || insideBracketedComment
538+
def statementBegin(index: Int): Boolean = includingStatement || (!insideComment &&
539+
index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty)
540+
533541
for (index <- 0 until line.length) {
534542
if (line.charAt(index) == '\'' && !insideComment) {
535543
// take a look to see if it is escaped
@@ -553,21 +561,33 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
553561
// Sample query: select "quoted value --"
554562
// ^^ avoids starting a comment if it's inside quotes.
555563
} else if (hasNext && line.charAt(index + 1) == '-') {
556-
// ignore quotes and ;
557-
insideComment = true
564+
// ignore quotes and ; in simple comment
565+
insideSimpleComment = true
558566
}
559567
} else if (line.charAt(index) == ';') {
560568
if (insideSingleQuote || insideDoubleQuote || insideComment) {
561569
// do not split
562570
} else {
563-
// split, do not include ; itself
564-
ret.add(line.substring(beginIndex, index))
571+
if (includingStatement) {
572+
// split, do not include ; itself
573+
ret.add(line.substring(beginIndex, index))
574+
}
565575
beginIndex = index + 1
576+
includingStatement = false
566577
}
567578
} else if (line.charAt(index) == '\n') {
568-
// with a new line the inline comment should end.
579+
// with a new line the inline simple comment should end.
569580
if (!escape) {
570-
insideComment = false
581+
insideSimpleComment = false
582+
}
583+
} else if (line.charAt(index) == '/' && !insideSimpleComment) {
584+
val hasNext = index + 1 < line.length
585+
if (insideSingleQuote || insideDoubleQuote) {
586+
// Ignores '/' in any case of quotes
587+
} else if (insideBracketedComment && line.charAt(index - 1) == '*' ) {
588+
bracketedCommentLevel -= 1
589+
} else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') {
590+
bracketedCommentLevel += 1
571591
}
572592
}
573593
// set the escape
@@ -576,8 +596,12 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
576596
} else if (line.charAt(index) == '\\') {
577597
escape = true
578598
}
599+
600+
includingStatement = statementBegin(index)
601+
}
602+
if (includingStatement) {
603+
ret.add(line.substring(beginIndex))
579604
}
580-
ret.add(line.substring(beginIndex))
581605
ret
582606
}
583607
}

sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,4 +571,27 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
571571
// the date formatter for `java.sql.LocalDate` must output negative years with sign.
572572
runCliWithin(1.minute)("SELECT MAKE_DATE(-44, 3, 15);" -> "-0044-03-15")
573573
}
574+
575+
test("SPARK-33100: Ignore a semicolon inside a bracketed comment in spark-sql") {
576+
runCliWithin(4.minute)(
577+
"/* SELECT 'test';*/ SELECT 'test';" -> "test",
578+
";;/* SELECT 'test';*/ SELECT 'test';" -> "test",
579+
"/* SELECT 'test';*/;; SELECT 'test';" -> "test",
580+
"SELECT 'test'; -- SELECT 'test';" -> "",
581+
"SELECT 'test'; /* SELECT 'test';*/;" -> "",
582+
"/*$meta chars{^\\;}*/ SELECT 'test';" -> "test",
583+
"/*\nmulti-line\n*/ SELECT 'test';" -> "test",
584+
"/*/* multi-level bracketed*/ SELECT 'test';" -> "test"
585+
)
586+
}
587+
588+
test("SPARK-33100: test sql statements with hint in bracketed comment") {
589+
runCliWithin(2.minute)(
590+
"CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES(1, 2) AS t1(k, v);" -> "",
591+
"CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES(2, 1) AS t2(k, v);" -> "",
592+
"EXPLAIN SELECT /*+ MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "SortMergeJoin",
593+
"EXPLAIN SELECT /* + MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;"
594+
-> "BroadcastHashJoin"
595+
)
596+
}
574597
}

0 commit comments

Comments
 (0)