Skip to content

Commit a15f125

Browse files
committed
[SPARK-33100][SQL][FOLLOWUP] Find correct bound of bracketed comment in spark-sql
### What changes were proposed in this pull request? This PR help find correct bound of bracketed comment in spark-sql. Here is the log for UT of SPARK-33100 in CliSuite before: ``` 2021-01-05 13:22:34.768 - stdout> spark-sql> /* SELECT 'test';*/ SELECT 'test'; 2021-01-05 13:22:41.523 - stderr> Time taken: 6.716 seconds, Fetched 1 row(s) 2021-01-05 13:22:41.599 - stdout> test 2021-01-05 13:22:41.6 - stdout> spark-sql> ;;/* SELECT 'test';*/ SELECT 'test'; 2021-01-05 13:22:41.709 - stdout> test 2021-01-05 13:22:41.709 - stdout> spark-sql> /* SELECT 'test';*/;; SELECT 'test'; 2021-01-05 13:22:41.902 - stdout> spark-sql> SELECT 'test'; -- SELECT 'test'; 2021-01-05 13:22:41.902 - stderr> Time taken: 0.129 seconds, Fetched 1 row(s) 2021-01-05 13:22:41.902 - stderr> Error in query: 2021-01-05 13:22:41.902 - stderr> mismatched input '<EOF>' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 19) 2021-01-05 13:22:42.006 - stderr> 2021-01-05 13:22:42.006 - stderr> == SQL == 2021-01-05 13:22:42.006 - stderr> /* SELECT 'test';*/ 2021-01-05 13:22:42.006 - stderr> -------------------^^^ 2021-01-05 13:22:42.006 - stderr> 2021-01-05 13:22:42.006 - stderr> Time taken: 0.226 seconds, Fetched 1 row(s) 2021-01-05 13:22:42.006 - stdout> test ``` The root cause is that the insideBracketedComment is not accurate. For `/* comment */`, the last character `/` is not insideBracketedComment and it would be treat as beginning of statements. In this PR, this issue is fixed. ### Why are the changes needed? To fix the issue described above. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #31054 from turboFei/SPARK-33100-followup. Authored-by: fwang12 <[email protected]> Signed-off-by: Takeshi Yamamuro <[email protected]>
1 parent 3ee7483 commit a15f125

File tree

2 files changed

+19
-9
lines changed

2 files changed

+19
-9
lines changed

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -526,15 +526,24 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
526526
var bracketedCommentLevel = 0
527527
var escape = false
528528
var beginIndex = 0
529-
var includingStatement = false
529+
var leavingBracketedComment = false
530+
var isStatement = false
530531
val ret = new JArrayList[String]
531532

532533
def insideBracketedComment: Boolean = bracketedCommentLevel > 0
533534
def insideComment: Boolean = insideSimpleComment || insideBracketedComment
534-
def statementBegin(index: Int): Boolean = includingStatement || (!insideComment &&
535+
def statementInProgress(index: Int): Boolean = isStatement || (!insideComment &&
535536
index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty)
536537

537538
for (index <- 0 until line.length) {
539+
// Checks if we need to decrement a bracketed comment level; the last character '/' of
540+
// bracketed comments is still inside the comment, so `insideBracketedComment` must keep true
541+
// in the previous loop and we decrement the level here if needed.
542+
if (leavingBracketedComment) {
543+
bracketedCommentLevel -= 1
544+
leavingBracketedComment = false
545+
}
546+
538547
if (line.charAt(index) == '\'' && !insideComment) {
539548
// take a look to see if it is escaped
540549
// See the comment above about SPARK-31595
@@ -564,12 +573,12 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
564573
if (insideSingleQuote || insideDoubleQuote || insideComment) {
565574
// do not split
566575
} else {
567-
if (includingStatement) {
576+
if (isStatement) {
568577
// split, do not include ; itself
569578
ret.add(line.substring(beginIndex, index))
570579
}
571580
beginIndex = index + 1
572-
includingStatement = false
581+
isStatement = false
573582
}
574583
} else if (line.charAt(index) == '\n') {
575584
// with a new line the inline simple comment should end.
@@ -581,7 +590,8 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
581590
if (insideSingleQuote || insideDoubleQuote) {
582591
// Ignores '/' in any case of quotes
583592
} else if (insideBracketedComment && line.charAt(index - 1) == '*' ) {
584-
bracketedCommentLevel -= 1
593+
// Decrements `bracketedCommentLevel` at the beginning of the next loop
594+
leavingBracketedComment = true
585595
} else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') {
586596
bracketedCommentLevel += 1
587597
}
@@ -593,9 +603,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
593603
escape = true
594604
}
595605

596-
includingStatement = statementBegin(index)
606+
isStatement = statementInProgress(index)
597607
}
598-
if (includingStatement) {
608+
if (isStatement) {
599609
ret.add(line.substring(beginIndex))
600610
}
601611
ret

sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -557,8 +557,8 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
557557
"/* SELECT 'test';*/ SELECT 'test';" -> "test",
558558
";;/* SELECT 'test';*/ SELECT 'test';" -> "test",
559559
"/* SELECT 'test';*/;; SELECT 'test';" -> "test",
560-
"SELECT 'test'; -- SELECT 'test';" -> "",
561-
"SELECT 'test'; /* SELECT 'test';*/;" -> "",
560+
"SELECT 'test'; -- SELECT 'test';" -> "test",
561+
"SELECT 'test'; /* SELECT 'test';*/;" -> "test",
562562
"/*$meta chars{^\\;}*/ SELECT 'test';" -> "test",
563563
"/*\nmulti-line\n*/ SELECT 'test';" -> "test",
564564
"/*/* multi-level bracketed*/ SELECT 'test';" -> "test"

0 commit comments

Comments
 (0)