Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,17 @@ case object ParseErrorListener extends BaseErrorListener {
charPositionInLine: Int,
msg: String,
e: RecognitionException): Unit = {
val position = Origin(Some(line), Some(charPositionInLine))
throw new ParseException(None, msg, position, position)
val (start, stop) = offendingSymbol match {
case token: CommonToken =>
val start = Origin(Some(line), Some(token.getCharPositionInLine))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like the computation of start can be moved outside the match? Is only the computation of stop different between CommonToken and non-CommonToken symbols?

Also, just for my understanding, can you please briefly explain the difference between CommonToken and the other ones?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not exactly the same code, but does it have the same result? Looking OK to me but @rubenfiszel could you comment?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From a pure code point of view, it's not equivalent, since it uses token.getCharPositionInLine instead of the method argument charPositionInLine.

It might be equivalent, but that would require an invariant to hold (the method's charPositionInLine argument == token.getCharPositionInLine), which seems unnecessary to rely on, since the intent of this specific case is to leverage the information from the CommonToken directly.

The difference between CommonToken and other types of offending symbols is that, for a CommonToken, it is clear where the stop is.

We use this internally in our fork of Spark to get nice language-server-protocol errors that are correctly delimited.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rberenguel thanks for your explanation.

val length = token.getStopIndex - token.getStartIndex + 1
val stop = Origin(Some(line), Some(token.getCharPositionInLine + length))
(start, stop)
case _ =>
val start = Origin(Some(line), Some(charPositionInLine))
(start, start)
}
throw new ParseException(None, msg, start, stop)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,17 @@ import org.apache.spark.SparkFunSuite
* Test various parser errors.
*/
class ErrorParserSuite extends SparkFunSuite {
def intercept(sql: String, line: Int, startPosition: Int, messages: String*): Unit = {
def intercept(sql: String, line: Int, startPosition: Int, stopPosition: Int,
messages: String*): Unit = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit:

  def intercept(
      sql: String,
      line: Int,
      startPosition: Int,
      stopPosition: Int,
      messages: String*): Unit = {

val e = intercept[ParseException](CatalystSqlParser.parsePlan(sql))

// Check position.
assert(e.line.isDefined)
assert(e.line.get === line)
assert(e.startPosition.isDefined)
assert(e.startPosition.get === startPosition)
assert(e.stop.startPosition.isDefined)
assert(e.stop.startPosition.get === stopPosition)

// Check messages.
val error = e.getMessage
Expand All @@ -39,23 +42,24 @@ class ErrorParserSuite extends SparkFunSuite {
}

test("no viable input") {
intercept("select ((r + 1) ", 1, 16, "no viable alternative at input", "----------------^^^")
intercept("select ((r + 1) ", 1, 16, 16,
"no viable alternative at input", "----------------^^^")
}

test("extraneous input") {
intercept("select 1 1", 1, 9, "extraneous input '1' expecting", "---------^^^")
intercept("select *\nfrom r as q t", 2, 12, "extraneous input", "------------^^^")
intercept("select 1 1", 1, 9, 10, "extraneous input '1' expecting", "---------^^^")
intercept("select *\nfrom r as q t", 2, 12, 13, "extraneous input", "------------^^^")
}

test("mismatched input") {
intercept("select * from r order by q from t", 1, 27,
intercept("select * from r order by q from t", 1, 27, 31,
"mismatched input",
"---------------------------^^^")
intercept("select *\nfrom r\norder by q\nfrom t", 4, 0, "mismatched input", "^^^")
intercept("select *\nfrom r\norder by q\nfrom t", 4, 0, 4, "mismatched input", "^^^")
}

test("semantic errors") {
intercept("select *\nfrom r\norder by q\ncluster by q", 3, 0,
intercept("select *\nfrom r\norder by q\ncluster by q", 3, 0, 11,
"Combination of ORDER BY/SORT BY/DISTRIBUTE BY/CLUSTER BY is not supported",
"^^^")
}
Expand Down