7 changes: 4 additions & 3 deletions docs/sql-keywords.md
@@ -19,12 +19,13 @@ license: |
 limitations under the License.
 ---
 
-When `spark.sql.dialect.spark.ansi.enabled` is true, Spark SQL has two kinds of keywords:
+When `spark.sql.dialect=PostgreSQL`, or when the default `spark.sql.dialect=Spark` is kept and `spark.sql.dialect.spark.ansi.enabled` is set to true, Spark SQL uses the ANSI-mode parser.
+In this mode, Spark SQL has two kinds of keywords:
 * Reserved keywords: Keywords that are reserved and can't be used as identifiers for table, view, column, function, alias, etc.
 * Non-reserved keywords: Keywords that have a special meaning only in particular contexts and can be used as identifiers in other contexts. For example, `SELECT 1 WEEK` is an interval literal, but WEEK can be used as an identifier in other places.
 
-When `spark.sql.dialect.spark.ansi.enabled` is false, Spark SQL has two kinds of keywords:
-* Non-reserved keywords: Same definition as the one when `spark.sql.dialect.spark.ansi.enabled=true`.
+When the ANSI mode is disabled, Spark SQL has two kinds of keywords:
+* Non-reserved keywords: Same definition as the one when the ANSI mode is enabled.
 * Strict-non-reserved keywords: A strict version of non-reserved keywords, which cannot be used as a table alias.
 
 By default `spark.sql.dialect.spark.ansi.enabled` is false.
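A minimal sketch of the two regimes described above, assuming a local `SparkSession` named `spark` (the queries and outcomes follow the keyword classes in this doc, not any API introduced by this PR):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local").getOrCreate()

// ANSI keyword behavior on: `1 WEEK` parses as an interval literal.
spark.conf.set("spark.sql.dialect.spark.ansi.enabled", "true")
spark.sql("SELECT 1 WEEK")

// WEEK stays non-reserved in either mode, so it can still name a column.
spark.conf.set("spark.sql.dialect.spark.ansi.enabled", "false")
spark.sql("SELECT 1 AS week")
```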
@@ -46,9 +46,9 @@ grammar SqlBase;
 }
 
   /**
-   * When true, ANSI SQL parsing mode is enabled.
+   * When true, the behavior of keywords follows the ANSI SQL standard.
   */
-  public boolean ansi = false;
+  public boolean SQL_standard_keyword_behavior = false;
 }
 
 singleStatement
@@ -744,7 +744,7 @@ primaryExpression
     | qualifiedName '.' ASTERISK #star
     | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor
     | '(' query ')' #subqueryExpression
-    | qualifiedName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')'
+    | functionName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')'
       (OVER windowSpec)? #functionCall
     | identifier '->' expression #lambda
     | '(' identifier (',' identifier)+ ')' '->' expression #lambda
@@ -788,7 +788,7 @@ booleanValue

 interval
     : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)?
-    | {ansi}? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
+    | {SQL_standard_keyword_behavior}? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
     ;
 
 errorCapturingMultiUnitsInterval
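The second alternative is gated by the parser flag, i.e. the `INTERVAL` prefix may be dropped only under ANSI keyword behavior. A hedged sketch, assuming a `SparkSession` named `spark` with the flag effectively on:

```scala
spark.sql("SELECT INTERVAL 3 DAYS")  // always valid: first alternative
spark.sql("SELECT 3 DAYS")           // parses only via the gated second alternative
```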
@@ -908,6 +908,12 @@ qualifiedNameList
     : qualifiedName (',' qualifiedName)*
     ;
 
+functionName
+    : qualifiedName
+    | LEFT
+    | RIGHT
+    ;
+
 qualifiedName
     : identifier ('.' identifier)*
     ;
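`LEFT` and `RIGHT` are strict-non-reserved keywords (they introduce join types), so they can never match `qualifiedName`; listing them in `functionName` is what lets the built-in string functions of the same name be called. A small sketch, assuming a `SparkSession` named `spark`:

```scala
spark.sql("SELECT left('ahoj', 2)")   // "ah" — now parses in ANSI mode too
spark.sql("SELECT right('ahoj', 2)")  // "oj"
```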
@@ -927,14 +933,14 @@ errorCapturingIdentifierExtra

 identifier
     : strictIdentifier
-    | {!ansi}? strictNonReserved
+    | {!SQL_standard_keyword_behavior}? strictNonReserved
     ;
 
 strictIdentifier
     : IDENTIFIER #unquotedIdentifier
     | quotedIdentifier #quotedIdentifierAlternative
-    | {ansi}? ansiNonReserved #unquotedIdentifier
-    | {!ansi}? nonReserved #unquotedIdentifier
+    | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier
+    | {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier
     ;
 
 quotedIdentifier
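The predicates select which keyword list may appear as an unquoted identifier; backquoting sidesteps the keyword classes entirely. A rough sketch, assuming a `SparkSession` named `spark`:

```scala
// Non-reserved in both modes: fine as an unquoted column name.
spark.sql("SELECT 1 AS week")
// Quoting always works, whatever the keyword mode.
spark.sql("SELECT 1 AS `select`")
```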
@@ -951,7 +957,7 @@ number
     | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral
     ;
 
-// When `spark.sql.dialect.spark.ansi.enabled=true`, there are 2 kinds of keywords in Spark SQL.
+// When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL.
 // - Reserved keywords:
 //   Keywords that are reserved and can't be used as identifiers for table, view, column,
 //   function, alias, etc.
@@ -1151,9 +1157,9 @@ ansiNonReserved
     | YEARS
     ;
 
-// When `spark.sql.dialect.spark.ansi.enabled=false`, there are 2 kinds of keywords in Spark SQL.
+// When `SQL_standard_keyword_behavior=false`, there are 2 kinds of keywords in Spark SQL.
 // - Non-reserved keywords:
-//   Same definition as the one when `spark.sql.dialect.spark.ansi.enabled=true`.
+//   Same definition as the one when `SQL_standard_keyword_behavior=true`.
 // - Strict-non-reserved keywords:
 //   A strict version of non-reserved keywords, which cannot be used as a table alias.
 // You can find the full keywords list by searching "Start of the keywords list" in this file.
@@ -1581,7 +1581,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
    */
   override def visitFunctionCall(ctx: FunctionCallContext): Expression = withOrigin(ctx) {
     // Create the function call.
-    val name = ctx.qualifiedName.getText
+    val name = ctx.functionName.getText
     val isDistinct = Option(ctx.setQuantifier()).exists(_.DISTINCT != null)
     val arguments = ctx.argument.asScala.map(expression) match {
       case Seq(UnresolvedStar(None))
@@ -1591,7 +1591,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
       case expressions =>
         expressions
     }
-    val function = UnresolvedFunction(visitFunctionName(ctx.qualifiedName), arguments, isDistinct)
+    val function = UnresolvedFunction(
+      getFunctionIdentifier(ctx.functionName), arguments, isDistinct)
 
     // Check if the function is evaluated in a windowed context.
     ctx.windowSpec match {
@@ -1631,6 +1632,17 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
     }
   }
 
+  /**
+   * Get a function identifier consisting of an optional database name and a function name.
+   */
+  protected def getFunctionIdentifier(ctx: FunctionNameContext): FunctionIdentifier = {
+    if (ctx.qualifiedName != null) {
+      visitFunctionName(ctx.qualifiedName)
+    } else {
+      FunctionIdentifier(ctx.getText, None)
+    }
+  }
+
   /**
    * Create a [[LambdaFunction]].
    */
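How the two branches of `getFunctionIdentifier` resolve, with hypothetical inputs (the database name `mydb` and function names are illustrative only):

```scala
// SELECT mydb.myfunc(x) -> qualifiedName branch -> FunctionIdentifier("myfunc", Some("mydb"))
// SELECT left(s, 2)     -> LEFT token branch    -> FunctionIdentifier("left", None)
```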
@@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.trees.Origin
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.Dialect
 import org.apache.spark.sql.types.{DataType, StructType}
 
 /**
@@ -88,19 +89,26 @@ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Log
   protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = {
     logDebug(s"Parsing command: $command")
 
+    // When the PostgreSQL dialect is in use, or the Spark dialect has
+    // `spark.sql.dialect.spark.ansi.enabled=true`, the parser uses ANSI SQL standard keywords.
+    val sqlStandardKeywordBehavior = conf.dialect match {
+      case Dialect.POSTGRESQL => true
+      case Dialect.SPARK => conf.dialectSparkAnsiEnabled
+    }
+
     val lexer = new SqlBaseLexer(new UpperCaseCharStream(CharStreams.fromString(command)))
     lexer.removeErrorListeners()
     lexer.addErrorListener(ParseErrorListener)
     lexer.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced
-    lexer.ansi = conf.dialectSparkAnsiEnabled
+    lexer.SQL_standard_keyword_behavior = sqlStandardKeywordBehavior
 
     val tokenStream = new CommonTokenStream(lexer)
     val parser = new SqlBaseParser(tokenStream)
     parser.addParseListener(PostProcessor)
     parser.removeErrorListeners()
     parser.addErrorListener(ParseErrorListener)
     parser.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced
-    parser.ansi = conf.dialectSparkAnsiEnabled
+    parser.SQL_standard_keyword_behavior = sqlStandardKeywordBehavior
 
     try {
       try {
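For reference, the two configurations that yield ANSI keyword behavior, exactly as the match above encodes them (a sketch, assuming a `SparkSession` named `spark`):

```scala
spark.conf.set("spark.sql.dialect", "PostgreSQL")
// or, keeping the default dialect:
spark.conf.set("spark.sql.dialect", "Spark")
spark.conf.set("spark.sql.dialect.spark.ansi.enabled", "true")
```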
@@ -2515,7 +2515,9 @@ class SQLConf extends Serializable with Logging {

   def intervalOutputStyle: IntervalStyle.Value = IntervalStyle.withName(getConf(INTERVAL_STYLE))
 
-  def usePostgreSQLDialect: Boolean = getConf(DIALECT) == Dialect.POSTGRESQL.toString
+  def dialect: Dialect.Value = Dialect.withName(getConf(DIALECT))
+
+  def usePostgreSQLDialect: Boolean = dialect == Dialect.POSTGRESQL
 
   def dialectSparkAnsiEnabled: Boolean = getConf(DIALECT_SPARK_ANSI_ENABLED)
 
@@ -44,11 +44,7 @@ select concat_ws(',',10,20,null,30);
 select concat_ws('',10,20,null,30);
 select concat_ws(NULL,10,20,null,30) is null;
 select reverse('abcde');
--- [SPARK-28036] Built-in udf left/right has inconsistent behavior
--- [SPARK-28479][SPARK-28989] Parser error when enabling ANSI mode
-set spark.sql.dialect.spark.ansi.enabled=false;
 select i, left('ahoj', i), right('ahoj', i) from range(-5, 6) t(i) order by i;
-set spark.sql.dialect.spark.ansi.enabled=true;
 -- [SPARK-28037] Add built-in String Functions: quote_literal
 -- select quote_literal('');
 -- select quote_literal('abc''');
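The removed toggles were a workaround: `left`/`right` used to fail to parse under ANSI mode. With `functionName` accepting `LEFT`/`RIGHT`, the test runs unchanged in either mode; a sketch, assuming a `SparkSession` named `spark`:

```scala
spark.sql("set spark.sql.dialect.spark.ansi.enabled=true")
spark.sql("select left('ahoj', 2)")  // "ah" — no longer a parse error
```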