From d5cab2f5ee8d908c0175e56f81a23a42e655a30b Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Tue, 19 Nov 2019 04:30:25 +0800 Subject: [PATCH 1/5] Make the behavior of Postgre dialect independent of ansi mode config --- .../spark/sql/catalyst/parser/SqlBase.g4 | 8 +- .../sql/catalyst/parser/AstBuilder.scala | 16 +- .../sql/catalyst/parser/ParseDriver.scala | 4 +- .../sql-tests/inputs/postgreSQL/text.sql | 4 - .../sql-tests/results/postgreSQL/text.sql.out | 160 ++++++++---------- .../apache/spark/sql/SQLQueryTestSuite.scala | 1 - 6 files changed, 95 insertions(+), 98 deletions(-) mode change 100644 => 100755 sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 4f81dc47394a5..20a65b904afff 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -744,7 +744,7 @@ primaryExpression | qualifiedName '.' ASTERISK #star | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor | '(' query ')' #subqueryExpression - | qualifiedName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')' + | functionCallName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')' (OVER windowSpec)? #functionCall | identifier '->' expression #lambda | '(' identifier (',' identifier)+ ')' '->' expression #lambda @@ -908,6 +908,12 @@ qualifiedNameList : qualifiedName (',' qualifiedName)* ; +functionCallName + : qualifiedName + | LEFT + | RIGHT + ; + qualifiedName : identifier ('.' identifier)* ; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index e0476879f0e80..2be78e861e6ab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1581,7 +1581,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging */ override def visitFunctionCall(ctx: FunctionCallContext): Expression = withOrigin(ctx) { // Create the function call. - val name = ctx.qualifiedName.getText + val name = ctx.functionCallName.getText val isDistinct = Option(ctx.setQuantifier()).exists(_.DISTINCT != null) val arguments = ctx.argument.asScala.map(expression) match { case Seq(UnresolvedStar(None)) @@ -1591,7 +1591,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging case expressions => expressions } - val function = UnresolvedFunction(visitFunctionName(ctx.qualifiedName), arguments, isDistinct) + val function = UnresolvedFunction( + visitFunctionName(ctx.functionCallName), arguments, isDistinct) // Check if the function is evaluated in a windowed context. ctx.windowSpec match { @@ -1619,6 +1620,17 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging visitFunctionName(ctx, ctx.identifier().asScala.map(_.getText)) } + /** + * Create a function database (optional) and name pair. + */ + protected def visitFunctionName(ctx: FunctionCallNameContext): FunctionIdentifier = { + if (ctx.qualifiedName != null) { + visitFunctionName(ctx.qualifiedName) + } else { + FunctionIdentifier(ctx.getText, None) + } + } + /** * Create a function database (optional) and name pair. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index e291d5f9cd84c..a84d29b71ac42 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -92,7 +92,7 @@ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Log lexer.removeErrorListeners() lexer.addErrorListener(ParseErrorListener) lexer.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced - lexer.ansi = conf.dialectSparkAnsiEnabled + lexer.ansi = conf.ansiEnabled val tokenStream = new CommonTokenStream(lexer) val parser = new SqlBaseParser(tokenStream) @@ -100,7 +100,7 @@ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Log parser.removeErrorListeners() parser.addErrorListener(ParseErrorListener) parser.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced - parser.ansi = conf.dialectSparkAnsiEnabled + parser.ansi = conf.ansiEnabled try { try { diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/text.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/text.sql index a1fe95462ecae..05953123da86f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/text.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/text.sql @@ -44,11 +44,7 @@ select concat_ws(',',10,20,null,30); select concat_ws('',10,20,null,30); select concat_ws(NULL,10,20,null,30) is null; select reverse('abcde'); --- [SPARK-28036] Built-in udf left/right has inconsistent behavior --- [SPARK-28479][SPARK-28989] Parser error when enabling ANSI mode -set spark.sql.dialect.spark.ansi.enabled=false; select i, left('ahoj', i), right('ahoj', i) from range(-5, 6) t(i) order by i; -set spark.sql.dialect.spark.ansi.enabled=true; -- [SPARK-28037] Add built-in String Functions: quote_literal -- select quote_literal(''); -- select quote_literal('abc'''); diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out old mode 100644 new mode 100755 index cac1e7ee5ab12..311b0eb5a5844 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 44 +-- Number of queries: 42 -- !query 0 @@ -151,18 +151,10 @@ edcba -- !query 18 -set spark.sql.dialect.spark.ansi.enabled=false --- !query 18 schema -struct --- !query 18 output -spark.sql.dialect.spark.ansi.enabled false - - --- !query 19 select i, left('ahoj', i), right('ahoj', i) from range(-5, 6) t(i) order by i --- !query 19 schema +-- !query 18 schema struct --- !query 19 output +-- !query 18 output -5 -4 -3 @@ -176,200 +168,192 @@ struct 5 ahoj ahoj --- !query 20 -set spark.sql.dialect.spark.ansi.enabled=true --- !query 20 schema -struct --- !query 20 output -spark.sql.dialect.spark.ansi.enabled true - - --- !query 21 +-- !query 19 /* * format */ select format_string(NULL) --- !query 21 schema +-- !query 19 schema struct --- !query 21 output +-- !query 19 output NULL --- !query 22 +-- !query 20 select format_string('Hello') --- !query 22 schema +-- !query 20 schema struct --- !query 22 output +-- !query 20 output Hello --- !query 23 +-- !query 21 select format_string('Hello %s', 'World') --- 
!query 23 schema +-- !query 21 schema struct --- !query 23 output +-- !query 21 output Hello World --- !query 24 +-- !query 22 select format_string('Hello %%') --- !query 24 schema +-- !query 22 schema struct --- !query 24 output +-- !query 22 output Hello % --- !query 25 +-- !query 23 select format_string('Hello %%%%') --- !query 25 schema +-- !query 23 schema struct --- !query 25 output +-- !query 23 output Hello %% --- !query 26 +-- !query 24 select format_string('Hello %s %s', 'World') --- !query 26 schema +-- !query 24 schema struct<> --- !query 26 output +-- !query 24 output java.util.MissingFormatArgumentException Format specifier '%s' --- !query 27 +-- !query 25 select format_string('Hello %s') --- !query 27 schema +-- !query 25 schema struct<> --- !query 27 output +-- !query 25 output java.util.MissingFormatArgumentException Format specifier '%s' --- !query 28 +-- !query 26 select format_string('Hello %x', 20) --- !query 28 schema +-- !query 26 schema struct --- !query 28 output +-- !query 26 output Hello 14 --- !query 29 +-- !query 27 select format_string('%1$s %3$s', 1, 2, 3) --- !query 29 schema +-- !query 27 schema struct --- !query 29 output +-- !query 27 output 1 3 --- !query 30 +-- !query 28 select format_string('%1$s %12$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12) --- !query 30 schema +-- !query 28 schema struct --- !query 30 output +-- !query 28 output 1 12 --- !query 31 +-- !query 29 select format_string('%1$s %4$s', 1, 2, 3) --- !query 31 schema +-- !query 29 schema struct<> --- !query 31 output +-- !query 29 output java.util.MissingFormatArgumentException Format specifier '%4$s' --- !query 32 +-- !query 30 select format_string('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12) --- !query 32 schema +-- !query 30 schema struct<> --- !query 32 output +-- !query 30 output java.util.MissingFormatArgumentException Format specifier '%13$s' --- !query 33 +-- !query 31 select format_string('%0$s', 'Hello') --- !query 33 schema +-- !query 31 schema struct --- !query 33 output +-- !query 31 output Hello --- !query 34 +-- !query 32 select format_string('Hello %s %1$s %s', 'World', 'Hello again') --- !query 34 schema +-- !query 32 schema struct --- !query 34 output +-- !query 32 output Hello World World Hello again --- !query 35 +-- !query 33 select format_string('Hello %s %s, %2$s %2$s', 'World', 'Hello again') --- !query 35 schema +-- !query 33 schema struct --- !query 35 output +-- !query 33 output Hello World Hello again, Hello again Hello again --- !query 36 +-- !query 34 select format_string('>>%10s<<', 'Hello') --- !query 36 schema +-- !query 34 schema struct>%10s<<, Hello):string> --- !query 36 output +-- !query 34 output >> Hello<< --- !query 37 +-- !query 35 select format_string('>>%10s<<', NULL) --- !query 37 schema +-- !query 35 schema struct>%10s<<, NULL):string> --- !query 37 output +-- !query 35 output >> null<< --- !query 38 +-- !query 36 select format_string('>>%10s<<', '') --- !query 38 schema +-- !query 36 schema struct>%10s<<, ):string> --- !query 38 output +-- !query 36 output >> << --- !query 39 +-- !query 37 select format_string('>>%-10s<<', '') --- !query 39 schema +-- !query 37 schema struct>%-10s<<, ):string> --- !query 39 output +-- !query 37 output >> << --- !query 40 +-- !query 38 select format_string('>>%-10s<<', 'Hello') --- !query 40 schema +-- !query 38 schema struct>%-10s<<, Hello):string> --- !query 40 output +-- !query 38 output >>Hello << --- !query 41 +-- !query 39 select format_string('>>%-10s<<', NULL) --- !query 41 schema +-- !query 39 
schema struct>%-10s<<, NULL):string> --- !query 41 output +-- !query 39 output >>null << --- !query 42 +-- !query 40 select format_string('>>%1$10s<<', 'Hello') --- !query 42 schema +-- !query 40 schema struct>%1$10s<<, Hello):string> --- !query 42 output +-- !query 40 output >> Hello<< --- !query 43 +-- !query 41 DROP TABLE TEXT_TBL --- !query 43 schema +-- !query 41 schema struct<> --- !query 43 output +-- !query 41 output diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index ae0ccf556fbc3..12cca778f1d47 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -347,7 +347,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { localSparkSession.udf.register("boolne", (b1: Boolean, b2: Boolean) => b1 != b2) // vol used by boolean.sql and case.sql. localSparkSession.udf.register("vol", (s: String) => s) - localSparkSession.conf.set(SQLConf.DIALECT_SPARK_ANSI_ENABLED.key, true) localSparkSession.conf.set(SQLConf.DIALECT.key, SQLConf.Dialect.POSTGRESQL.toString) case _: AnsiTest => localSparkSession.conf.set(SQLConf.DIALECT_SPARK_ANSI_ENABLED.key, true) From d3ea117bbee162f2d0f956c90e064d6c2baf72e8 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Tue, 19 Nov 2019 19:32:29 +0800 Subject: [PATCH 2/5] comment address --- .../spark/sql/catalyst/parser/SqlBase.g4 | 24 +++++++++-------- .../sql/catalyst/parser/AstBuilder.scala | 26 +++++++++---------- .../sql/catalyst/parser/ParseDriver.scala | 10 +++++-- .../apache/spark/sql/internal/SQLConf.scala | 4 ++- 4 files changed, 37 insertions(+), 27 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 20a65b904afff..f26ab56ba4c15 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -46,9 +46,9 @@ grammar SqlBase; } /** - * When true, ANSI SQL parsing mode is enabled. + * When true, use ANSI SQL standard keywords. */ - public boolean ansi = false; + public boolean use_SQL_standard_keywords = false; } singleStatement @@ -744,7 +744,7 @@ primaryExpression | qualifiedName '.' ASTERISK #star | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor | '(' query ')' #subqueryExpression - | functionCallName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')' + | functionName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')' (OVER windowSpec)? #functionCall | identifier '->' expression #lambda | '(' identifier (',' identifier)+ ')' '->' expression #lambda @@ -788,7 +788,7 @@ booleanValue interval : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)? - | {ansi}? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval) + | {use_SQL_standard_keywords}? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval) ; errorCapturingMultiUnitsInterval @@ -908,7 +908,7 @@ qualifiedNameList : qualifiedName (',' qualifiedName)* ; -functionCallName +functionName : qualifiedName | LEFT | RIGHT @@ -933,14 +933,14 @@ errorCapturingIdentifierExtra identifier : strictIdentifier - | {!ansi}? strictNonReserved + | {!use_SQL_standard_keywords}? 
strictNonReserved ; strictIdentifier : IDENTIFIER #unquotedIdentifier | quotedIdentifier #quotedIdentifierAlternative - | {ansi}? ansiNonReserved #unquotedIdentifier - | {!ansi}? nonReserved #unquotedIdentifier + | {use_SQL_standard_keywords}? ansiNonReserved #unquotedIdentifier + | {!use_SQL_standard_keywords}? nonReserved #unquotedIdentifier ; quotedIdentifier @@ -957,7 +957,8 @@ number | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral ; -// When `spark.sql.dialect.spark.ansi.enabled=true`, there are 2 kinds of keywords in Spark SQL. +// When we use PostgreSQL dialect or use Spark dialect with +// `spark.sql.dialect.spark.ansi.enabled=true`, there are 2 kinds of keywords in Spark SQL. // - Reserved keywords: // Keywords that are reserved and can't be used as identifiers for table, view, column, // function, alias, etc. @@ -1157,9 +1158,10 @@ ansiNonReserved | YEARS ; -// When `spark.sql.dialect.spark.ansi.enabled=false`, there are 2 kinds of keywords in Spark SQL. +// When we use Spark dialect with `spark.sql.dialect.spark.ansi.enabled=false`, +// there are 2 kinds of keywords in Spark SQL. // - Non-reserved keywords: -// Same definition as the one when `spark.sql.dialect.spark.ansi.enabled=true`. +// Same definition as the one when the ANSI mode enabled. // - Strict-non-reserved keywords: // A strict version of non-reserved keywords, which can not be used as table alias. // You can find the full keywords list by searching "Start of the keywords list" in this file. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 2be78e861e6ab..7bec46678f58d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1581,7 +1581,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging */ override def visitFunctionCall(ctx: FunctionCallContext): Expression = withOrigin(ctx) { // Create the function call. - val name = ctx.functionCallName.getText + val name = ctx.functionName.getText val isDistinct = Option(ctx.setQuantifier()).exists(_.DISTINCT != null) val arguments = ctx.argument.asScala.map(expression) match { case Seq(UnresolvedStar(None)) @@ -1592,7 +1592,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging expressions } val function = UnresolvedFunction( - visitFunctionName(ctx.functionCallName), arguments, isDistinct) + getFunctionIdentifier(ctx.functionName), arguments, isDistinct) // Check if the function is evaluated in a windowed context. ctx.windowSpec match { @@ -1620,17 +1620,6 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging visitFunctionName(ctx, ctx.identifier().asScala.map(_.getText)) } - /** - * Create a function database (optional) and name pair. - */ - protected def visitFunctionName(ctx: FunctionCallNameContext): FunctionIdentifier = { - if (ctx.qualifiedName != null) { - visitFunctionName(ctx.qualifiedName) - } else { - FunctionIdentifier(ctx.getText, None) - } - } - /** * Create a function database (optional) and name pair. */ @@ -1643,6 +1632,17 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } } + /** + * Get a function identifier consist by database (optional) and name. 
+ */ + protected def getFunctionIdentifier(ctx: FunctionNameContext): FunctionIdentifier = { + if (ctx.qualifiedName != null) { + visitFunctionName(ctx.qualifiedName) + } else { + FunctionIdentifier(ctx.getText, None) + } + } + /** * Create an [[LambdaFunction]]. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index a84d29b71ac42..198cbf3c13b72 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.trees.Origin import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.Dialect import org.apache.spark.sql.types.{DataType, StructType} /** @@ -88,11 +89,16 @@ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Log protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = { logDebug(s"Parsing command: $command") + val useSQLStandardKeywords = Dialect.withName(conf.dialect) match { + case Dialect.POSTGRESQL => true + case Dialect.SPARK => conf.dialectSparkAnsiEnabled + } + val lexer = new SqlBaseLexer(new UpperCaseCharStream(CharStreams.fromString(command))) lexer.removeErrorListeners() lexer.addErrorListener(ParseErrorListener) lexer.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced - lexer.ansi = conf.ansiEnabled + lexer.use_SQL_standard_keywords = useSQLStandardKeywords val tokenStream = new CommonTokenStream(lexer) val parser = new SqlBaseParser(tokenStream) @@ -100,7 +106,7 @@ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Log parser.removeErrorListeners() parser.addErrorListener(ParseErrorListener) parser.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced - parser.ansi = conf.ansiEnabled + parser.use_SQL_standard_keywords = useSQLStandardKeywords try { try { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index d4fcefe99ee75..35a907bec700d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2515,7 +2515,9 @@ class SQLConf extends Serializable with Logging { def intervalOutputStyle: IntervalStyle.Value = IntervalStyle.withName(getConf(INTERVAL_STYLE)) - def usePostgreSQLDialect: Boolean = getConf(DIALECT) == Dialect.POSTGRESQL.toString + def dialect: String = getConf(DIALECT) + + def usePostgreSQLDialect: Boolean = dialect == Dialect.POSTGRESQL.toString def dialectSparkAnsiEnabled: Boolean = getConf(DIALECT_SPARK_ANSI_ENABLED) From f98a9efd894a31cba3c88a0ecca658c30e31383a Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Tue, 19 Nov 2019 21:23:22 +0800 Subject: [PATCH 3/5] comment address --- docs/sql-keywords.md | 7 ++++--- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 8 +++----- .../apache/spark/sql/catalyst/parser/ParseDriver.scala | 4 +++- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 4 ++-- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/docs/sql-keywords.md b/docs/sql-keywords.md index 79bc134596237..3117ee40a8c9b 100644 --- a/docs/sql-keywords.md +++ 
b/docs/sql-keywords.md
@@ -19,12 +19,13 @@ license: |
   limitations under the License.
 ---
 
-When `spark.sql.dialect.spark.ansi.enabled` is true, Spark SQL has two kinds of keywords:
+When `spark.sql.dialect=PostgreSQL`, or when the default `spark.sql.dialect=Spark` is used with `spark.sql.dialect.spark.ansi.enabled` set to true, Spark SQL uses the ANSI mode parser.
+In this mode, Spark SQL has two kinds of keywords:
 * Reserved keywords: Keywords that are reserved and can't be used as identifiers for table, view, column, function, alias, etc.
 * Non-reserved keywords: Keywords that have a special meaning only in particular contexts and can be used as identifiers in other contexts. For example, `SELECT 1 WEEK` is an interval literal, but WEEK can be used as identifiers in other places.
 
-When `spark.sql.dialect.spark.ansi.enabled` is false, Spark SQL has two kinds of keywords:
-* Non-reserved keywords: Same definition as the one when `spark.sql.dialect.spark.ansi.enabled=true`.
+When the ANSI mode is disabled, Spark SQL has two kinds of keywords:
+* Non-reserved keywords: Same definition as the one when the ANSI mode is enabled.
 * Strict-non-reserved keywords: A strict version of non-reserved keywords, which can not be used as table alias.
 
 By default `spark.sql.dialect.spark.ansi.enabled` is false.
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index f26ab56ba4c15..ce356b08f6124 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -957,8 +957,7 @@ number
     | MINUS? BIGDECIMAL_LITERAL                    #bigDecimalLiteral
     ;
 
-// When we use PostgreSQL dialect or use Spark dialect with
-// `spark.sql.dialect.spark.ansi.enabled=true`, there are 2 kinds of keywords in Spark SQL.
+// When `use_SQL_standard_keywords=true`, there are 2 kinds of keywords in Spark SQL.
 // - Reserved keywords:
 //     Keywords that are reserved and can't be used as identifiers for table, view, column,
 //     function, alias, etc.
@@ -1158,10 +1157,9 @@ ansiNonReserved
     | YEARS
     ;
 
-// When we use Spark dialect with `spark.sql.dialect.spark.ansi.enabled=false`,
-// there are 2 kinds of keywords in Spark SQL.
+// When `use_SQL_standard_keywords=false`, there are 2 kinds of keywords in Spark SQL.
 // - Non-reserved keywords:
-//     Same definition as the one when the ANSI mode enabled.
+//     Same definition as the one when `use_SQL_standard_keywords=true`.
 // - Strict-non-reserved keywords:
 //     A strict version of non-reserved keywords, which can not be used as table alias.
 //     You can find the full keywords list by searching "Start of the keywords list" in this file.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 198cbf3c13b72..21925db64fff7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -89,7 +89,9 @@ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Log protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = { logDebug(s"Parsing command: $command") - val useSQLStandardKeywords = Dialect.withName(conf.dialect) match { + // When we use PostgreSQL dialect or use Spark dialect with setting + // `spark.sql.dialect.spark.ansi.enabled=true`, the parser will use ANSI SQL standard keywords. + val useSQLStandardKeywords = conf.dialect match { case Dialect.POSTGRESQL => true case Dialect.SPARK => conf.dialectSparkAnsiEnabled } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 35a907bec700d..74046cd91c962 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2515,9 +2515,9 @@ class SQLConf extends Serializable with Logging { def intervalOutputStyle: IntervalStyle.Value = IntervalStyle.withName(getConf(INTERVAL_STYLE)) - def dialect: String = getConf(DIALECT) + def dialect: Dialect.Value = Dialect.withName(getConf(DIALECT)) - def usePostgreSQLDialect: Boolean = dialect == Dialect.POSTGRESQL.toString + def usePostgreSQLDialect: Boolean = dialect == Dialect.POSTGRESQL def dialectSparkAnsiEnabled: Boolean = getConf(DIALECT_SPARK_ANSI_ENABLED) From c05adb9309fd00ca4d98c78120258987c08cce7f Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Wed, 20 Nov 2019 13:41:43 +0800 Subject: [PATCH 4/5] comment address --- .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 16 ++++++++-------- .../spark/sql/catalyst/parser/ParseDriver.scala | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index ce356b08f6124..e316a36144ebf 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -48,7 +48,7 @@ grammar SqlBase; /** * When true, use ANSI SQL standard keywords. */ - public boolean use_SQL_standard_keywords = false; + public boolean SQL_standard_keyword_behavior = false; } singleStatement @@ -788,7 +788,7 @@ booleanValue interval : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)? - | {use_SQL_standard_keywords}? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval) + | {SQL_standard_keyword_behavior}? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval) ; errorCapturingMultiUnitsInterval @@ -933,14 +933,14 @@ errorCapturingIdentifierExtra identifier : strictIdentifier - | {!use_SQL_standard_keywords}? strictNonReserved + | {!SQL_standard_keyword_behavior}? strictNonReserved ; strictIdentifier : IDENTIFIER #unquotedIdentifier | quotedIdentifier #quotedIdentifierAlternative - | {use_SQL_standard_keywords}? ansiNonReserved #unquotedIdentifier - | {!use_SQL_standard_keywords}? 
nonReserved #unquotedIdentifier + | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier + | {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier ; quotedIdentifier @@ -957,7 +957,7 @@ number | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral ; -// When `use_SQL_standard_keywords=true`, there are 2 kinds of keywords in Spark SQL. +// When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL. // - Reserved keywords: // Keywords that are reserved and can't be used as identifiers for table, view, column, // function, alias, etc. @@ -1157,9 +1157,9 @@ ansiNonReserved | YEARS ; -// When `use_SQL_standard_keywords=false`, there are 2 kinds of keywords in Spark SQL. +// When `SQL_standard_keyword_behavior=false`, there are 2 kinds of keywords in Spark SQL. // - Non-reserved keywords: -// Same definition as the one when `use_SQL_standard_keywords=true`. +// Same definition as the one when `SQL_standard_keyword_behavior=true`. // - Strict-non-reserved keywords: // A strict version of non-reserved keywords, which can not be used as table alias. // You can find the full keywords list by searching "Start of the keywords list" in this file. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 21925db64fff7..c339ddda7a16a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -100,7 +100,7 @@ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Log lexer.removeErrorListeners() lexer.addErrorListener(ParseErrorListener) lexer.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced - lexer.use_SQL_standard_keywords = useSQLStandardKeywords + lexer.SQL_standard_keyword_behavior = useSQLStandardKeywords val tokenStream = new CommonTokenStream(lexer) val parser = new SqlBaseParser(tokenStream) @@ -108,7 +108,7 @@ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Log parser.removeErrorListeners() parser.addErrorListener(ParseErrorListener) parser.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced - parser.use_SQL_standard_keywords = useSQLStandardKeywords + parser.SQL_standard_keyword_behavior = useSQLStandardKeywords try { try { From 5a4b2ea3bb88454cbbb1c3e7da9ae85bca7a0e08 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Wed, 20 Nov 2019 16:12:49 +0800 Subject: [PATCH 5/5] rename config --- .../antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../org/apache/spark/sql/catalyst/parser/ParseDriver.scala | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index e316a36144ebf..cc273fd36011e 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -46,7 +46,7 @@ grammar SqlBase; } /** - * When true, use ANSI SQL standard keywords. + * When true, the behavior of keywords follows ANSI SQL standard. 
*/ public boolean SQL_standard_keyword_behavior = false; } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index c339ddda7a16a..30c36598d81d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -91,7 +91,7 @@ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Log // When we use PostgreSQL dialect or use Spark dialect with setting // `spark.sql.dialect.spark.ansi.enabled=true`, the parser will use ANSI SQL standard keywords. - val useSQLStandardKeywords = conf.dialect match { + val SQLStandardKeywordBehavior = conf.dialect match { case Dialect.POSTGRESQL => true case Dialect.SPARK => conf.dialectSparkAnsiEnabled } @@ -100,7 +100,7 @@ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Log lexer.removeErrorListeners() lexer.addErrorListener(ParseErrorListener) lexer.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced - lexer.SQL_standard_keyword_behavior = useSQLStandardKeywords + lexer.SQL_standard_keyword_behavior = SQLStandardKeywordBehavior val tokenStream = new CommonTokenStream(lexer) val parser = new SqlBaseParser(tokenStream) @@ -108,7 +108,7 @@ abstract class AbstractSqlParser(conf: SQLConf) extends ParserInterface with Log parser.removeErrorListeners() parser.addErrorListener(ParseErrorListener) parser.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced - parser.SQL_standard_keyword_behavior = useSQLStandardKeywords + parser.SQL_standard_keyword_behavior = SQLStandardKeywordBehavior try { try {
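
For readers skimming the series, a minimal end-to-end illustration of the behavior it targets (not part of the patches themselves; the config name and value come from the docs change above, while the exact SET syntax, value casing, and console output are assumptions based on the regenerated text.sql.out results):

    -- hypothetical spark-sql session, shown only as a sketch
    SET spark.sql.dialect=PostgreSQL;
    -- The PostgreSQL dialect now always applies SQL standard keyword behavior, independent of
    -- spark.sql.dialect.spark.ansi.enabled, yet left/right still parse as string functions
    -- because the new functionName rule accepts the LEFT and RIGHT tokens.
    SELECT left('ahoj', 2), right('ahoj', 2);
    -- expected: ah   oj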