Commit a05e85e
[SPARK-22934][SQL] Make optional clauses order insensitive for CREATE TABLE SQL statement
## What changes were proposed in this pull request?

Currently, our CREATE TABLE syntax requires the EXACT order of clauses. It is pretty hard to remember the exact order. Thus, this PR makes the optional clauses order insensitive for the `CREATE TABLE` SQL statement.

```
CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [db_name.]table_name
    [(col_name1 col_type1 [COMMENT col_comment1], ...)]
    USING datasource
    [OPTIONS (key1=val1, key2=val2, ...)]
    [PARTITIONED BY (col_name1, col_name2, ...)]
    [CLUSTERED BY (col_name3, col_name4, ...) INTO num_buckets BUCKETS]
    [LOCATION path]
    [COMMENT table_comment]
    [TBLPROPERTIES (key1=val1, key2=val2, ...)]
    [AS select_statement]
```

The proposal is to make the following clauses order insensitive:

```
[OPTIONS (key1=val1, key2=val2, ...)]
[PARTITIONED BY (col_name1, col_name2, ...)]
[CLUSTERED BY (col_name3, col_name4, ...) INTO num_buckets BUCKETS]
[LOCATION path]
[COMMENT table_comment]
[TBLPROPERTIES (key1=val1, key2=val2, ...)]
```

The same idea is also applicable to CREATE TABLE for Hive:

```
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
    [(col_name1[:] col_type1 [COMMENT col_comment1], ...)]
    [COMMENT table_comment]
    [PARTITIONED BY (col_name2[:] col_type2 [COMMENT col_comment2], ...)]
    [ROW FORMAT row_format]
    [STORED AS file_format]
    [LOCATION path]
    [TBLPROPERTIES (key1=val1, key2=val2, ...)]
    [AS select_statement]
```

The proposal is to make the following clauses order insensitive:

```
[COMMENT table_comment]
[PARTITIONED BY (col_name2[:] col_type2 [COMMENT col_comment2], ...)]
[ROW FORMAT row_format]
[STORED AS file_format]
[LOCATION path]
[TBLPROPERTIES (key1=val1, key2=val2, ...)]
```

## How was this patch tested?

Added test cases.

Author: gatorsmile <[email protected]>

Closes #20133 from gatorsmile/createDataSourceTableDDL.

(cherry picked from commit 1a87a16)
Signed-off-by: gatorsmile <[email protected]>
1 parent b96a213 commit a05e85e
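As a concrete illustration of the change described in the commit message above, the following is a minimal sketch of the new behavior. It assumes a Spark build that includes this patch and a `SparkSession` named `spark`; the table names, properties, and paths are hypothetical.

```scala
// Optional clauses may now appear in any order after USING:
spark.sql("""
  CREATE TABLE t1 (a INT, b STRING)
  USING parquet
  TBLPROPERTIES ('prop1' = 'v1')
  COMMENT 'clauses written in a non-canonical order'
  PARTITIONED BY (a)
""")

// Writing the same clause twice is still rejected, with a ParseException
// such as "Found duplicate clauses: TBLPROPERTIES":
spark.sql("""
  CREATE TABLE t2 (a INT)
  USING parquet
  TBLPROPERTIES ('p' = '1')
  TBLPROPERTIES ('p' = '2')
""")
```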

File tree

7 files changed: +335 additions, -138 deletions

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 14 additions & 10 deletions
```diff
@@ -73,18 +73,22 @@ statement
     | ALTER DATABASE identifier SET DBPROPERTIES tablePropertyList         #setDatabaseProperties
     | DROP DATABASE (IF EXISTS)? identifier (RESTRICT | CASCADE)?          #dropDatabase
     | createTableHeader ('(' colTypeList ')')? tableProvider
-        (OPTIONS options=tablePropertyList)?
-        (PARTITIONED BY partitionColumnNames=identifierList)?
-        bucketSpec? locationSpec?
-        (COMMENT comment=STRING)?
-        (TBLPROPERTIES tableProps=tablePropertyList)?
+        ((OPTIONS options=tablePropertyList) |
+        (PARTITIONED BY partitionColumnNames=identifierList) |
+        bucketSpec |
+        locationSpec |
+        (COMMENT comment=STRING) |
+        (TBLPROPERTIES tableProps=tablePropertyList))*
         (AS? query)?                                                       #createTable
     | createTableHeader ('(' columns=colTypeList ')')?
-        (COMMENT comment=STRING)?
-        (PARTITIONED BY '(' partitionColumns=colTypeList ')')?
-        bucketSpec? skewSpec?
-        rowFormat? createFileFormat? locationSpec?
-        (TBLPROPERTIES tablePropertyList)?
+        ((COMMENT comment=STRING) |
+        (PARTITIONED BY '(' partitionColumns=colTypeList ')') |
+        bucketSpec |
+        skewSpec |
+        rowFormat |
+        createFileFormat |
+        locationSpec |
+        (TBLPROPERTIES tableProps=tablePropertyList))*
         (AS? query)?                                                       #createHiveTable
     | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier
         LIKE source=tableIdentifier locationSpec?                          #createTableLike
```
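The `(clause1 | clause2 | ...)*` loop is what makes the clauses order insensitive: the grammar now accepts them in any order — and, at this level, any number of times; duplicate occurrences are rejected later in the AST builder (see SparkSqlParser.scala below). A quick spot-check of the effect, sketched against the Spark 2.3-era parser API (the exact statements are illustrative):

```scala
import org.apache.spark.sql.execution.SparkSqlParser
import org.apache.spark.sql.internal.SQLConf

val parser = new SparkSqlParser(new SQLConf)

// Both orderings parse with the relaxed grammar; under the old grammar the
// second statement failed because TBLPROPERTIES had to come after COMMENT.
Seq(
  "CREATE TABLE t (a INT) USING parquet COMMENT 'c' TBLPROPERTIES ('p' = '1')",
  "CREATE TABLE t (a INT) USING parquet TBLPROPERTIES ('p' = '1') COMMENT 'c'"
).foreach(parser.parsePlan)
```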

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala

Lines changed: 9 additions & 0 deletions
```diff
@@ -16,6 +16,8 @@
  */
 package org.apache.spark.sql.catalyst.parser
 
+import java.util
+
 import scala.collection.mutable.StringBuilder
 
 import org.antlr.v4.runtime.{ParserRuleContext, Token}
@@ -39,6 +41,13 @@ object ParserUtils {
     throw new ParseException(s"Operation not allowed: $message", ctx)
   }
 
+  def checkDuplicateClauses[T](
+      nodes: util.List[T], clauseName: String, ctx: ParserRuleContext): Unit = {
+    if (nodes.size() > 1) {
+      throw new ParseException(s"Found duplicate clauses: $clauseName", ctx)
+    }
+  }
+
   /** Check if duplicate keys exist in a set of key-value pairs. */
   def checkDuplicateKeys[T](keyPairs: Seq[(String, T)], ctx: ParserRuleContext): Unit = {
     keyPairs.groupBy(_._1).filter(_._2.size > 1).foreach { case (key, _) =>
```
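The helper's contract is simple: a `rule*` accessor in the generated parser returns one list entry per occurrence of the clause, so a size above one means the user repeated it. A self-contained sketch of that contract (`ParseException` swapped for a plain exception so the snippet runs standalone; the list contents are illustrative):

```scala
import java.util.{ArrayList => JArrayList}

// Standalone restatement of checkDuplicateClauses: reject any clause whose
// ANTLR `rule*` accessor returned more than one occurrence.
def checkDuplicateClauses[T](nodes: java.util.List[T], clauseName: String): Unit = {
  if (nodes.size() > 1) {
    // The real helper throws ParseException(msg, ctx); a plain exception
    // stands in here to keep the sketch self-contained.
    throw new IllegalArgumentException(s"Found duplicate clauses: $clauseName")
  }
}

val once = new JArrayList[String]()
once.add("LOCATION '/tmp/t'")
checkDuplicateClauses(once, "LOCATION")        // passes: clause appears once

val twice = new JArrayList[String]()
twice.add("TBLPROPERTIES ('a' = '1')")
twice.add("TBLPROPERTIES ('b' = '2')")
checkDuplicateClauses(twice, "TBLPROPERTIES")  // throws: duplicate clause
```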

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala

Lines changed: 56 additions & 25 deletions
```diff
@@ -383,23 +383,34 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
    * {{{
    *   CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [db_name.]table_name
    *   USING table_provider
-   *   [OPTIONS table_property_list]
-   *   [PARTITIONED BY (col_name, col_name, ...)]
-   *   [CLUSTERED BY (col_name, col_name, ...)
-   *    [SORTED BY (col_name [ASC|DESC], ...)]
-   *    INTO num_buckets BUCKETS
-   *   ]
-   *   [LOCATION path]
-   *   [COMMENT table_comment]
-   *   [TBLPROPERTIES (property_name=property_value, ...)]
+   *   create_table_clauses
    *   [[AS] select_statement];
+   *
+   *   create_table_clauses (order insensitive):
+   *     [OPTIONS table_property_list]
+   *     [PARTITIONED BY (col_name, col_name, ...)]
+   *     [CLUSTERED BY (col_name, col_name, ...)
+   *       [SORTED BY (col_name [ASC|DESC], ...)]
+   *       INTO num_buckets BUCKETS
+   *     ]
+   *     [LOCATION path]
+   *     [COMMENT table_comment]
+   *     [TBLPROPERTIES (property_name=property_value, ...)]
    * }}}
    */
   override def visitCreateTable(ctx: CreateTableContext): LogicalPlan = withOrigin(ctx) {
     val (table, temp, ifNotExists, external) = visitCreateTableHeader(ctx.createTableHeader)
     if (external) {
       operationNotAllowed("CREATE EXTERNAL TABLE ... USING", ctx)
     }
+
+    checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx)
+    checkDuplicateClauses(ctx.OPTIONS, "OPTIONS", ctx)
+    checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED BY", ctx)
+    checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx)
+    checkDuplicateClauses(ctx.bucketSpec(), "CLUSTERED BY", ctx)
+    checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx)
+
     val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty)
     val provider = ctx.tableProvider.qualifiedName.getText
     val schema = Option(ctx.colTypeList()).map(createSchema)
@@ -408,9 +419,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
       .map(visitIdentifierList(_).toArray)
       .getOrElse(Array.empty[String])
     val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty)
-    val bucketSpec = Option(ctx.bucketSpec()).map(visitBucketSpec)
+    val bucketSpec = ctx.bucketSpec().asScala.headOption.map(visitBucketSpec)
 
-    val location = Option(ctx.locationSpec).map(visitLocationSpec)
+    val location = ctx.locationSpec.asScala.headOption.map(visitLocationSpec)
     val storage = DataSource.buildStorageFormatFromOptions(options)
 
     if (location.isDefined && storage.locationUri.isDefined) {
```
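The second hunk shows the access-pattern change the grammar relaxation forces everywhere: a `rule?` accessor returned a nullable node, so `Option(ctx.locationSpec)` worked, while a `rule*` accessor returns a `java.util.List` that is empty when the clause is absent — hence `ctx.locationSpec.asScala.headOption`. A minimal sketch of the pattern, with a plain `java.util.List` standing in for the ANTLR context list:

```scala
import java.util.{ArrayList => JArrayList, List => JList}
import scala.collection.JavaConverters._

// Clause absent: the list is empty, so headOption yields None.
val absent: JList[String] = new JArrayList[String]()
assert(absent.asScala.headOption.isEmpty)

// Clause present once: headOption yields the single occurrence.
val present: JList[String] = new JArrayList[String]()
present.add("LOCATION '/tmp/t'")
assert(present.asScala.headOption.contains("LOCATION '/tmp/t'"))
```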
```diff
@@ -1087,13 +1098,16 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
    * {{{
    *   CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
    *   [(col1[:] data_type [COMMENT col_comment], ...)]
-   *   [COMMENT table_comment]
-   *   [PARTITIONED BY (col2[:] data_type [COMMENT col_comment], ...)]
-   *   [ROW FORMAT row_format]
-   *   [STORED AS file_format]
-   *   [LOCATION path]
-   *   [TBLPROPERTIES (property_name=property_value, ...)]
+   *   create_table_clauses
    *   [AS select_statement];
+   *
+   *   create_table_clauses (order insensitive):
+   *     [COMMENT table_comment]
+   *     [PARTITIONED BY (col2[:] data_type [COMMENT col_comment], ...)]
+   *     [ROW FORMAT row_format]
+   *     [STORED AS file_format]
+   *     [LOCATION path]
+   *     [TBLPROPERTIES (property_name=property_value, ...)]
    * }}}
    */
   override def visitCreateHiveTable(ctx: CreateHiveTableContext): LogicalPlan = withOrigin(ctx) {
@@ -1104,28 +1118,36 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
         "CREATE TEMPORARY TABLE is not supported yet. " +
           "Please use CREATE TEMPORARY VIEW as an alternative.", ctx)
     }
-    if (ctx.skewSpec != null) {
+    if (ctx.skewSpec.size > 0) {
       operationNotAllowed("CREATE TABLE ... SKEWED BY", ctx)
     }
 
+    checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx)
+    checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED BY", ctx)
+    checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx)
+    checkDuplicateClauses(ctx.bucketSpec(), "CLUSTERED BY", ctx)
+    checkDuplicateClauses(ctx.createFileFormat, "STORED AS/BY", ctx)
+    checkDuplicateClauses(ctx.rowFormat, "ROW FORMAT", ctx)
+    checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx)
+
     val dataCols = Option(ctx.columns).map(visitColTypeList).getOrElse(Nil)
     val partitionCols = Option(ctx.partitionColumns).map(visitColTypeList).getOrElse(Nil)
-    val properties = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)
+    val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty)
     val selectQuery = Option(ctx.query).map(plan)
-    val bucketSpec = Option(ctx.bucketSpec()).map(visitBucketSpec)
+    val bucketSpec = ctx.bucketSpec().asScala.headOption.map(visitBucketSpec)
 
     // Note: Hive requires partition columns to be distinct from the schema, so we need
     // to include the partition columns here explicitly
     val schema = StructType(dataCols ++ partitionCols)
 
     // Storage format
     val defaultStorage = HiveSerDe.getDefaultStorage(conf)
-    validateRowFormatFileFormat(ctx.rowFormat, ctx.createFileFormat, ctx)
-    val fileStorage = Option(ctx.createFileFormat).map(visitCreateFileFormat)
+    validateRowFormatFileFormat(ctx.rowFormat.asScala, ctx.createFileFormat.asScala, ctx)
+    val fileStorage = ctx.createFileFormat.asScala.headOption.map(visitCreateFileFormat)
       .getOrElse(CatalogStorageFormat.empty)
-    val rowStorage = Option(ctx.rowFormat).map(visitRowFormat)
+    val rowStorage = ctx.rowFormat.asScala.headOption.map(visitRowFormat)
       .getOrElse(CatalogStorageFormat.empty)
-    val location = Option(ctx.locationSpec).map(visitLocationSpec)
+    val location = ctx.locationSpec.asScala.headOption.map(visitLocationSpec)
     // If we are creating an EXTERNAL table, then the LOCATION field is required
     if (external && location.isEmpty) {
       operationNotAllowed("CREATE EXTERNAL TABLE must be accompanied by LOCATION", ctx)
```
```diff
@@ -1180,7 +1202,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
         ctx)
     }
 
-    val hasStorageProperties = (ctx.createFileFormat != null) || (ctx.rowFormat != null)
+    val hasStorageProperties = (ctx.createFileFormat.size != 0) || (ctx.rowFormat.size != 0)
     if (conf.convertCTAS && !hasStorageProperties) {
       // At here, both rowStorage.serdeProperties and fileStorage.serdeProperties
       // are empty Maps.
@@ -1366,6 +1388,15 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
     }
   }
 
+  private def validateRowFormatFileFormat(
+      rowFormatCtx: Seq[RowFormatContext],
+      createFileFormatCtx: Seq[CreateFileFormatContext],
+      parentCtx: ParserRuleContext): Unit = {
+    if (rowFormatCtx.size == 1 && createFileFormatCtx.size == 1) {
+      validateRowFormatFileFormat(rowFormatCtx.head, createFileFormatCtx.head, parentCtx)
+    }
+  }
+
   /**
    * Create or replace a view. This creates a [[CreateViewCommand]] command.
    *
```
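The new overload adapts the existing pairwise `validateRowFormatFileFormat` check to the list-valued contexts: duplicates were already rejected by `checkDuplicateClauses`, so cross-validation only matters when ROW FORMAT and STORED AS each appear exactly once, and zero occurrences of either means there is nothing to check. The net effect on the Hive syntax, sketched against the same Spark 2.3-era parser API (the statement is illustrative):

```scala
import org.apache.spark.sql.execution.SparkSqlParser
import org.apache.spark.sql.internal.SQLConf

// Hive-style CREATE TABLE with LOCATION/COMMENT/STORED AS reordered relative
// to the old fixed grammar; this parses once the clauses are order insensitive.
val parser = new SparkSqlParser(new SQLConf)
parser.parsePlan(
  """CREATE EXTERNAL TABLE h (a INT)
    |LOCATION '/tmp/h'
    |COMMENT 'reordered clauses'
    |STORED AS parquet""".stripMargin)
```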
