-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-16879][SQL] unify logical plans for CREATE TABLE and CTAS #14482
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ec47911
7c670b7
2662bf1
8db79e4
2933c58
f49ef22
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.parser._ | |
| import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ | ||
| import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation, ScriptInputOutputSchema} | ||
| import org.apache.spark.sql.execution.command._ | ||
| import org.apache.spark.sql.execution.datasources.{CreateTempViewUsing, _} | ||
| import org.apache.spark.sql.execution.datasources.{CreateTable, CreateTempViewUsing, _} | ||
| import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution} | ||
| import org.apache.spark.sql.types.{DataType, StructType} | ||
|
|
||
|
|
@@ -310,7 +310,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { | |
| } | ||
|
|
||
| /** | ||
| * Create a [[CreateTableUsing]] or a [[CreateTableUsingAsSelect]] logical plan. | ||
| * Create a [[CreateTable]] logical plan. | ||
| */ | ||
| override def visitCreateTableUsing(ctx: CreateTableUsingContext): LogicalPlan = withOrigin(ctx) { | ||
| val (table, temp, ifNotExists, external) = visitCreateTableHeader(ctx.createTableHeader) | ||
|
|
@@ -319,12 +319,31 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { | |
| } | ||
| val options = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty) | ||
| val provider = ctx.tableProvider.qualifiedName.getText | ||
| val schema = Option(ctx.colTypeList()).map(createStructType) | ||
| val partitionColumnNames = | ||
| Option(ctx.partitionColumnNames) | ||
| .map(visitIdentifierList(_).toArray) | ||
| .getOrElse(Array.empty[String]) | ||
| val bucketSpec = Option(ctx.bucketSpec()).map(visitBucketSpec) | ||
|
|
||
| val tableDesc = CatalogTable( | ||
| identifier = table, | ||
| // TODO: actually the table type may be EXTERNAL if we have `path` in options. However, the | ||
| // physical plan `CreateDataSourceTableCommand` doesn't take table type as parameter, but a | ||
| // boolean flag called `managedIfNoPath`. We set the table type to MANAGED here to simulate | ||
| // setting the `managedIfNoPath` flag. In the future we should refactor the physical plan and | ||
| // make it take `CatalogTable` directly. | ||
| tableType = CatalogTableType.MANAGED, | ||
| storage = CatalogStorageFormat.empty.copy(properties = options), | ||
| schema = schema.getOrElse(new StructType), | ||
| provider = Some(provider), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it still possible that a provide is
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. well, just look at the code around it; it's possible that
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It was a more general question which you have answered, sorry about that. In this case the provider cannot be null: the grammar requires a provider and the call |
||
| partitionColumnNames = partitionColumnNames, | ||
| bucketSpec = bucketSpec | ||
| ) | ||
|
|
||
| // Determine the storage mode. | ||
| val mode = if (ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists | ||
|
|
||
| if (ctx.query != null) { | ||
| // Get the backing query. | ||
| val query = plan(ctx.query) | ||
|
|
@@ -333,32 +352,19 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { | |
| operationNotAllowed("CREATE TEMPORARY TABLE ... USING ... AS query", ctx) | ||
| } | ||
|
|
||
| // Determine the storage mode. | ||
| val mode = if (ifNotExists) { | ||
| SaveMode.Ignore | ||
| } else { | ||
| SaveMode.ErrorIfExists | ||
| } | ||
|
|
||
| CreateTableUsingAsSelect( | ||
| table, provider, partitionColumnNames, bucketSpec, mode, options, query) | ||
| CreateTable(tableDesc, mode, Some(query)) | ||
| } else { | ||
| val struct = Option(ctx.colTypeList()).map(createStructType) | ||
| if (struct.isEmpty && bucketSpec.nonEmpty) { | ||
| throw new ParseException( | ||
| "Expected explicit specification of table schema when using CLUSTERED BY clause.", ctx) | ||
| } | ||
| if (temp) { | ||
| if (ifNotExists) { | ||
| operationNotAllowed("CREATE TEMPORARY TABLE IF NOT EXISTS", ctx) | ||
| } | ||
|
|
||
| CreateTableUsing( | ||
| table, | ||
| struct, | ||
| provider, | ||
| temp, | ||
| options, | ||
| partitionColumnNames, | ||
| bucketSpec, | ||
| ifNotExists, | ||
| managedIfNoPath = true) | ||
| logWarning(s"CREATE TEMPORARY TABLE ... USING ... is deprecated, please use " + | ||
| "CREATE TEMPORARY VIEW ... USING ... instead") | ||
| CreateTempViewUsing(table, schema, replace = true, provider, options) | ||
| } else { | ||
| CreateTable(tableDesc, mode, None) | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -891,8 +897,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { | |
| } | ||
|
|
||
| /** | ||
| * Create a table, returning either a [[CreateTableCommand]] or a | ||
| * [[CreateHiveTableAsSelectLogicalPlan]]. | ||
| * Create a table, returning a [[CreateTable]] logical plan. | ||
| * | ||
| * This is not used to create datasource tables, which is handled through | ||
| * "CREATE TABLE ... USING ...". | ||
|
|
@@ -933,23 +938,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { | |
| val properties = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty) | ||
| val selectQuery = Option(ctx.query).map(plan) | ||
|
|
||
| // Ensuring whether no duplicate name is used in table definition | ||
| val colNames = dataCols.map(_.name) | ||
| if (colNames.length != colNames.distinct.length) { | ||
| val duplicateColumns = colNames.groupBy(identity).collect { | ||
| case (x, ys) if ys.length > 1 => "\"" + x + "\"" | ||
| } | ||
| operationNotAllowed(s"Duplicated column names found in table definition of $name: " + | ||
| duplicateColumns.mkString("[", ",", "]"), ctx) | ||
| } | ||
|
|
||
| // For Hive tables, partition columns must not be part of the schema | ||
| val badPartCols = partitionCols.map(_.name).toSet.intersect(colNames.toSet) | ||
| if (badPartCols.nonEmpty) { | ||
| operationNotAllowed(s"Partition columns may not be specified in the schema: " + | ||
| badPartCols.map("\"" + _ + "\"").mkString("[", ",", "]"), ctx) | ||
| } | ||
|
|
||
| // Note: Hive requires partition columns to be distinct from the schema, so we need | ||
| // to include the partition columns here explicitly | ||
| val schema = StructType(dataCols ++ partitionCols) | ||
|
|
@@ -1001,10 +989,13 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { | |
| tableType = tableType, | ||
| storage = storage, | ||
| schema = schema, | ||
| provider = Some("hive"), | ||
| partitionColumnNames = partitionCols.map(_.name), | ||
| properties = properties, | ||
| comment = comment) | ||
|
|
||
| val mode = if (ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists | ||
|
|
||
| selectQuery match { | ||
| case Some(q) => | ||
| // Just use whatever is projected in the select statement as our schema | ||
|
|
@@ -1025,27 +1016,24 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { | |
|
|
||
| val hasStorageProperties = (ctx.createFileFormat != null) || (ctx.rowFormat != null) | ||
| if (conf.convertCTAS && !hasStorageProperties) { | ||
| val mode = if (ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists | ||
| // At here, both rowStorage.serdeProperties and fileStorage.serdeProperties | ||
| // are empty Maps. | ||
| val optionsWithPath = if (location.isDefined) { | ||
| Map("path" -> location.get) | ||
| } else { | ||
| Map.empty[String, String] | ||
| } | ||
| CreateTableUsingAsSelect( | ||
| tableIdent = tableDesc.identifier, | ||
| provider = conf.defaultDataSourceName, | ||
| partitionColumns = tableDesc.partitionColumnNames.toArray, | ||
| bucketSpec = None, | ||
| mode = mode, | ||
| options = optionsWithPath, | ||
| q | ||
|
|
||
| val newTableDesc = tableDesc.copy( | ||
| storage = CatalogStorageFormat.empty.copy(properties = optionsWithPath), | ||
| provider = Some(conf.defaultDataSourceName) | ||
| ) | ||
|
|
||
| CreateTable(newTableDesc, mode, Some(q)) | ||
| } else { | ||
| CreateHiveTableAsSelectLogicalPlan(tableDesc, q, ifNotExists) | ||
| CreateTable(tableDesc, mode, Some(q)) | ||
| } | ||
| case None => CreateTableCommand(tableDesc, ifNotExists) | ||
| case None => CreateTable(tableDesc, mode, None) | ||
| } | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
NIT: we really shouldn't use the `Some(...)` constructor here — use `Option(...)` instead. ...this happens a few times - but I'll stop complaining after this one :)...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
hmmm, do we have to use `Option` even though the parameter is guaranteed to be not null?

For this case, we can't use `Option`, or the behaviour will be changed. Previously, if `df.logicalPlan` is null, it's a bug and we will throw an NPE somewhere. If we use `Option` here, then we are silently converting a CTAS to a CREATE TABLE, which is not expected.

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know I am nit picking here :).
I have seen a few cases in which someone thought the parameter was non-null and used `Some(...)` to wrap it, resulting in a very nice NPE down the line (which you don't expect in an `Option`). In this case you are totally right to use `Some(...)`.