From dfb25a15ee437dd6c09d840efdcaf9d233cb8dd0 Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Fri, 13 Jan 2017 11:27:06 +0800
Subject: [PATCH] SHOW CREATE TABLE should generate new syntax to create hive
 table

---
 .../spark/sql/execution/command/ddl.scala     |   3 +
 .../spark/sql/execution/command/tables.scala  | 117 +++++++++---------
 .../sql/hive/execution/HiveOptions.scala      |   7 +-
 .../spark/sql/hive/ShowCreateTableSuite.scala |   4 +-
 4 files changed, 68 insertions(+), 63 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
index 82cbb4aa4744..3c9708912d39 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -761,6 +761,9 @@ case class AlterTableSetLocationCommand(
 
 object DDLUtils {
   val HIVE_PROVIDER = "hive"
+  val HIVE_SERDE_OPTION = "serde"
+  val HIVE_INPUT_FORMAT_OPTION = "inputFormat"
+  val HIVE_OUTPUT_FORMAT_OPTION = "outputFormat"
 
   def isHiveTable(table: CatalogTable): Boolean = {
     table.provider.isDefined && table.provider.get.toLowerCase == HIVE_PROVIDER
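Note: the three constants above become the single definition of the Hive serde
option keys that both the DDL layer and the Hive data source (HiveOptions.scala
below) refer to. As a rough sketch of the unified syntax this patch targets,
the following creates a Hive table through CREATE TABLE ... USING hive
OPTIONS (...) and prints what SHOW CREATE TABLE returns for it. It assumes a
Spark build with Hive support; the table name is illustrative, and the serde
classes are the usual Hive text-format defaults:

    import org.apache.spark.sql.SparkSession

    object UnifiedHiveDdlSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .master("local[*]")
          .appName("unified-hive-ddl")
          .enableHiveSupport()
          .getOrCreate()

        // The option keys match the new DDLUtils constants:
        // serde, inputFormat, outputFormat.
        spark.sql(
          """CREATE TABLE t1 (id INT, name STRING)
            |USING hive
            |OPTIONS (
            |  serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe',
            |  inputFormat 'org.apache.hadoop.mapred.TextInputFormat',
            |  outputFormat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
            |)""".stripMargin)

        // With this patch, the generated statement comes back in the same
        // unified form, assuming the table carries no table properties
        // (otherwise the legacy fallback below kicks in).
        spark.sql("SHOW CREATE TABLE t1").collect().foreach(row => println(row.getString(0)))

        spark.stop()
      }
    }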
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index ac6c3a89dbd0..36ee1c54fe33 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -789,55 +789,50 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman
   )
 
   override def run(sparkSession: SparkSession): Seq[Row] = {
-    val catalog = sparkSession.sessionState.catalog
-    val tableMetadata = catalog.getTableMetadata(table)
-
-    // TODO: unify this after we unify the CREATE TABLE syntax for hive serde and data source table.
-    val stmt = if (DDLUtils.isDatasourceTable(tableMetadata)) {
-      showCreateDataSourceTable(tableMetadata)
-    } else {
-      showCreateHiveTable(tableMetadata)
-    }
+    val tableMeta = sparkSession.sessionState.catalog.getTableMetadata(table)
+    val tableName = tableMeta.identifier.quotedString
 
-    Seq(Row(stmt))
-  }
-
-  private def showCreateHiveTable(metadata: CatalogTable): String = {
-    def reportUnsupportedError(features: Seq[String]): Unit = {
+    if (tableMeta.unsupportedFeatures.nonEmpty) {
       throw new AnalysisException(
-        s"Failed to execute SHOW CREATE TABLE against table/view ${metadata.identifier}, " +
+        s"Failed to execute SHOW CREATE TABLE against table/view $tableName, " +
           "which is created by Hive and uses the following unsupported feature(s)\n" +
-          features.map(" - " + _).mkString("\n")
-      )
+          tableMeta.unsupportedFeatures.map(" - " + _).mkString("\n"))
     }
 
-    if (metadata.unsupportedFeatures.nonEmpty) {
-      reportUnsupportedError(metadata.unsupportedFeatures)
-    }
+    val stmt = if (tableMeta.tableType == VIEW) {
+      val builder = StringBuilder.newBuilder
+      builder ++= s"CREATE VIEW $tableName"
 
-    val builder = StringBuilder.newBuilder
+      if (tableMeta.schema.nonEmpty) {
+        builder ++= tableMeta.schema.map(_.name).mkString("(", ", ", ")")
+      }
 
-    val tableTypeString = metadata.tableType match {
-      case EXTERNAL => " EXTERNAL TABLE"
-      case VIEW => " VIEW"
-      case MANAGED => " TABLE"
+      builder ++= s" AS\n${tableMeta.viewText.get}"
+      builder.toString
+    } else if (DDLUtils.isHiveTable(tableMeta) && tableMeta.properties.nonEmpty) {
+      // If table properties are not empty, this Hive table was probably created via the legacy
+      // Hive syntax, and we have to generate the CREATE TABLE statement using that legacy
+      // syntax, as the official syntax doesn't support table properties.
+      showCreateTableWithLegacySyntax(tableMeta)
+    } else {
+      showCreateTable(tableMeta)
     }
 
-    builder ++= s"CREATE$tableTypeString ${table.quotedString}"
+    Seq(Row(stmt))
+  }
 
-    if (metadata.tableType == VIEW) {
-      if (metadata.schema.nonEmpty) {
-        builder ++= metadata.schema.map(_.name).mkString("(", ", ", ")")
-      }
-      builder ++= metadata.viewText.mkString(" AS\n", "", "\n")
-    } else {
-      showHiveTableHeader(metadata, builder)
-      showHiveTableNonDataColumns(metadata, builder)
-      showHiveTableStorageInfo(metadata, builder)
-      showHiveTableProperties(metadata, builder)
-    }
+  private def showCreateTableWithLegacySyntax(metadata: CatalogTable): String = {
+    val builder = StringBuilder.newBuilder
 
-    builder.toString()
+    val isExternal = if (metadata.tableType == EXTERNAL) " EXTERNAL" else ""
+    builder ++= s"CREATE$isExternal TABLE ${metadata.identifier.quotedString}"
+
+    showHiveTableHeader(metadata, builder)
+    showHiveTableNonDataColumns(metadata, builder)
+    showHiveTableStorageInfo(metadata, builder)
+    showHiveTableProperties(metadata, builder)
+
+    builder.toString
   }
 
   private def showHiveTableHeader(metadata: CatalogTable, builder: StringBuilder): Unit = {
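Note: the rewritten run() now dispatches three ways: views are printed as
CREATE VIEW, Hive tables that carry table properties fall back to the legacy
Hive syntax (the unified syntax has no TBLPROPERTIES clause), and everything
else goes through the unified showCreateTable path below. A standalone sketch
of just that decision, with CatalogTable reduced to the two inputs the branch
actually reads; the names are illustrative, not Spark's API:

    object SyntaxDispatchSketch {
      sealed trait TableKind
      case object View extends TableKind
      case object HiveTable extends TableKind
      case object DataSourceTable extends TableKind

      def chooseSyntax(kind: TableKind, hasTableProperties: Boolean): String = kind match {
        // A view keeps its own statement form.
        case View => "CREATE VIEW ... AS <viewText>"
        // Table properties exist only in the legacy syntax, so they force the fallback.
        case HiveTable if hasTableProperties =>
          "CREATE [EXTERNAL] TABLE ... ROW FORMAT ... TBLPROPERTIES (...)"
        // Everything else, Hive serde or data source, uses the unified form.
        case _ => "CREATE TABLE ... USING <provider> OPTIONS (...)"
      }

      def main(args: Array[String]): Unit = {
        assert(chooseSyntax(HiveTable, hasTableProperties = false).contains("USING"))
        assert(chooseSyntax(HiveTable, hasTableProperties = true).contains("TBLPROPERTIES"))
      }
    }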
@@ -915,47 +910,55 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman
     }
   }
 
-  private def showCreateDataSourceTable(metadata: CatalogTable): String = {
+  private def showCreateTable(metadata: CatalogTable): String = {
     val builder = StringBuilder.newBuilder
 
-    builder ++= s"CREATE TABLE ${table.quotedString} "
-    showDataSourceTableDataColumns(metadata, builder)
-    showDataSourceTableOptions(metadata, builder)
-    showDataSourceTableNonDataColumns(metadata, builder)
+    builder ++= s"CREATE TABLE ${metadata.identifier.quotedString} "
+
+    showColumns(metadata, builder)
+    showDataSourceOptions(metadata, builder)
+    showPartitioningAndBucketing(metadata, builder)
+
+    if (metadata.tableType == EXTERNAL) {
+      builder ++= s"LOCATION '${metadata.storage.locationUri.get}'\n"
+    }
+
+    metadata.comment.foreach { comment =>
+      builder ++= s"COMMENT '${escapeSingleQuotedString(comment)}'"
+    }
 
     builder.toString()
   }
 
-  private def showDataSourceTableDataColumns(
-      metadata: CatalogTable, builder: StringBuilder): Unit = {
+  private def showColumns(metadata: CatalogTable, builder: StringBuilder): Unit = {
     val columns = metadata.schema.fields.map(f => s"${quoteIdentifier(f.name)} ${f.dataType.sql}")
     builder ++= columns.mkString("(", ", ", ")\n")
   }
 
-  private def showDataSourceTableOptions(metadata: CatalogTable, builder: StringBuilder): Unit = {
+  private def showDataSourceOptions(metadata: CatalogTable, builder: StringBuilder): Unit = {
     builder ++= s"USING ${metadata.provider.get}\n"
 
     val dataSourceOptions = metadata.storage.properties.map {
       case (key, value) => s"${quoteIdentifier(key)} '${escapeSingleQuotedString(value)}'"
-    } ++ metadata.storage.locationUri.flatMap { location =>
-      if (metadata.tableType == MANAGED) {
-        // If it's a managed table, omit PATH option. Spark SQL always creates external table
-        // when the table creation DDL contains the PATH option.
-        None
-      } else {
-        Some(s"path '${escapeSingleQuotedString(location)}'")
-      }
+    }
+
+    val hiveOptions = if (DDLUtils.isHiveTable(metadata)) {
+      Seq(
+        s"${DDLUtils.HIVE_SERDE_OPTION} '${metadata.storage.serde.get}'",
+        s"${DDLUtils.HIVE_INPUT_FORMAT_OPTION} '${metadata.storage.inputFormat.get}'",
+        s"${DDLUtils.HIVE_OUTPUT_FORMAT_OPTION} '${metadata.storage.outputFormat.get}'")
+    } else {
+      Seq.empty[String]
     }
 
-    if (dataSourceOptions.nonEmpty) {
+    // Guard on the combined list: a Hive table may carry no storage properties but must
+    // still emit its serde classes.
+    val allOptions = dataSourceOptions ++ hiveOptions
+
+    if (allOptions.nonEmpty) {
       builder ++= "OPTIONS (\n"
-      builder ++= dataSourceOptions.mkString("  ", ",\n  ", "\n")
+      builder ++= allOptions.mkString("  ", ",\n  ", "\n")
       builder ++= ")\n"
     }
   }
 
-  private def showDataSourceTableNonDataColumns(
-      metadata: CatalogTable, builder: StringBuilder): Unit = {
+  private def showPartitioningAndBucketing(metadata: CatalogTable, builder: StringBuilder): Unit = {
     val partCols = metadata.partitionColumnNames
     if (partCols.nonEmpty) {
       builder ++= s"PARTITIONED BY ${partCols.mkString("(", ", ", ")")}\n"
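Note: one detail worth calling out in showDataSourceOptions is why the
emptiness guard is applied to the combined list: for a Hive table the storage
properties can be empty while the serde options are not, and the OPTIONS
clause must still be emitted. A self-contained sketch of the clause assembly;
escapeSingleQuotedString here is a simplified stand-in for Spark's internal
helper of the same name, and the serde classes are the usual Hive text-format
defaults:

    object OptionsClauseSketch {
      // Simplified stand-in for Spark's internal escapeSingleQuotedString helper.
      private def escapeSingleQuotedString(s: String): String =
        s.flatMap {
          case '\'' => "\\'"
          case c => c.toString
        }

      def optionsClause(
          dataSourceOptions: Seq[(String, String)],
          hiveOptions: Seq[(String, String)]): String = {
        val all = (dataSourceOptions ++ hiveOptions).map {
          case (key, value) => s"$key '${escapeSingleQuotedString(value)}'"
        }
        // Guard on the combined list, mirroring the patched check above.
        if (all.isEmpty) "" else all.mkString("OPTIONS (\n  ", ",\n  ", "\n)\n")
      }

      def main(args: Array[String]): Unit = {
        // A Hive table with no extra storage properties still gets its
        // serde classes emitted.
        println(optionsClause(
          Nil,
          Seq(
            "serde" -> "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
            "inputFormat" -> "org.apache.hadoop.mapred.TextInputFormat",
            "outputFormat" -> "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")))
      }
    }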
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveOptions.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveOptions.scala
index 35b7a681f12e..48d55e93c06b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveOptions.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveOptions.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.hive.execution
 
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
+import org.apache.spark.sql.execution.command.DDLUtils
 
 /**
  * Options for the Hive data source. Note that rule `DetermineHiveSerde` will extract Hive
@@ -87,9 +88,9 @@ object HiveOptions {
   }
 
   val FILE_FORMAT = newOption("fileFormat")
-  val INPUT_FORMAT = newOption("inputFormat")
-  val OUTPUT_FORMAT = newOption("outputFormat")
-  val SERDE = newOption("serde")
+  val INPUT_FORMAT = newOption(DDLUtils.HIVE_INPUT_FORMAT_OPTION)
+  val OUTPUT_FORMAT = newOption(DDLUtils.HIVE_OUTPUT_FORMAT_OPTION)
+  val SERDE = newOption(DDLUtils.HIVE_SERDE_OPTION)
 
   // A map from the public delimiter option keys to the underlying Hive serde property keys.
   val delimiterOptions = Map(
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala
index cc26b3218497..bd9f03fb7ce1 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala
@@ -338,9 +338,7 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSing
       "totalSize",
       "totalNumberFiles",
       "maxFileSize",
-      "minFileSize",
-      // EXTERNAL is not non-deterministic, but it is filtered out for external tables.
-      "EXTERNAL"
+      "minFileSize"
     )
 
     table.copy(
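Note: the test change follows from the syntax switch: external tables are now
rendered with an explicit LOCATION clause instead of relying on Hive's
EXTERNAL table property, so the suite no longer needs to filter EXTERNAL out
when comparing metadata. The round-trip property the suite protects, as a
hedged sketch meant to live inside a suite like ShowCreateTableSuite (which
already mixes in SQLTestUtils and TestHiveSingleton); the src/src_copy names
and the direct string surgery are illustrative, not the suite's exact helpers:

    test("hive table DDL round-trips through the unified syntax") {
      withTable("src_copy") {
        // Take the generated statement and replay it under a new name.
        val stmt = spark.sql("SHOW CREATE TABLE src").collect().head.getString(0)
        spark.sql(stmt.replaceFirst("`src`", "`src_copy`"))
        assert(spark.table("src_copy").schema == spark.table("src").schema)
      }
    }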