
Commit dddda52

move hive hack for data source table into HiveExternalCatalog
1 parent b781ef8 commit dddda52

13 files changed: +252 additions, -401 deletions

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala

Lines changed: 2 additions & 0 deletions
@@ -112,6 +112,7 @@ case class BucketSpec(
  * Note that Hive's metastore also tracks skewed columns. We should consider adding that in the
  * future once we have a better understanding of how we want to handle skewed columns.
  *
+ * @param provider the name of the data source provider for this table, e.g. parquet, json, etc.
  * @param unsupportedFeatures is a list of string descriptions of features that are used by the
  *                            underlying table but not supported by Spark SQL yet.
  */
@@ -120,6 +121,7 @@ case class CatalogTable(
     tableType: CatalogTableType,
     storage: CatalogStorageFormat,
     schema: StructType,
+    provider: Option[String] = None,
     partitionColumnNames: Seq[String] = Seq.empty,
     bucketSpec: Option[BucketSpec] = None,
     owner: String = "",
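
With this change a CatalogTable records its data source provider as a first-class field instead of a hidden table property. A minimal sketch of how a caller might fill in the new field; the table name, schema, and "parquet" provider below are illustrative values, not taken from this commit:

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.types.StructType

// Hypothetical managed table described through the new provider field.
val table = CatalogTable(
  identifier = TableIdentifier("events", Some("default")),
  tableType = CatalogTableType.MANAGED,
  storage = CatalogStorageFormat.empty,
  schema = new StructType().add("id", "long").add("ts", "timestamp"),
  provider = Some("parquet"))  // previously only recorded via metastore table properties (DATASOURCE_PROVIDER in the removed helper)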

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala

Lines changed: 6 additions & 2 deletions
@@ -552,7 +552,8 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac
       identifier = TableIdentifier("my_table", Some("db1")),
       tableType = CatalogTableType.MANAGED,
       storage = CatalogStorageFormat(None, None, None, None, false, Map.empty),
-      schema = new StructType().add("a", "int").add("b", "string")
+      schema = new StructType().add("a", "int").add("b", "string"),
+      provider = Some("hive")
     )
 
     catalog.createTable("db1", table, ignoreIfExists = false)
@@ -571,7 +572,8 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac
       storage = CatalogStorageFormat(
         Some(Utils.createTempDir().getAbsolutePath),
         None, None, None, false, Map.empty),
-      schema = new StructType().add("a", "int").add("b", "string")
+      schema = new StructType().add("a", "int").add("b", "string"),
+      provider = Some("hive")
     )
     catalog.createTable("db1", externalTable, ignoreIfExists = false)
     assert(!exists(db.locationUri, "external_table"))
@@ -589,6 +591,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac
         .add("col2", "string")
         .add("a", "int")
         .add("b", "string"),
+      provider = Some("hive"),
       partitionColumnNames = Seq("a", "b")
     )
     catalog.createTable("db1", table, ignoreIfExists = false)
@@ -692,6 +695,7 @@ abstract class CatalogTestUtils {
         .add("col2", "string")
         .add("a", "int")
         .add("b", "string"),
+      provider = Some("hive"),
       partitionColumnNames = Seq("a", "b"),
       bucketSpec = Some(BucketSpec(4, Seq("col1"), Nil)))
   }

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala

Lines changed: 3 additions & 2 deletions
@@ -957,7 +957,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
     // Storage format
     val defaultStorage: CatalogStorageFormat = {
       val defaultStorageType = conf.getConfString("hive.default.fileformat", "textfile")
-      val defaultHiveSerde = HiveSerDe.sourceToSerDe(defaultStorageType, conf)
+      val defaultHiveSerde = HiveSerDe.sourceToSerDe(defaultStorageType)
       CatalogStorageFormat(
         locationUri = None,
         inputFormat = defaultHiveSerde.flatMap(_.inputFormat)
@@ -1001,6 +1001,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
       tableType = tableType,
       storage = storage,
       schema = schema,
+      provider = Some("hive"),
       partitionColumnNames = partitionCols.map(_.name),
       properties = properties,
       comment = comment)
@@ -1101,7 +1102,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
   override def visitGenericFileFormat(
       ctx: GenericFileFormatContext): CatalogStorageFormat = withOrigin(ctx) {
     val source = ctx.identifier.getText
-    HiveSerDe.sourceToSerDe(source, conf) match {
+    HiveSerDe.sourceToSerDe(source) match {
       case Some(s) =>
         CatalogStorageFormat.empty.copy(
           inputFormat = s.inputFormat,
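
Two things change in the parser: HiveSerDe.sourceToSerDe no longer takes a SQLConf argument, and every table created through Hive DDL syntax is tagged with provider = Some("hive"). A rough sketch of the lookup under the new signature; treat the behaviour described in the comments as an assumption, since HiveSerDe itself is not part of this diff:

import org.apache.spark.sql.internal.HiveSerDe

// Resolve a storage-format keyword to its Hive serde description, if one is known.
// Each field is an Option[String] holding a fully qualified class name.
HiveSerDe.sourceToSerDe("parquet") match {
  case Some(serde) =>
    println(serde.inputFormat)   // Hive input format class, if defined
    println(serde.outputFormat)  // Hive output format class, if defined
    println(serde.serde)         // serde class, if defined
  case None =>
    println("no Hive mapping for this format")  // caller falls back to a Spark-specific layout
}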

sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala

Lines changed: 21 additions & 207 deletions
@@ -19,9 +19,6 @@ package org.apache.spark.sql.execution.command
 
 import java.util.regex.Pattern
 
-import scala.collection.mutable
-import scala.util.control.NonFatal
-
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
@@ -30,7 +27,6 @@ import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.datasources._
-import org.apache.spark.sql.internal.HiveSerDe
 import org.apache.spark.sql.sources.{BaseRelation, InsertableRelation}
 import org.apache.spark.sql.types._
 
@@ -124,16 +120,17 @@ case class CreateDataSourceTableCommand(
       res
     }
 
-    CreateDataSourceTableUtils.createDataSourceTable(
-      sparkSession = sparkSession,
-      tableIdent = tableIdent,
+    val table = CatalogTable(
+      identifier = tableIdent,
+      tableType = if (isExternal) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED,
+      storage = CatalogStorageFormat.empty.copy(properties = optionsWithPath),
       schema = dataSource.schema,
-      partitionColumns = partitionColumns,
-      bucketSpec = bucketSpec,
-      provider = provider,
-      options = optionsWithPath,
-      isExternal = isExternal)
+      provider = Some(provider),
+      partitionColumnNames = partitionColumns,
+      bucketSpec = bucketSpec
+    )
 
+    sessionState.catalog.createTable(table, ignoreIfExists)
     Seq.empty[Row]
   }
 }
@@ -235,7 +232,7 @@ case class CreateDataSourceTableAsSelectCommand(
         }
         existingSchema = Some(l.schema)
       case s: SimpleCatalogRelation if DDLUtils.isDatasourceTable(s.metadata) =>
-        existingSchema = Some(DDLUtils.getSchemaFromTableProperties(s.metadata))
+        existingSchema = Some(s.metadata.schema)
       case o =>
         throw new AnalysisException(s"Saving data in ${o.toString} is not supported.")
     }
@@ -275,15 +272,17 @@ case class CreateDataSourceTableAsSelectCommand(
       // We will use the schema of resolved.relation as the schema of the table (instead of
       // the schema of df). It is important since the nullability may be changed by the relation
       // provider (for example, see org.apache.spark.sql.parquet.DefaultSource).
-      CreateDataSourceTableUtils.createDataSourceTable(
-        sparkSession = sparkSession,
-        tableIdent = tableIdent,
-        schema = result.schema,
-        partitionColumns = partitionColumns,
-        bucketSpec = bucketSpec,
-        provider = provider,
-        options = optionsWithPath,
-        isExternal = isExternal)
+      val schema = result.schema
+      val table = CatalogTable(
+        identifier = tableIdent,
+        tableType = if (isExternal) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED,
+        storage = CatalogStorageFormat.empty.copy(properties = optionsWithPath),
+        schema = schema,
+        provider = Some(provider),
+        partitionColumnNames = partitionColumns,
+        bucketSpec = bucketSpec
+      )
+      sessionState.catalog.createTable(table, ignoreIfExists = false)
     }
 
     // Refresh the cache of the table in the catalog.
@@ -324,189 +323,4 @@ object CreateDataSourceTableUtils extends Logging {
 
     matcher.matches()
   }
-
-  def createDataSourceTable(
-      sparkSession: SparkSession,
-      tableIdent: TableIdentifier,
-      schema: StructType,
-      partitionColumns: Array[String],
-      bucketSpec: Option[BucketSpec],
-      provider: String,
-      options: Map[String, String],
-      isExternal: Boolean): Unit = {
-    val tableProperties = new mutable.HashMap[String, String]
-    tableProperties.put(DATASOURCE_PROVIDER, provider)
-
-    // Serialized JSON schema string may be too long to be stored into a single metastore table
-    // property. In this case, we split the JSON string and store each part as a separate table
-    // property.
-    val threshold = sparkSession.sessionState.conf.schemaStringLengthThreshold
-    val schemaJsonString = schema.json
-    // Split the JSON string.
-    val parts = schemaJsonString.grouped(threshold).toSeq
-    tableProperties.put(DATASOURCE_SCHEMA_NUMPARTS, parts.size.toString)
-    parts.zipWithIndex.foreach { case (part, index) =>
-      tableProperties.put(s"$DATASOURCE_SCHEMA_PART_PREFIX$index", part)
-    }
-
-    if (partitionColumns.length > 0) {
-      tableProperties.put(DATASOURCE_SCHEMA_NUMPARTCOLS, partitionColumns.length.toString)
-      partitionColumns.zipWithIndex.foreach { case (partCol, index) =>
-        tableProperties.put(s"$DATASOURCE_SCHEMA_PARTCOL_PREFIX$index", partCol)
-      }
-    }
-
-    if (bucketSpec.isDefined) {
-      val BucketSpec(numBuckets, bucketColumnNames, sortColumnNames) = bucketSpec.get
-
-      tableProperties.put(DATASOURCE_SCHEMA_NUMBUCKETS, numBuckets.toString)
-      tableProperties.put(DATASOURCE_SCHEMA_NUMBUCKETCOLS, bucketColumnNames.length.toString)
-      bucketColumnNames.zipWithIndex.foreach { case (bucketCol, index) =>
-        tableProperties.put(s"$DATASOURCE_SCHEMA_BUCKETCOL_PREFIX$index", bucketCol)
-      }
-
-      if (sortColumnNames.nonEmpty) {
-        tableProperties.put(DATASOURCE_SCHEMA_NUMSORTCOLS, sortColumnNames.length.toString)
-        sortColumnNames.zipWithIndex.foreach { case (sortCol, index) =>
-          tableProperties.put(s"$DATASOURCE_SCHEMA_SORTCOL_PREFIX$index", sortCol)
-        }
-      }
-    }
-
-    val tableType = if (isExternal) {
-      tableProperties.put("EXTERNAL", "TRUE")
-      CatalogTableType.EXTERNAL
-    } else {
-      tableProperties.put("EXTERNAL", "FALSE")
-      CatalogTableType.MANAGED
-    }
-
-    val maybeSerDe = HiveSerDe.sourceToSerDe(provider, sparkSession.sessionState.conf)
-    val dataSource =
-      DataSource(
-        sparkSession,
-        userSpecifiedSchema = Some(schema),
-        partitionColumns = partitionColumns,
-        bucketSpec = bucketSpec,
-        className = provider,
-        options = options)
-
-    def newSparkSQLSpecificMetastoreTable(): CatalogTable = {
-      CatalogTable(
-        identifier = tableIdent,
-        tableType = tableType,
-        schema = new StructType,
-        storage = CatalogStorageFormat(
-          locationUri = None,
-          inputFormat = None,
-          outputFormat = None,
-          serde = None,
-          compressed = false,
-          properties = options
-        ),
-        properties = tableProperties.toMap)
-    }
-
-    def newHiveCompatibleMetastoreTable(
-        relation: HadoopFsRelation,
-        serde: HiveSerDe): CatalogTable = {
-      assert(partitionColumns.isEmpty)
-      assert(relation.partitionSchema.isEmpty)
-
-      CatalogTable(
-        identifier = tableIdent,
-        tableType = tableType,
-        storage = CatalogStorageFormat(
-          locationUri = Some(relation.location.paths.map(_.toUri.toString).head),
-          inputFormat = serde.inputFormat,
-          outputFormat = serde.outputFormat,
-          serde = serde.serde,
-          compressed = false,
-          properties = options
-        ),
-        schema = relation.schema,
-        properties = tableProperties.toMap,
-        viewText = None)
-    }
-
-    // TODO: Support persisting partitioned data source relations in Hive compatible format
-    val qualifiedTableName = tableIdent.quotedString
-    val skipHiveMetadata = options.getOrElse("skipHiveMetadata", "false").toBoolean
-    val resolvedRelation = dataSource.resolveRelation(checkPathExist = false)
-    val (hiveCompatibleTable, logMessage) = (maybeSerDe, resolvedRelation) match {
-      case _ if skipHiveMetadata =>
-        val message =
-          s"Persisting partitioned data source relation $qualifiedTableName into " +
-            "Hive metastore in Spark SQL specific format, which is NOT compatible with Hive."
-        (None, message)
-
-      case (Some(serde), relation: HadoopFsRelation) if relation.location.paths.length == 1 &&
-        relation.partitionSchema.isEmpty && relation.bucketSpec.isEmpty =>
-        val hiveTable = newHiveCompatibleMetastoreTable(relation, serde)
-        val message =
-          s"Persisting data source relation $qualifiedTableName with a single input path " +
-            s"into Hive metastore in Hive compatible format. Input path: " +
-            s"${relation.location.paths.head}."
-        (Some(hiveTable), message)
-
-      case (Some(serde), relation: HadoopFsRelation) if relation.partitionSchema.nonEmpty =>
-        val message =
-          s"Persisting partitioned data source relation $qualifiedTableName into " +
-            "Hive metastore in Spark SQL specific format, which is NOT compatible with Hive. " +
-            "Input path(s): " + relation.location.paths.mkString("\n", "\n", "")
-        (None, message)
-
-      case (Some(serde), relation: HadoopFsRelation) if relation.bucketSpec.nonEmpty =>
-        val message =
-          s"Persisting bucketed data source relation $qualifiedTableName into " +
-            "Hive metastore in Spark SQL specific format, which is NOT compatible with Hive. " +
-            "Input path(s): " + relation.location.paths.mkString("\n", "\n", "")
-        (None, message)
-
-      case (Some(serde), relation: HadoopFsRelation) =>
-        val message =
-          s"Persisting data source relation $qualifiedTableName with multiple input paths into " +
-            "Hive metastore in Spark SQL specific format, which is NOT compatible with Hive. " +
-            s"Input paths: " + relation.location.paths.mkString("\n", "\n", "")
-        (None, message)
-
-      case (Some(serde), _) =>
-        val message =
-          s"Data source relation $qualifiedTableName is not a " +
-            s"${classOf[HadoopFsRelation].getSimpleName}. Persisting it into Hive metastore " +
-            "in Spark SQL specific format, which is NOT compatible with Hive."
-        (None, message)
-
-      case _ =>
-        val message =
-          s"Couldn't find corresponding Hive SerDe for data source provider $provider. " +
-            s"Persisting data source relation $qualifiedTableName into Hive metastore in " +
-            s"Spark SQL specific format, which is NOT compatible with Hive."
-        (None, message)
-    }
-
-    (hiveCompatibleTable, logMessage) match {
-      case (Some(table), message) =>
-        // We first try to save the metadata of the table in a Hive compatible way.
-        // If Hive throws an error, we fall back to save its metadata in the Spark SQL
-        // specific way.
-        try {
-          logInfo(message)
-          sparkSession.sessionState.catalog.createTable(table, ignoreIfExists = false)
-        } catch {
-          case NonFatal(e) =>
-            val warningMessage =
-              s"Could not persist $qualifiedTableName in a Hive compatible way. Persisting " +
-                s"it into Hive metastore in Spark SQL specific format."
-            logWarning(warningMessage, e)
-            val table = newSparkSQLSpecificMetastoreTable()
-            sparkSession.sessionState.catalog.createTable(table, ignoreIfExists = false)
-        }
-
-      case (None, message) =>
-        logWarning(message)
-        val table = newSparkSQLSpecificMetastoreTable()
-        sparkSession.sessionState.catalog.createTable(table, ignoreIfExists = false)
-    }
-  }
 }
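
The deleted createDataSourceTable helper is the "hive hack" named in the commit title: it serialized the data source schema, partition columns, and bucketing spec into metastore table properties and then chose between a Hive-compatible and a Spark SQL specific CatalogTable. After this commit that encoding presumably lives inside HiveExternalCatalog rather than in the command layer. A condensed, self-contained sketch of the schema-splitting part of the removed logic; the literal property keys and the 4000-character threshold are assumptions standing in for the DATASOURCE_SCHEMA_* constants and the schemaStringLengthThreshold conf used by the deleted code:

import scala.collection.mutable
import org.apache.spark.sql.types.StructType

// Split a schema's JSON form across several table properties so that no single
// metastore property value exceeds the length threshold.
def schemaToTableProperties(schema: StructType, threshold: Int = 4000): Map[String, String] = {
  val props = new mutable.HashMap[String, String]
  val parts = schema.json.grouped(threshold).toSeq
  props.put("spark.sql.sources.schema.numParts", parts.size.toString)  // assumed key
  parts.zipWithIndex.foreach { case (part, index) =>
    props.put(s"spark.sql.sources.schema.part.$index", part)           // assumed key prefix
  }
  props.toMap
}

Reading the table back reverses the split by concatenating the parts before parsing the JSON, which is what DDLUtils.getSchemaFromTableProperties did before this change replaced that call with s.metadata.schema.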
