@@ -323,7 +323,14 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive

    // TODO: Support persisting partitioned data source relations in Hive compatible format
    val qualifiedTableName = tableIdent.quotedString
    val skipHiveMetadata = options.getOrElse("skipHiveMetadata", "false").toBoolean
    val (hiveCompatibleTable, logMessage) = (maybeSerDe, dataSource.relation) match {
      case _ if skipHiveMetadata =>
        val message =
          s"Persisting partitioned data source relation $qualifiedTableName into " +
            "Hive metastore in Spark SQL specific format, which is NOT compatible with Hive."
        (None, message)

      case (Some(serde), relation: HadoopFsRelation)
          if relation.paths.length == 1 && relation.partitionColumns.isEmpty =>
        val hiveTable = newHiveCompatibleMetastoreTable(relation, serde)
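For context, a minimal sketch of how the new option might be exercised from user code, assuming DataFrameWriter options are forwarded unchanged to the data source table's options map read above; the path and table name are hypothetical:

// Hedged sketch: opt out of Hive-compatible metadata when saving a data source table.
// Assumes the writer options reach createDataSourceTable as in the hunk above.
sqlContext.range(10).write
  .format("parquet")
  .option("path", "/tmp/example_table")      // hypothetical location
  .option("skipHiveMetadata", "true")        // persist in Spark SQL specific format only
  .saveAsTable("example_spark_only_table")   // hypothetical table name

The same option could presumably also be supplied via CREATE TABLE ... USING parquet OPTIONS (...), since both paths populate the same options map.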
@@ -847,4 +847,36 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
sqlContext.sql("""use default""")
sqlContext.sql("""drop database if exists testdb8156 CASCADE""")
}

test("skip hive metadata on table creation") {
val schema = StructType((1 to 5).map(i => StructField(s"c_$i", StringType)))

catalog.createDataSourceTable(
tableIdent = TableIdentifier("not_skip_hive_metadata"),
userSpecifiedSchema = Some(schema),
partitionColumns = Array.empty[String],
bucketSpec = None,
provider = "parquet",
options = Map("path" -> "just a dummy path", "skipHiveMetadata" -> "false"),
isExternal = false)

// As a proxy for verifying that the table was stored in Hive compatible format, we verify that
// each column of the table is of native type StringType.
assert(catalog.client.getTable("default", "not_skip_hive_metadata").schema
.forall(column => HiveMetastoreTypes.toDataType(column.hiveType) == StringType))

    catalog.createDataSourceTable(
      tableIdent = TableIdentifier("skip_hive_metadata"),
      userSpecifiedSchema = Some(schema),
      partitionColumns = Array.empty[String],
      bucketSpec = None,
      provider = "parquet",
      options = Map("path" -> "just a dummy path", "skipHiveMetadata" -> "true"),
      isExternal = false)

    // As a proxy for verifying that the table was stored in the Spark SQL specific format,
    // we verify that every column of the table has type ArrayType(StringType).
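    // Note (assumed behavior): when Hive compatibility is skipped, the real schema is kept
    // only in Spark SQL table properties rather than as metastore columns, so the metastore
    // reports a placeholder column of type array<string>; seeing ArrayType(StringType) here
    // therefore indicates the Spark SQL specific path was taken.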
assert(catalog.client.getTable("default", "skip_hive_metadata").schema
.forall(column => HiveMetastoreTypes.toDataType(column.hiveType) == ArrayType(StringType)))
Review comment (Contributor): Let's add comments to explain why we need to check this.

  }
}