Skip to content

Commit 014fcee

Browse files
cloud-fan
authored and rxin committed
[SPARK-18464][SQL] support old table which doesn't store schema in metastore
## What changes were proposed in this pull request? Before Spark 2.1, users can create an external data source table without schema, and we will infer the table schema at runtime. In Spark 2.1, we decided to infer the schema when the table was created, so that we don't need to infer it again and again at runtime. This is a good improvement, but we should still respect and support old tables which doesn't store table schema in metastore. ## How was this patch tested? regression test. Author: Wenchen Fan <[email protected]> Closes #15900 from cloud-fan/hive-catalog. (cherry picked from commit 07b3f04) Signed-off-by: Reynold Xin <[email protected]>
1 parent 6a3cbbc commit 014fcee

File tree

4 files changed

+37
-2
lines changed

4 files changed

+37
-2
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,13 @@ case class DescribeTableCommand(
431431
describeSchema(catalog.lookupRelation(table).schema, result)
432432
} else {
433433
val metadata = catalog.getTableMetadata(table)
434-
describeSchema(metadata.schema, result)
434+
if (metadata.schema.isEmpty) {
435+
// In older version(prior to 2.1) of Spark, the table schema can be empty and should be
436+
// inferred at runtime. We should still support it.
437+
describeSchema(catalog.lookupRelation(metadata.identifier).schema, result)
438+
} else {
439+
describeSchema(metadata.schema, result)
440+
}
435441

436442
describePartitionInfo(metadata, result)
437443

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,6 +1023,11 @@ object HiveExternalCatalog {
10231023
// After SPARK-6024, we removed this flag.
10241024
// Although we are not using `spark.sql.sources.schema` any more, we need to still support.
10251025
DataType.fromJson(schema.get).asInstanceOf[StructType]
1026+
} else if (props.filterKeys(_.startsWith(DATASOURCE_SCHEMA_PREFIX)).isEmpty) {
1027+
// If there is no schema information in table properties, it means the schema of this table
1028+
// was empty when saving into metastore, which is possible in older version(prior to 2.1) of
1029+
// Spark. We should respect it.
1030+
new StructType()
10261031
} else {
10271032
val numSchemaParts = props.get(DATASOURCE_SCHEMA_NUMPARTS)
10281033
if (numSchemaParts.isDefined) {

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,9 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
6464
val dataSource =
6565
DataSource(
6666
sparkSession,
67-
userSpecifiedSchema = Some(table.schema),
67+
// In older version(prior to 2.1) of Spark, the table schema can be empty and should be
68+
// inferred at runtime. We should still support it.
69+
userSpecifiedSchema = if (table.schema.isEmpty) None else Some(table.schema),
6870
partitionColumns = table.partitionColumnNames,
6971
bucketSpec = table.bucketSpec,
7072
className = table.provider.get,

sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1371,4 +1371,26 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
13711371
}
13721372
}
13731373
}
1374+
1375+
test("SPARK-18464: support old table which doesn't store schema in table properties") {
1376+
withTable("old") {
1377+
withTempPath { path =>
1378+
Seq(1 -> "a").toDF("i", "j").write.parquet(path.getAbsolutePath)
1379+
val tableDesc = CatalogTable(
1380+
identifier = TableIdentifier("old", Some("default")),
1381+
tableType = CatalogTableType.EXTERNAL,
1382+
storage = CatalogStorageFormat.empty.copy(
1383+
properties = Map("path" -> path.getAbsolutePath)
1384+
),
1385+
schema = new StructType(),
1386+
properties = Map(
1387+
HiveExternalCatalog.DATASOURCE_PROVIDER -> "parquet"))
1388+
hiveClient.createTable(tableDesc, ignoreIfExists = false)
1389+
1390+
checkAnswer(spark.table("old"), Row(1, "a"))
1391+
1392+
checkAnswer(sql("DESC old"), Row("i", "int", null) :: Row("j", "string", null) :: Nil)
1393+
}
1394+
}
1395+
}
13741396
}

0 commit comments

Comments (0)