@@ -431,7 +431,13 @@ case class DescribeTableCommand(
       describeSchema(catalog.lookupRelation(table).schema, result)
     } else {
       val metadata = catalog.getTableMetadata(table)
-      describeSchema(metadata.schema, result)
+      if (metadata.schema.isEmpty) {
+        // In older versions of Spark (prior to 2.1), the table schema can be empty and
+        // should be inferred at runtime. We should still support it.
+        describeSchema(catalog.lookupRelation(metadata.identifier).schema, result)
+      } else {
+        describeSchema(metadata.schema, result)
+      }

       describePartitionInfo(metadata, result)
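To illustrate the behavioral change, here is a hypothetical spark-shell session against a table whose metastore entry was written by Spark < 2.1 with an empty schema (the table name and columns mirror the test added later in this PR):

// Hypothetical session; "old" is a pre-2.1 table whose metastore entry
// carries no schema information.
sql("DESC old").show()
// Before this patch, metadata.schema was empty, so DESC listed no columns.
// With the fallback above, the relation is resolved and the schema is
// inferred from the underlying data files:
// +--------+---------+-------+
// |col_name|data_type|comment|
// +--------+---------+-------+
// |       i|      int|   null|
// |       j|   string|   null|
// +--------+---------+-------+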
@@ -1023,6 +1023,11 @@ object HiveExternalCatalog {
       // After SPARK-6024, we removed this flag.
       // Although we are not using `spark.sql.sources.schema` any more, we still need to support it.
       DataType.fromJson(schema.get).asInstanceOf[StructType]
+    } else if (props.filterKeys(_.startsWith(DATASOURCE_SCHEMA_PREFIX)).isEmpty) {
+      // If there is no schema information in the table properties, the schema of this table
+      // was empty when it was saved into the metastore, which is possible in older versions
+      // of Spark (prior to 2.1). We should respect it.
+      new StructType()
@yhuai (Contributor), Nov 16, 2016:
btw, a clarification question. This function is only needed for data source tables, right?

PR author (Contributor):
No. Since we also store the schema for Hive tables, a Hive table will also call this function. But a Hive table will never go into this branch, as it always has a schema. (The removal of runtime schema inference happened before we started storing the schema of Hive tables.)

     } else {
       val numSchemaParts = props.get(DATASOURCE_SCHEMA_NUMPARTS)
       if (numSchemaParts.isDefined) {
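For readers less familiar with this code, here is a minimal sketch of the restoration logic that the new branch slots into. The property-key layout (spark.sql.sources.schema for very old tables, spark.sql.sources.schema.numParts plus spark.sql.sources.schema.part.N for split schemas) follows HiveExternalCatalog's conventions, but the function itself is an illustrative simplification, not the actual implementation:

import org.apache.spark.sql.types.{DataType, StructType}

// Illustrative simplification of restoring a schema from table properties.
def restoreSchema(props: Map[String, String]): StructType = {
  val prefix = "spark.sql.sources.schema"
  props.get(prefix) match {
    case Some(json) =>
      // Very old tables stored the whole schema as a single JSON property.
      DataType.fromJson(json).asInstanceOf[StructType]
    case None if props.keys.forall(!_.startsWith(prefix)) =>
      // The branch added in this PR: no schema information at all, i.e. the
      // table was saved with an empty schema by a pre-2.1 Spark. Return an
      // empty schema and let it be inferred at runtime.
      new StructType()
    case None =>
      // Current layout: the schema JSON is split into numbered parts to stay
      // under the metastore's per-property value length limit.
      val numParts = props(prefix + ".numParts").toInt
      val json = (0 until numParts).map(i => props(prefix + ".part." + i)).mkString
      DataType.fromJson(json).asInstanceOf[StructType]
  }
}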
@@ -64,7 +64,9 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
     val dataSource =
       DataSource(
         sparkSession,
-        userSpecifiedSchema = Some(table.schema),
+        // In older versions of Spark (prior to 2.1), the table schema can be empty and
+        // should be inferred at runtime. We should still support it.
+        userSpecifiedSchema = if (table.schema.isEmpty) None else Some(table.schema),
         partitionColumns = table.partitionColumnNames,
         bucketSpec = table.bucketSpec,
         className = table.provider.get,
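A short usage sketch of the effect, assuming the internal DataSource API as used in the hunk above and a SparkSession in scope as sparkSession (the path and provider here are hypothetical):

import org.apache.spark.sql.execution.datasources.DataSource

// With userSpecifiedSchema = None, schema inference happens when the relation
// is resolved (e.g. Parquet footers are read for a parquet table).
val dataSource = DataSource(
  sparkSession,
  userSpecifiedSchema = None, // empty schema stored by a pre-2.1 Spark
  className = "parquet",
  options = Map("path" -> "/warehouse/old_table"))
val relation = dataSource.resolveRelation()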
@@ -1371,4 +1371,26 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
       }
     }
   }
+
+  test("SPARK-18464: support old table which doesn't store schema in table properties") {
+    withTable("old") {
+      withTempPath { path =>
+        Seq(1 -> "a").toDF("i", "j").write.parquet(path.getAbsolutePath)
+        val tableDesc = CatalogTable(
+          identifier = TableIdentifier("old", Some("default")),
+          tableType = CatalogTableType.EXTERNAL,
+          storage = CatalogStorageFormat.empty.copy(
+            properties = Map("path" -> path.getAbsolutePath)
+          ),
+          schema = new StructType(),
+          properties = Map(
+            HiveExternalCatalog.DATASOURCE_PROVIDER -> "parquet"))
+        hiveClient.createTable(tableDesc, ignoreIfExists = false)
+
+        checkAnswer(spark.table("old"), Row(1, "a"))
Contributor:

Can we also test describe table and make sure it provides the correct column info?

+        checkAnswer(sql("DESC old"), Row("i", "int", null) :: Row("j", "string", null) :: Nil)
+      }
+    }
+  }
Contributor:

It would be good to create a set of compatibility tests to make sure a new version of Spark can access table metadata created by an older version (starting from Spark 1.3) without problems. Let's create a follow-up JIRA for this task and do it during the QA period of Spark 2.1.

 }