From 975e4115b3695c556d48ea765c732ce48cd772a4 Mon Sep 17 00:00:00 2001
From: Burak Yavuz
Date: Thu, 5 Mar 2020 17:07:25 -0800
Subject: [PATCH 1/2] AlterTable should be able to change the provider of a table

---
 .../spark/sql/hive/HiveExternalCatalog.scala    |  5 ++++-
 .../sql/hive/HiveExternalCatalogSuite.scala     | 19 +++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index ca292f65efee..93bf96214a5e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -634,7 +634,10 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
         k.startsWith(DATASOURCE_PREFIX) || k.startsWith(STATISTICS_PREFIX) ||
           k.startsWith(CREATED_SPARK_VERSION)
       }
-      val newTableProps = propsFromOldTable ++ tableDefinition.properties + partitionProviderProp
+      val newFormatIfExists = tableDefinition.provider.map(p => Map(DATASOURCE_PROVIDER -> p))
+        .getOrElse(Map.empty)
+      val newTableProps = propsFromOldTable ++ tableDefinition.properties ++
+        newFormatIfExists + partitionProviderProp
 
       // // Add old table's owner if we need to restore
       val owner = Option(tableDefinition.owner).filter(_.nonEmpty).getOrElse(oldTableDef.owner)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
index 0a8889885df7..28a9e0b3dd00 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
@@ -178,4 +178,23 @@ class HiveExternalCatalogSuite extends ExternalCatalogSuite {
     assertThrows[QueryExecutionException](client.runSqlHive(
       "INSERT overwrite directory \"fs://localhost/tmp\" select 1 as a"))
   }
+
+  test("SPARK-31061: alterTable should be able to change table provider") {
+    val catalog = newBasicCatalog()
+    val parquetTable = CatalogTable(
+      identifier = TableIdentifier("parq_tbl", Some("db1")),
+      tableType = CatalogTableType.MANAGED,
+      storage = storageFormat,
+      schema = new StructType().add("col1", "int").add("col2", "string"),
+      provider = Some("parquet"))
+    catalog.createTable(parquetTable, ignoreIfExists = false)
+
+    val rawTable = externalCatalog.client.getTable("db1", "parq_tbl")
+    assert(rawTable.provider === Some("parquet"))
+
+    val fooTable = parquetTable.copy(provider = Some("foo"))
+    catalog.alterTable(fooTable)
+    val alteredTable = externalCatalog.client.getTable("db1", "parq_tbl")
+    assert(alteredTable.provider === Some("foo"))
+  }
 }

From 7a5f2c237bd04ab2adf38b3bea841f988a68d25d Mon Sep 17 00:00:00 2001
From: Burak Yavuz
Date: Thu, 5 Mar 2020 17:41:25 -0800
Subject: [PATCH 2/2] fix bug

---
 .../spark/sql/hive/HiveExternalCatalog.scala    | 13 ++++++---
 .../sql/hive/HiveExternalCatalogSuite.scala     | 27 ++++++++++++++++---
 2 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 93bf96214a5e..be6d824ece68 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -634,10 +634,15 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
         k.startsWith(DATASOURCE_PREFIX) || k.startsWith(STATISTICS_PREFIX) ||
           k.startsWith(CREATED_SPARK_VERSION)
       }
-      val newFormatIfExists = tableDefinition.provider.map(p => Map(DATASOURCE_PROVIDER -> p))
-        .getOrElse(Map.empty)
-      val newTableProps = propsFromOldTable ++ tableDefinition.properties ++
-        newFormatIfExists + partitionProviderProp
+      val newFormatIfExists = tableDefinition.provider.flatMap { p =>
+        if (DDLUtils.isDatasourceTable(tableDefinition)) {
+          Some(DATASOURCE_PROVIDER -> p)
+        } else {
+          None
+        }
+      }
+      val newTableProps =
+        propsFromOldTable ++ tableDefinition.properties + partitionProviderProp ++ newFormatIfExists
 
       // // Add old table's owner if we need to restore
       val owner = Option(tableDefinition.owner).filter(_.nonEmpty).getOrElse(oldTableDef.owner)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
index 28a9e0b3dd00..6247861a0367 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.hive
 
+import java.net.URI
+
 import org.apache.hadoop.conf.Configuration
 
 import org.apache.spark.SparkConf
@@ -184,17 +186,36 @@ class HiveExternalCatalogSuite extends ExternalCatalogSuite {
     val parquetTable = CatalogTable(
       identifier = TableIdentifier("parq_tbl", Some("db1")),
       tableType = CatalogTableType.MANAGED,
-      storage = storageFormat,
+      storage = storageFormat.copy(locationUri = Some(new URI("file:/some/path"))),
       schema = new StructType().add("col1", "int").add("col2", "string"),
       provider = Some("parquet"))
     catalog.createTable(parquetTable, ignoreIfExists = false)
 
-    val rawTable = externalCatalog.client.getTable("db1", "parq_tbl")
+    val rawTable = externalCatalog.getTable("db1", "parq_tbl")
     assert(rawTable.provider === Some("parquet"))
 
     val fooTable = parquetTable.copy(provider = Some("foo"))
     catalog.alterTable(fooTable)
-    val alteredTable = externalCatalog.client.getTable("db1", "parq_tbl")
+    val alteredTable = externalCatalog.getTable("db1", "parq_tbl")
+    assert(alteredTable.provider === Some("foo"))
+  }
+
+  test("SPARK-31061: alterTable should be able to change table provider from hive") {
+    val catalog = newBasicCatalog()
+    val parquetTable = CatalogTable(
+      identifier = TableIdentifier("parq_tbl", Some("db1")),
+      tableType = CatalogTableType.MANAGED,
+      storage = storageFormat,
+      schema = new StructType().add("col1", "int").add("col2", "string"),
+      provider = Some("hive"))
+    catalog.createTable(parquetTable, ignoreIfExists = false)
+
+    val rawTable = externalCatalog.getTable("db1", "parq_tbl")
+    assert(rawTable.provider === Some("hive"))
+
+    val fooTable = rawTable.copy(provider = Some("foo"))
+    catalog.alterTable(fooTable)
+    val alteredTable = externalCatalog.getTable("db1", "parq_tbl")
     assert(alteredTable.provider === Some("foo"))
   }
 }
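
The second commit replaces the unconditional map over tableDefinition.provider with a guarded flatMap, so the spark.sql.sources.provider table property is only carried over for data source tables and is never written for Hive serde tables. A minimal standalone Scala sketch of that guard follows (illustrative only, not part of the patch; the simplified isDatasourceTable below stands in for DDLUtils.isDatasourceTable, and ProviderPropertySketch is a made-up name for this example):

object ProviderPropertySketch {
  // Same property key that HiveExternalCatalog uses for DATASOURCE_PROVIDER.
  val DATASOURCE_PROVIDER = "spark.sql.sources.provider"

  // Assumption: mirrors DDLUtils.isDatasourceTable, which treats any defined
  // provider other than "hive" as a data source table.
  def isDatasourceTable(provider: Option[String]): Boolean =
    provider.exists(_.toLowerCase != "hive")

  // The guard introduced by the second commit: emit the provider property
  // only when the altered table is a data source table.
  def newFormatIfExists(provider: Option[String]): Option[(String, String)] =
    provider.flatMap { p =>
      if (isDatasourceTable(provider)) Some(DATASOURCE_PROVIDER -> p) else None
    }

  def main(args: Array[String]): Unit = {
    println(newFormatIfExists(Some("parquet"))) // Some((spark.sql.sources.provider,parquet))
    println(newFormatIfExists(Some("hive")))    // None: Hive serde tables get no provider property
    println(newFormatIfExists(None))            // None
  }
}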