Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,15 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
}

private def getLocationFromStorageProps(table: CatalogTable): Option[String] = {
CaseInsensitiveMap(table.storage.properties).get("path")
val storageLoc = table.storage.locationUri.map(CatalogUtils.URIToString(_))
val storageProp = CaseInsensitiveMap(table.storage.properties).get("path")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit worried about string comparison. Are you sure the path string is always equal to the URI string? Shall we do normalization before comparing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We normalization URI as toString => CatalogUtils.URIToString(_) may be better ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to be safe, can we compare the URI? we can convert path string to URI with CatalogUtils.stringToURI.

// storageProp == None is hive table
if (storageLoc.equals(storageProp) || storageProp == None) {
storageLoc
} else {
throw new AnalysisException(s"path in location ${storageLoc} " +
s"not equal to table prop path ${storageProp}, please use alter table in spark")
}
}

private def updateLocationInStorageProps(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import java.net.URI
import org.apache.hadoop.conf.Configuration

import org.apache.spark.SparkConf
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.execution.QueryExecutionException
Expand Down Expand Up @@ -202,4 +203,26 @@ class HiveExternalCatalogSuite extends ExternalCatalogSuite {
assert(alteredTable.provider === Some("foo"))
})
}

test("SPARK-31751: serde property `path` overwrites hive table property location") {
val catalog = newBasicCatalog()
val hiveTable = CatalogTable(
identifier = TableIdentifier("parq_alter", Some("db1")),
tableType = CatalogTableType.MANAGED,
storage = storageFormat,
schema = new StructType().add("col1", "int"),
provider = Some("parquet"))
catalog.createTable(hiveTable, ignoreIfExists = false)
val beforeAlterTable = externalCatalog.getTable("db1", "parq_alter")
assert(beforeAlterTable.storage.locationUri.toString.contains("parq_alter"))

externalCatalog.client.runSqlHive(
"alter table db1.parq_alter rename to db1.parq_alter2")

val e = intercept[AnalysisException](
externalCatalog.getTable("db1", "parq_alter2")
)
assert(e.getMessage.contains("not equal to table prop path")
&& e.getMessage.contains("parq_alter2"))
}
Comment on lines +219 to +227
Copy link
Contributor Author

@TJX2014 TJX2014 Jun 29, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will get an exception when the path property is not consistent with storage location.

}