
Commit b0e1a42

Address comments.

1 parent 83d9846

3 files changed: +23 -12 lines

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
Lines changed: 20 additions & 8 deletions
@@ -106,8 +106,14 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with
   }
 
   override def refreshTable(databaseName: String, tableName: String): Unit = {
-    // refresh table does not eagerly reload the cache. It just invalidate the cache.
+    // refreshTable does not eagerly reload the cache. It just invalidates the cache.
     // Next time when we use the table, it will be populated in the cache.
+    // Since we also cache ParquetRelations converted from Hive Parquet tables and
+    // adding converted ParquetRelations into the cache is not defined in the load function
+    // of the cache (instead, we add the cache entry in convertToParquetRelation),
+    // it is better here to invalidate the cache, to avoid confusing warning logs from
+    // the cache loader (e.g. "cannot find data source provider", which is only defined
+    // for data source tables).
     invalidateTable(databaseName, tableName)
   }
 
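The comment above distinguishes invalidating a cache entry from eagerly reloading it through the cache's load function. Below is a minimal, dependency-free sketch of that distinction; ToyLoadingCache, RefreshSketch, and the string payloads are hypothetical stand-ins, not Spark or Guava code, with put playing the role of convertToParquetRelation adding entries out of band:

import scala.collection.mutable

// Toy stand-in for a loading cache: entries can arrive either through the
// loader (data source tables) or through an explicit put (converted Hive
// Parquet tables, added out of band by the conversion path).
class ToyLoadingCache[K, V](load: K => V) {
  private val entries = mutable.Map.empty[K, V]

  def put(key: K, value: V): Unit = entries(key) = value

  // get runs the loader on a miss; for a converted Parquet table this is
  // exactly the path that produces the confusing warning logs.
  def get(key: K): V = entries.getOrElseUpdate(key, load(key))

  // invalidate just drops the entry; nothing is loaded until the next get.
  def invalidate(key: K): Unit = entries -= key
}

object RefreshSketch extends App {
  val cache = new ToyLoadingCache[String, String](table =>
    sys.error(s"cannot find data source provider for $table"))

  cache.put("db.hive_parquet", "converted ParquetRelation")
  cache.invalidate("db.hive_parquet") // refreshTable: invalidate only
  // An eager reload here would hit the loader and fail; instead the entry
  // is re-added by the conversion path the next time the table is used.
  cache.put("db.hive_parquet", "freshly converted ParquetRelation")
  println(cache.get("db.hive_parquet"))
}

Because refreshTable only drops the entry, the loader (which only understands data source tables) is never asked to rebuild a converted Hive Parquet relation.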

@@ -226,21 +232,27 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with
     QualifiedTableName(metastoreRelation.databaseName, metastoreRelation.tableName)
 
   def getCached(
-    tableIdentifier: QualifiedTableName,
-    pathsInMetastore: Seq[String],
-    schemaInMetastore: StructType,
-    partitionSpecInMetastore: Option[PartitionSpec]): Option[LogicalRelation] = {
+      tableIdentifier: QualifiedTableName,
+      pathsInMetastore: Seq[String],
+      schemaInMetastore: StructType,
+      partitionSpecInMetastore: Option[PartitionSpec]): Option[LogicalRelation] = {
     cachedDataSourceTables.getIfPresent(tableIdentifier) match {
       case null => None // Cache miss
-      case logical @ LogicalRelation(parquetRelation: ParquetRelation2) =>
+      case logical@LogicalRelation(parquetRelation: ParquetRelation2) =>
         // If we have the same paths, same schema, and same partition spec,
         // we will use the cached Parquet Relation.
         val useCached =
-          parquetRelation.paths == pathsInMetastore &&
+          parquetRelation.paths.toSet == pathsInMetastore.toSet &&
           logical.schema.sameType(metastoreSchema) &&
           parquetRelation.maybePartitionSpec == partitionSpecInMetastore
 
-        if (useCached) Some(logical) else None
+        if (useCached) {
+          Some(logical)
+        } else {
+          // If the cached relation is not updated, we invalidate it right away.
+          cachedDataSourceTables.invalidate(tableIdentifier)
+          None
+        }
       case other =>
         logWarning(
           s"${metastoreRelation.databaseName}.${metastoreRelation.tableName} should be stored " +

sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
Lines changed: 3 additions & 2 deletions
@@ -58,12 +58,13 @@ case class DropTable(
     try {
       hiveContext.cacheManager.tryUncacheQuery(hiveContext.table(tableName))
     } catch {
-      // This table's metadata is not in
+      // This table's metadata is not in Hive metastore (e.g. the table does not exist).
       case _: org.apache.hadoop.hive.ql.metadata.InvalidTableException =>
+      case _: org.apache.spark.sql.catalyst.analysis.NoSuchTableException =>
       // Other Throwables can be caused by users providing wrong parameters in OPTIONS
       // (e.g. invalid paths). We catch it and log a warning message.
       // Users should be able to drop such kinds of tables regardless if there is an error.
-      case e: Throwable => log.warn(s"${e.getMessage}")
+      case e: Throwable => log.warn(s"${e.getMessage}", e)
     }
     hiveContext.invalidateTable(tableName)
     hiveContext.runSqlHive(s"DROP TABLE $ifExistsClause$tableName")
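In isolation, the DropTable pattern is: swallow the expected "table is missing" exceptions, log anything else together with its stack trace, and drop the table regardless. A hedged sketch with hypothetical exception classes and a println-based warn standing in for the real logger:

// Hypothetical stand-ins for the two expected failure modes.
class InvalidTableException(msg: String) extends Exception(msg)
class NoSuchTableException extends Exception

object DropTableSketch extends App {
  def tryUncache(tableName: String): Unit =
    throw new NoSuchTableException

  def warn(msg: String, e: Throwable): Unit = {
    // Passing the Throwable (not just e.getMessage) keeps the stack trace,
    // which is the point of the log.warn(s"...", e) change above.
    println(s"WARN: $msg")
    e.printStackTrace()
  }

  try tryUncache("t") catch {
    // Expected: the table simply isn't there; dropping should proceed silently.
    case _: InvalidTableException =>
    case _: NoSuchTableException =>
    // Unexpected (e.g. bad OPTIONS paths): log with the stack trace, keep going.
    case e: Throwable => warn(e.getMessage, e)
  }
  println("DROP TABLE proceeds regardless")
}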

sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
Lines changed: 0 additions & 2 deletions
@@ -473,15 +473,13 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase {
     // Right now, insert into a partitioned Parquet is not supported in data source Parquet.
     // So, we expect it is not cached.
     assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null)
-    conf.setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, "false")
     sql(
       """
         |INSERT INTO TABLE test_parquet_partitioned_cache_test
         |PARTITION (date='2015-04-02')
         |select a, b from jt
       """.stripMargin)
     assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null)
-    conf.setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, "true")
 
     // Make sure we can cache the partitioned table.
     table("test_parquet_partitioned_cache_test")
