
Commit ce89ff4

marmbrus authored and yhuai committed
[SPARK-9386] [SQL] Feature flag for metastore partition pruning
Since we have been seeing a lot of failures related to this new feature, let's put it behind a flag and turn it off by default.

Author: Michael Armbrust <[email protected]>

Closes apache#7703 from marmbrus/optionalMetastorePruning and squashes the following commits:

6ad128c [Michael Armbrust] style
8447835 [Michael Armbrust] [SPARK-9386][SQL] Feature flag for metastore partition pruning
fd37b87 [Michael Armbrust] add config flag
1 parent 8ddfa52 commit ce89ff4

3 files changed: 22 additions & 7 deletions


sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala

Lines changed: 7 additions & 0 deletions
@@ -301,6 +301,11 @@ private[spark] object SQLConf {
     defaultValue = Some(true),
     doc = "<TODO>")
 
+  val HIVE_METASTORE_PARTITION_PRUNING = booleanConf("spark.sql.hive.metastorePartitionPruning",
+    defaultValue = Some(false),
+    doc = "When true, some predicates will be pushed down into the Hive metastore so that " +
+      "unmatching partitions can be eliminated earlier.")
+
   val COLUMN_NAME_OF_CORRUPT_RECORD = stringConf("spark.sql.columnNameOfCorruptRecord",
     defaultValue = Some("_corrupt_record"),
     doc = "<TODO>")
@@ -456,6 +461,8 @@ private[sql] class SQLConf extends Serializable with CatalystConf {
 
   private[spark] def verifyPartitionPath: Boolean = getConf(HIVE_VERIFY_PARTITION_PATH)
 
+  private[spark] def metastorePartitionPruning: Boolean = getConf(HIVE_METASTORE_PARTITION_PRUNING)
+
   private[spark] def externalSortEnabled: Boolean = getConf(EXTERNAL_SORT)
 
   private[spark] def sortMergeJoinEnabled: Boolean = getConf(SORTMERGE_JOIN)
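
Because the new flag defaults to false, users who want metastore pruning have to opt in explicitly. A minimal sketch of how that might look from an existing session; the sqlContext value here is assumed and not part of the patch:

// Opt in to metastore partition pruning for this session
// (after this patch the flag defaults to false).
sqlContext.setConf("spark.sql.hive.metastorePartitionPruning", "true")

// Equivalent SQL form:
sqlContext.sql("SET spark.sql.hive.metastorePartitionPruning=true")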

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala

Lines changed: 11 additions & 1 deletion
@@ -678,8 +678,18 @@ private[hive] case class MetastoreRelation
     }
   )
 
+  // When metastore partition pruning is turned off, we cache the list of all partitions to
+  // mimic the behavior of Spark < 1.5
+  lazy val allPartitions = table.getAllPartitions
+
   def getHiveQlPartitions(predicates: Seq[Expression] = Nil): Seq[Partition] = {
-    table.getPartitions(predicates).map { p =>
+    val rawPartitions = if (sqlContext.conf.metastorePartitionPruning) {
+      table.getPartitions(predicates)
+    } else {
+      allPartitions
+    }
+
+    rawPartitions.map { p =>
       val tPartition = new org.apache.hadoop.hive.metastore.api.Partition
       tPartition.setDbName(databaseName)
       tPartition.setTableName(tableName)
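
The branch above is the core of the change: with the flag on, predicates are pushed to the metastore on every call; with it off, the lazy val fetches the full partition list once and reuses it, mimicking Spark < 1.5. A self-contained sketch of that pattern, using a hypothetical PartitionSource in place of the real Hive client interface (all names here are illustrative only):

// Hypothetical stand-in for the Hive client; not part of the patch.
trait PartitionSource {
  def fetchAll(): Seq[String]
  def fetchByFilter(predicates: Seq[String]): Seq[String]
}

class CachedRelation(source: PartitionSource, pruningEnabled: Boolean) {
  // Evaluated at most once, on first access, then reused for every later call.
  private lazy val allPartitions: Seq[String] = source.fetchAll()

  def partitions(predicates: Seq[String]): Seq[String] =
    if (pruningEnabled) source.fetchByFilter(predicates) // push filters down per call
    else allPartitions                                    // cached full listing
}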

sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientInterface.scala

Lines changed: 4 additions & 6 deletions
@@ -72,12 +72,10 @@ private[hive] case class HiveTable(
 
   def isPartitioned: Boolean = partitionColumns.nonEmpty
 
-  def getPartitions(predicates: Seq[Expression]): Seq[HivePartition] = {
-    predicates match {
-      case Nil => client.getAllPartitions(this)
-      case _ => client.getPartitionsByFilter(this, predicates)
-    }
-  }
+  def getAllPartitions: Seq[HivePartition] = client.getAllPartitions(this)
+
+  def getPartitions(predicates: Seq[Expression]): Seq[HivePartition] =
+    client.getPartitionsByFilter(this, predicates)
 
   // Hive does not support backticks when passing names to the client.
   def qualifiedName: String = s"$database.$name"
