Commit 68ace26

add org.apache.spark.sql.internal.connector.SupportsPushDownCatalystFilters
1 parent 095a7b4

File tree: 3 files changed, +41 −3 lines changed
org/apache/spark/sql/internal/connector/SupportsPushDownCatalystFilters.scala (new file)

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.internal.connector
+
+import org.apache.spark.sql.catalyst.expressions.Expression
+
+/**
+ * A mix-in interface for {@link FileScanBuilder}. This can be used to push down partitionFilters
+ * and dataFilters to FileIndex in the format of catalyst Expression.
+ */
+trait SupportsPushDownCatalystFilters {
+  /**
+   * Pushes down partitionFilters and dataFilters to FileIndex in the format of catalyst
+   * Expression. These catalyst Expression filters are used for partition pruning. The dataFilters
+   * are also translated into data source filters and used for selecting records.
+   */
+  def pushCatalystFilters(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): Unit
+}
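
For context, a minimal sketch of how a file-source ScanBuilder could mix in the new trait, assuming only what the scaladoc above states; MyFileScanBuilder and its build() body are hypothetical and not part of this commit:

import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.connector.read.{Scan, ScanBuilder}
import org.apache.spark.sql.internal.connector.SupportsPushDownCatalystFilters

class MyFileScanBuilder extends ScanBuilder with SupportsPushDownCatalystFilters {
  private var partitionFilters: Seq[Expression] = Seq.empty
  private var dataFilters: Seq[Expression] = Seq.empty

  // Keep the catalyst Expressions as-is so a FileIndex can prune partition
  // directories before any translation into sources.Filter happens.
  override def pushCatalystFilters(
      partitionFilters: Seq[Expression],
      dataFilters: Seq[Expression]): Unit = {
    this.partitionFilters = partitionFilters
    this.dataFilters = dataFilters
  }

  override def build(): Scan = ??? // a real builder would return a FileScan here
}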

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala

Lines changed: 8 additions & 2 deletions
@@ -22,13 +22,17 @@ import org.apache.spark.sql.{sources, SparkSession}
 import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.connector.read.{ScanBuilder, SupportsPushDownRequiredColumns}
 import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, PartitioningAwareFileIndex, PartitioningUtils}
+import org.apache.spark.sql.internal.connector.SupportsPushDownCatalystFilters
 import org.apache.spark.sql.sources.Filter
 import org.apache.spark.sql.types.StructType

 abstract class FileScanBuilder(
     sparkSession: SparkSession,
     fileIndex: PartitioningAwareFileIndex,
-    dataSchema: StructType) extends ScanBuilder with SupportsPushDownRequiredColumns {
+    dataSchema: StructType)
+  extends ScanBuilder
+  with SupportsPushDownRequiredColumns
+  with SupportsPushDownCatalystFilters {
   private val partitionSchema = fileIndex.partitionSchema
   private val isCaseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
   protected val supportsNestedSchemaPruning = false
@@ -66,7 +70,9 @@ abstract class FileScanBuilder(

   // Note: The partitionFilters and dataFilters need to be pushed to FileIndex in the format of
   // Expression because partition pruning uses the Expression Filters, not sources.Filters.
-  def pushFilters(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): Unit = {
+  override def pushCatalystFilters(
+      partitionFilters: Seq[Expression],
+      dataFilters: Seq[Expression]): Unit = {
     this.partitionFilters = partitionFilters
     this.dataFilters = dataFilters
     val translatedFilters = mutable.ArrayBuffer.empty[sources.Filter]
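
The hunk is truncated after the translatedFilters buffer. Judging from the trait's scaladoc, the rest of the method presumably translates each catalyst data filter into a source-level Filter; a rough sketch of that step (not the verbatim body), using the real DataSourceStrategy.translateFilter API:

// Sketch: predicates with no source-level equivalent are simply skipped.
dataFilters.foreach { expr =>
  DataSourceStrategy.translateFilter(expr, supportNestedPredicatePushdown = true)
    .foreach(translatedFilters += _)
}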

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala

Lines changed: 1 addition & 1 deletion
@@ -74,7 +74,7 @@ object PushDownUtils extends PredicateHelper {
       val (partitionFilters, dataFilters) =
         DataSourceUtils.getPartitionKeyFiltersAndDataFilters(
           f.getSparkSession, scanBuilderHolder.relation, f.readPartitionSchema(), filters)
-      f.pushFilters(ExpressionSet(partitionFilters).toSeq, dataFilters)
+      f.pushCatalystFilters(ExpressionSet(partitionFilters).toSeq, dataFilters)
       (Nil, dataFilters)
     case _ => (Nil, filters)
   }
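
A side note on the unchanged ExpressionSet(partitionFilters).toSeq wrapper at this call site: ExpressionSet deduplicates semantically equal predicates, so equivalent partition filters are pushed only once. A small standalone illustration (not from this commit):

import org.apache.spark.sql.catalyst.expressions.{Add, AttributeReference, ExpressionSet}
import org.apache.spark.sql.types.IntegerType

val a = AttributeReference("a", IntegerType)()
val b = AttributeReference("b", IntegerType)()
// a + b and b + a canonicalize to the same expression, so the set keeps one.
assert(ExpressionSet(Seq(Add(a, b), Add(b, a))).size == 1)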
