[SPARK-38959][SQL] DS V2: Support runtime group filtering in row-level commands #36304
SQLConf.scala:

```diff
@@ -412,6 +412,21 @@ object SQLConf {
       .longConf
       .createWithDefault(67108864L)
 
+  val RUNTIME_ROW_LEVEL_OPERATION_GROUP_FILTER_ENABLED =
+    buildConf("spark.sql.optimizer.runtime.rowLevelOperationGroupFilter.enabled")
+      .doc("Enables runtime group filtering for group-based row-level operations. " +
+        "Data sources that replace groups of data (e.g. files, partitions) may prune entire " +
+        "groups using provided data source filters when planning a row-level operation scan. " +
+        "However, such filtering is limited as not all expressions can be converted into data " +
+        "source filters and some expressions can only be evaluated by Spark (e.g. subqueries). " +
+        "Since rewriting groups is expensive, Spark can execute a query at runtime to find what " +
+        "records match the condition of the row-level operation. The information about matching " +
+        "records will be passed back to the row-level operation scan, allowing data sources to " +
+        "discard groups that don't have to be rewritten.")
+      .version("3.4.0")
+      .booleanConf
+      .createWithDefault(true)
+
```
Member: I agree with starting with
```diff
   val PLANNED_WRITE_ENABLED = buildConf("spark.sql.optimizer.plannedWrite.enabled")
     .internal()
     .doc("When set to true, Spark optimizer will add logical sort operators to V1 write commands " +
```
```diff
@@ -4084,6 +4099,9 @@ class SQLConf extends Serializable with Logging {
   def runtimeFilterCreationSideThreshold: Long =
     getConf(RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD)
 
+  def runtimeRowLevelOperationGroupFilterEnabled: Boolean =
+    getConf(RUNTIME_ROW_LEVEL_OPERATION_GROUP_FILTER_ENABLED)
+
   def stateStoreProviderClass: String = getConf(STATE_STORE_PROVIDER_CLASS)
 
   def isStateSchemaCheckEnabled: Boolean = getConf(STATE_SCHEMA_CHECK_ENABLED)
```
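For reference, the new flag behaves like any other SQL conf and can be toggled per session. A minimal sketch of the kind of statement it targets (the catalog and table names are made up for illustration, not part of this PR):

```scala
// The flag defaults to true; setting it to false falls back to static group filtering only.
spark.conf.set("spark.sql.optimizer.runtime.rowLevelOperationGroupFilter.enabled", "true")

// A condition with an IN subquery cannot be pushed down as a data source filter, so this is
// the kind of DELETE that benefits: Spark finds the matching groups at runtime and the data
// source only rewrites those groups.
spark.sql("DELETE FROM testcat.ns.tbl WHERE id IN (SELECT id FROM testcat.ns.ids_to_delete)")
```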
InMemoryRowLevelOperationTable.scala:

```diff
@@ -34,6 +34,9 @@ class InMemoryRowLevelOperationTable(
     properties: util.Map[String, String])
   extends InMemoryTable(name, schema, partitioning, properties) with SupportsRowLevelOperations {
 
+  // used in row-level operation tests to verify replaced partitions
+  var replacedPartitions: Seq[Seq[Any]] = Seq.empty
+
```
Member: Maybe add a comment to mention this is for test.

Contributor (Author): Added a comment above.
```diff
   override def newRowLevelOperationBuilder(
       info: RowLevelOperationInfo): RowLevelOperationBuilder = {
     () => PartitionBasedOperation(info.command)

@@ -88,8 +91,9 @@ class InMemoryRowLevelOperationTable(
     override def commit(messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized {
       val newData = messages.map(_.asInstanceOf[BufferedRows])
       val readRows = scan.data.flatMap(_.asInstanceOf[BufferedRows].rows)
-      val readPartitions = readRows.map(r => getKey(r, schema))
+      val readPartitions = readRows.map(r => getKey(r, schema)).distinct
       dataMap --= readPartitions
+      replacedPartitions = readPartitions
       withData(newData, schema)
     }
   }
```
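A rough sketch of how a test could use the new `replacedPartitions` field; the helpers (`spark`, `catalog`, `ident`, `tableName`) and the data are assumptions in the spirit of Spark's DS v2 suites, not code from this PR:

```scala
// Hypothetical test: the table is partitioned by `dep` and only the "hr" partition
// contains rows matching the DELETE condition.
spark.sql(s"DELETE FROM $tableName WHERE salary < 300")

val table = catalog.loadTable(ident).asInstanceOf[InMemoryRowLevelOperationTable]
// with runtime group filtering, partitions without matching rows are not rewritten
assert(table.replacedPartitions == Seq(Seq("hr")))
```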
SparkOptimizer.scala:

```diff
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.connector.catalog.CatalogManager
 import org.apache.spark.sql.execution.datasources.{PruneFileSourcePartitions, SchemaPruning, V1Writes}
 import org.apache.spark.sql.execution.datasources.v2.{GroupBasedRowLevelOperationScanPlanning, OptimizeMetadataOnlyDeleteFromTable, V2ScanPartitioningAndOrdering, V2ScanRelationPushDown, V2Writes}
-import org.apache.spark.sql.execution.dynamicpruning.{CleanupDynamicPruningFilters, PartitionPruning}
+import org.apache.spark.sql.execution.dynamicpruning.{CleanupDynamicPruningFilters, PartitionPruning, RowLevelOperationRuntimeGroupFiltering}
 import org.apache.spark.sql.execution.python.{ExtractGroupingPythonUDFFromAggregate, ExtractPythonUDFFromAggregate, ExtractPythonUDFs}

 class SparkOptimizer(

@@ -50,7 +50,8 @@ class SparkOptimizer(
   override def defaultBatches: Seq[Batch] = (preOptimizationBatches ++ super.defaultBatches :+
     Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog)) :+
     Batch("PartitionPruning", Once,
-      PartitionPruning) :+
+      PartitionPruning,
+      RowLevelOperationRuntimeGroupFiltering(OptimizeSubqueries)) :+
```
Contributor: I think another idea is to run

Contributor (Author): This would be much cleaner but SPARK-36444 removed
| Batch("InjectRuntimeFilter", FixedPoint(1), | ||
| InjectRuntimeFilter) :+ | ||
| Batch("MergeScalarSubqueries", Once, | ||
|
|
||
RowLevelOperationRuntimeGroupFiltering.scala (new file):

```scala
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.execution.dynamicpruning

import org.apache.spark.sql.catalyst.expressions.{And, Attribute, DynamicPruningSubquery, Expression, PredicateHelper, V2ExpressionUtils}
import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
import org.apache.spark.sql.catalyst.planning.GroupBasedRowLevelOperation
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.connector.read.SupportsRuntimeV2Filtering
import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Implicits, DataSourceV2Relation, DataSourceV2ScanRelation}

/**
 * A rule that assigns a subquery to filter groups in row-level operations at runtime.
 *
 * Data skipping during job planning for row-level operations is limited to expressions that can be
 * converted to data source filters. Since not all expressions can be pushed down that way and
 * rewriting groups is expensive, Spark allows data sources to filter groups at runtime.
 * If the primary scan in a group-based row-level operation supports runtime filtering, this rule
 * will inject a subquery to find all rows that match the condition so that data sources know
 * exactly which groups must be rewritten.
 *
 * Note this rule only applies to group-based row-level operations.
 */
case class RowLevelOperationRuntimeGroupFiltering(optimizeSubqueries: Rule[LogicalPlan])
```
Contributor: why do we need to pass the rule as a parameter? Can't we call

Member: I also thought about this, but I think it's hard to reference

Contributor (Author): @sunchao is correct. It wasn't easy to call @cloud-fan, I also considered simply adding

Contributor (Author): An alternative idea could be to move
```scala
  extends Rule[LogicalPlan] with PredicateHelper {

  import DataSourceV2Implicits._

  override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
    // apply special dynamic filtering only for group-based row-level operations
    case GroupBasedRowLevelOperation(replaceData, cond,
        DataSourceV2ScanRelation(_, scan: SupportsRuntimeV2Filtering, _, _, _))
```
Contributor (Author): This is the optimizer rule that checks whether the primary row-level scan supports runtime filtering. As long as a data source implements

Also, the runtime group filter uses the existing framework for runtime filtering in DS V2, meaning we get all the benefits like subquery reuse, etc.
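For context, a connector opts into this by having its row-level operation scan implement `SupportsRuntimeV2Filtering`. A hedged sketch of the connector side, assuming groups are partitions keyed by a `dep` column (the class and helper names are illustrative, not from this PR):

```scala
import org.apache.spark.sql.connector.expressions.{Expressions, NamedReference}
import org.apache.spark.sql.connector.expressions.filter.Predicate
import org.apache.spark.sql.connector.read.{Batch, InputPartition, PartitionReaderFactory, Scan, SupportsRuntimeV2Filtering}
import org.apache.spark.sql.types.StructType

// Illustrative scan that exposes one InputPartition per group (e.g. per partition).
class GroupFilterableScan(
    tableSchema: StructType,
    private var groups: Seq[InputPartition],
    readerFactory: PartitionReaderFactory)
  extends Scan with Batch with SupportsRuntimeV2Filtering {

  override def readSchema(): StructType = tableSchema
  override def toBatch(): Batch = this

  // columns Spark may use to build the runtime group filter (here, the partition column)
  override def filterAttributes(): Array[NamedReference] = Array(Expressions.column("dep"))

  // called at runtime with predicates derived from the rows that match the row-level
  // operation condition; groups that cannot match are dropped and never rewritten
  override def filter(predicates: Array[Predicate]): Unit = {
    groups = groups.filter(group => mayMatch(group, predicates))
  }

  override def planInputPartitions(): Array[InputPartition] = groups.toArray
  override def createReaderFactory(): PartitionReaderFactory = readerFactory

  // placeholder; a real connector would evaluate the predicates against the group's
  // partition values or statistics
  private def mayMatch(group: InputPartition, predicates: Array[Predicate]): Boolean = true
}
```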
```scala
        if conf.runtimeRowLevelOperationGroupFilterEnabled && cond != TrueLiteral =>

      // use reference equality on scan to find required scan relations
      val newQuery = replaceData.query transformUp {
        case r: DataSourceV2ScanRelation if r.scan eq scan =>
          // use the original table instance that was loaded for this row-level operation
          // in order to leverage a regular batch scan in the group filter query
          val originalTable = r.relation.table.asRowLevelOperationTable.table
          val relation = r.relation.copy(table = originalTable)
```
Contributor (Author): We build
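Conceptually, the group filter query assembled here computes the group keys of the rows that match the row-level operation condition. A rough user-facing equivalent (illustrative only; the real mechanism is a `DynamicPruningSubquery` over a copy of the original relation rather than literal SQL, and the names are made up):

```scala
// Roughly what the runtime group filter evaluates: the distinct values of
// scan.filterAttributes() (say, the partition column `dep`) among matching rows.
val matchingGroups = spark.sql(
  """SELECT DISTINCT dep
    |FROM testcat.ns.tbl
    |WHERE id IN (SELECT id FROM testcat.ns.ids_to_delete)""".stripMargin)

// the resulting group values are what the data source receives through
// SupportsRuntimeV2Filtering.filter(), so untouched groups are never rewritten
```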
```scala
          val matchingRowsPlan = buildMatchingRowsPlan(relation, cond)

          val filterAttrs = scan.filterAttributes
          val buildKeys = V2ExpressionUtils.resolveRefs[Attribute](filterAttrs, matchingRowsPlan)
          val pruningKeys = V2ExpressionUtils.resolveRefs[Attribute](filterAttrs, r)
          val dynamicPruningCond = buildDynamicPruningCond(matchingRowsPlan, buildKeys, pruningKeys)

          Filter(dynamicPruningCond, r)
      }

      // optimize subqueries to rewrite them as joins and trigger job planning
      replaceData.copy(query = optimizeSubqueries(newQuery))
```
Contributor: Does this mean we revert what we did in

Contributor (Author): Not really, @cloud-fan. This rule simply attaches a runtime filter to the plan that was created while rewriting the delete. We do replace the query but it is pretty much the same plan, just with an extra runtime filter.
```scala
  }

  private def buildMatchingRowsPlan(
      relation: DataSourceV2Relation,
      cond: Expression): LogicalPlan = {

    val matchingRowsPlan = Filter(cond, relation)

    // clone the relation and assign new expr IDs to avoid conflicts
    matchingRowsPlan transformUpWithNewOutput {
      case r: DataSourceV2Relation if r eq relation =>
        val oldOutput = r.output
        val newOutput = oldOutput.map(_.newInstance())
        r.copy(output = newOutput) -> oldOutput.zip(newOutput)
```
Contributor: nit:
```scala
    }
  }

  private def buildDynamicPruningCond(
      matchingRowsPlan: LogicalPlan,
      buildKeys: Seq[Attribute],
      pruningKeys: Seq[Attribute]): Expression = {

    val buildQuery = Project(buildKeys, matchingRowsPlan)
    val dynamicPruningSubqueries = pruningKeys.zipWithIndex.map { case (key, index) =>
      DynamicPruningSubquery(key, buildQuery, buildKeys, index, onlyInBroadcast = false)
    }
    dynamicPruningSubqueries.reduce(And)
  }
}
```
I went back and forth on the name. On one hand, we have dynamic partition pruning. On the other hand, we call it runtime filtering in DS V2. Ideas are welcome.
I also used the `spark.sql.optimizer.runtime` prefix like for runtime Bloom filter joins. There are other runtime-related configs that don't use this prefix so let me know the correct config namespace.