[SPARK-38085][SQL] DataSource V2: Handle DELETE commands for group-based sources #35395
Changes from all commits
@@ -0,0 +1,89 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.expressions.{EqualNullSafe, Expression, Not}
import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, Filter, LogicalPlan, ReplaceData}
import org.apache.spark.sql.connector.catalog.{SupportsDelete, SupportsRowLevelOperations, TruncatableTable}
import org.apache.spark.sql.connector.write.RowLevelOperation.Command.DELETE
import org.apache.spark.sql.connector.write.RowLevelOperationTable
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
import org.apache.spark.sql.util.CaseInsensitiveStringMap

/**
 * A rule that rewrites DELETE operations using plans that operate on individual or groups of rows.
 *
 * If a table implements [[SupportsDelete]] and [[SupportsRowLevelOperations]], this rule will
 * still rewrite the DELETE operation but the optimizer will check whether this particular DELETE
 * statement can be handled by simply passing delete filters to the connector. If so, the optimizer
 * will discard the rewritten plan and will allow the data source to delete using filters.
 */
object RewriteDeleteFromTable extends RewriteRowLevelCommand {

  override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
    case d @ DeleteFromTable(aliasedTable, cond) if d.resolved =>
      EliminateSubqueryAliases(aliasedTable) match {
        case DataSourceV2Relation(_: TruncatableTable, _, _, _, _) if cond == TrueLiteral =>
          // don't rewrite as the table supports truncation
          d

        case r @ DataSourceV2Relation(t: SupportsRowLevelOperations, _, _, _, _) =>
          val table = buildOperationTable(t, DELETE, CaseInsensitiveStringMap.empty())
          buildReplaceDataPlan(r, table, cond)

        case DataSourceV2Relation(_: SupportsDelete, _, _, _, _) =>
          // don't rewrite as the table supports deletes only with filters
          d

        case DataSourceV2Relation(t, _, _, _, _) =>
          throw QueryCompilationErrors.tableDoesNotSupportDeletesError(t)

        case _ =>
          d
      }
  }

  // build a rewrite plan for sources that support replacing groups of data (e.g. files, partitions)
  private def buildReplaceDataPlan(
      relation: DataSourceV2Relation,
      operationTable: RowLevelOperationTable,
      cond: Expression): ReplaceData = {

    // resolve all required metadata attrs that may be used for grouping data on write
    // for instance, JDBC data source may cluster data by shard/host before writing
    val metadataAttrs = resolveRequiredMetadataAttrs(relation, operationTable.operation)

    // construct a read relation and include all required metadata columns
    val readRelation = buildRelationWithAttrs(relation, operationTable, metadataAttrs)

    // construct a plan that contains unmatched rows in matched groups that must be carried over
    // such rows do not match the condition but have to be copied over as the source can replace
    // only groups of rows (e.g. if a source supports replacing files, unmatched rows in matched
    // files must be carried over)
    // it is safe to negate the condition here as the predicate pushdown for group-based row-level
    // operations is handled in a special way
    val remainingRowsFilter = Not(EqualNullSafe(cond, TrueLiteral))
    val remainingRowsPlan = Filter(remainingRowsFilter, readRelation)

    // build a plan to replace read groups in the table
    val writeRelation = relation.copy(table = operationTable)
    ReplaceData(writeRelation, cond, remainingRowsPlan, relation)
  }
}
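The inline comment above notes that the carried-over rows are selected with Not(EqualNullSafe(cond, TrueLiteral)) rather than a plain Not(cond). A minimal sketch of why, in plain Scala with no Spark dependency: Option[Boolean] stands in for SQL's three-valued logic, and the object and helper names below are hypothetical. Rows for which the DELETE condition evaluates to NULL are not deleted and therefore must be carried over; a plain negation would silently drop them.

```scala
// Hypothetical model of SQL three-valued logic: None represents NULL.
object RemainingRowsFilterSketch {
  type SqlBool = Option[Boolean]

  // NOT propagates NULL
  def not(b: SqlBool): SqlBool = b.map(!_)

  // <=> (null-safe equality) never returns NULL
  def equalNullSafe(a: SqlBool, b: SqlBool): SqlBool = Some(a == b)

  def main(args: Array[String]): Unit = {
    // possible outcomes of the DELETE condition for a given row
    val outcomes: Seq[SqlBool] = Seq(Some(true), Some(false), None)
    outcomes.foreach { cond =>
      val plainNegation = not(cond)                               // NULL stays NULL -> row filtered out
      val nullSafeNegation = not(equalNullSafe(cond, Some(true))) // always TRUE or FALSE
      println(s"cond=$cond  NOT(cond)=$plainNegation  NOT(cond <=> TRUE)=$nullSafeNegation")
    }
    // only the null-safe form keeps the cond=NULL row, which does not satisfy the DELETE
    // condition and therefore must be copied into the replacement groups
  }
}
```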
@@ -0,0 +1,71 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalyst.analysis

import scala.collection.mutable

import org.apache.spark.sql.catalyst.expressions.{AttributeReference, ExprId, V2ExpressionUtils}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.connector.catalog.SupportsRowLevelOperations
import org.apache.spark.sql.connector.write.{RowLevelOperation, RowLevelOperationInfoImpl, RowLevelOperationTable}
import org.apache.spark.sql.connector.write.RowLevelOperation.Command
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
import org.apache.spark.sql.util.CaseInsensitiveStringMap

trait RewriteRowLevelCommand extends Rule[LogicalPlan] {

  protected def buildOperationTable(
      table: SupportsRowLevelOperations,
      command: Command,
      options: CaseInsensitiveStringMap): RowLevelOperationTable = {
    val info = RowLevelOperationInfoImpl(command, options)
    val operation = table.newRowLevelOperationBuilder(info).build()
    RowLevelOperationTable(table, operation)
  }

  protected def buildRelationWithAttrs(
      relation: DataSourceV2Relation,
      table: RowLevelOperationTable,
      metadataAttrs: Seq[AttributeReference]): DataSourceV2Relation = {
    val attrs = dedupAttrs(relation.output ++ metadataAttrs)
    relation.copy(table = table, output = attrs)
  }

  protected def dedupAttrs(attrs: Seq[AttributeReference]): Seq[AttributeReference] = {
    val exprIds = mutable.Set.empty[ExprId]
    attrs.flatMap { attr =>
      if (exprIds.contains(attr.exprId)) {
        None
      } else {
        exprIds += attr.exprId
        Some(attr)
      }
    }
  }

  protected def resolveRequiredMetadataAttrs(
      relation: DataSourceV2Relation,
      operation: RowLevelOperation): Seq[AttributeReference] = {
    V2ExpressionUtils.resolveRefs[AttributeReference](
      operation.requiredMetadataAttributes,
      relation)
  }
}
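dedupAttrs keeps the first occurrence per ExprId so that metadata attributes already present in the relation output are not added twice. A standalone illustration of the same flatMap-plus-mutable-Set pattern, using a hypothetical Attr case class instead of Catalyst's AttributeReference:

```scala
import scala.collection.mutable

// hypothetical stand-in for AttributeReference: identity is the exprId, not the name
case class Attr(name: String, exprId: Long)

object DedupAttrsSketch {
  def dedup(attrs: Seq[Attr]): Seq[Attr] = {
    val seen = mutable.Set.empty[Long]
    attrs.flatMap { attr =>
      if (seen.contains(attr.exprId)) {
        None            // already emitted an attribute with this exprId
      } else {
        seen += attr.exprId
        Some(attr)      // first occurrence wins, order is preserved
      }
    }
  }

  def main(args: Array[String]): Unit = {
    val relationOutput = Seq(Attr("id", 1L), Attr("data", 2L), Attr("_partition", 3L))
    val metadataAttrs = Seq(Attr("_partition", 3L))
    // prints List(Attr(id,1), Attr(data,2), Attr(_partition,3)) -- no duplicate metadata column
    println(dedup(relationOutput ++ metadataAttrs))
  }
}
```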
@@ -17,16 +17,18 @@
 package org.apache.spark.sql.catalyst.plans.logical

-import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, FieldName, NamedRelation, PartitionSpec, ResolvedDBObjectName, UnresolvedException}
+import org.apache.spark.sql.{sources, AnalysisException}
+import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, EliminateSubqueryAliases, FieldName, NamedRelation, PartitionSpec, ResolvedDBObjectName, UnresolvedException}
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.catalog.FunctionResource
-import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, Expression, Unevaluable}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, Expression, MetadataAttribute, Unevaluable}
 import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema
 import org.apache.spark.sql.catalyst.trees.BinaryLike
 import org.apache.spark.sql.catalyst.util.CharVarcharUtils
 import org.apache.spark.sql.connector.catalog._
 import org.apache.spark.sql.connector.expressions.Transform
-import org.apache.spark.sql.connector.write.Write
+import org.apache.spark.sql.connector.write.{RowLevelOperation, RowLevelOperationTable, Write}
+import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
 import org.apache.spark.sql.types.{BooleanType, DataType, MetadataBuilder, StringType, StructType}

 /**
@@ -176,6 +178,80 @@ object OverwritePartitionsDynamic {
  }
}

trait RowLevelWrite extends V2WriteCommand with SupportsSubquery {
  def operation: RowLevelOperation
  def condition: Expression
  def originalTable: NamedRelation
}

/**
 * Replace groups of data in an existing table during a row-level operation.
 *
 * This node is constructed in rules that rewrite DELETE, UPDATE, MERGE operations for data sources
 * that can replace groups of data (e.g. files, partitions).
 *
 * @param table a plan that references a row-level operation table
 * @param condition a condition that defines matching groups
 * @param query a query with records that should replace the records that were read
 * @param originalTable a plan for the original table for which the row-level command was triggered
 * @param write a logical write, if already constructed
 */
case class ReplaceData(
    table: NamedRelation,
    condition: Expression,
    query: LogicalPlan,
    originalTable: NamedRelation,
    write: Option[Write] = None) extends RowLevelWrite {

  override val isByName: Boolean = false
  override val stringArgs: Iterator[Any] = Iterator(table, query, write)

  override lazy val references: AttributeSet = query.outputSet

  lazy val operation: RowLevelOperation = {
    EliminateSubqueryAliases(table) match {
      case DataSourceV2Relation(RowLevelOperationTable(_, operation), _, _, _, _) =>
        operation
      case _ =>
        throw new AnalysisException(s"Cannot retrieve row-level operation from $table")
    }
  }

  // the incoming query may include metadata columns
  lazy val dataInput: Seq[Attribute] = {
    query.output.filter {
      case MetadataAttribute(_) => false
      case _ => true
    }
  }

  override def outputResolved: Boolean = {
    assert(table.resolved && query.resolved,
      "`outputResolved` can only be called when `table` and `query` are both resolved.")

    // take into account only incoming data columns and ignore metadata columns in the query
    // they will be discarded after the logical write is built in the optimizer
    // metadata columns may be needed to request a correct distribution or ordering
    // but are not passed back to the data source during writes
    table.skipSchemaResolution || (dataInput.size == table.output.size &&
      dataInput.zip(table.output).forall { case (inAttr, outAttr) =>
        val outType = CharVarcharUtils.getRawType(outAttr.metadata).getOrElse(outAttr.dataType)
        // names and types must match, nullability must be compatible
        inAttr.name == outAttr.name &&
          DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outType) &&
          (outAttr.nullable || !inAttr.nullable)
      })
  }

Review comment on the check above:
  Contributor: Do we really need to check this? The input query is built by Spark and is directly reading the table.
  Author: It may be redundant in the case of DELETE, but it will be required for UPDATE and MERGE, when the incoming values no longer solely depend on what was read. This will prevent setting nullable values for non-nullable attributes, for instance.
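A quick standalone illustration of the nullability conjunct referenced in that exchange (plain Scala, hypothetical object and helper names; the compatible helper mirrors `outAttr.nullable || !inAttr.nullable`):

```scala
// Only a nullable input written into a non-nullable table attribute is rejected.
object NullabilityCompatSketch {
  def compatible(inputNullable: Boolean, tableAttrNullable: Boolean): Boolean =
    tableAttrNullable || !inputNullable

  def main(args: Array[String]): Unit = {
    for (in <- Seq(false, true); out <- Seq(false, true)) {
      println(s"input nullable=$in, table attr nullable=$out -> compatible=${compatible(in, out)}")
    }
  }
}
```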
  override def withNewQuery(newQuery: LogicalPlan): ReplaceData = copy(query = newQuery)

  override def withNewTable(newTable: NamedRelation): ReplaceData = copy(table = newTable)

  override protected def withNewChildInternal(newChild: LogicalPlan): ReplaceData = {
    copy(query = newChild)
  }
}

/** A trait used for logical plan nodes that create or replace V2 table definitions. */
trait V2CreateTablePlan extends LogicalPlan {
@@ -457,6 +533,16 @@ case class DeleteFromTable(
    copy(table = newChild)
}

/**
 * The logical plan of the DELETE FROM command that can be executed using data source filters.
 *
 * As opposed to [[DeleteFromTable]], this node represents a DELETE operation where the condition
 * was converted into filters and the data source reported that it can handle all of them.
 */
case class DeleteFromTableWithFilters(
    table: LogicalPlan,
    condition: Seq[sources.Filter]) extends LeafCommand

/**
 * The logical plan of the UPDATE TABLE command.
 */
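DeleteFromTableWithFilters carries public org.apache.spark.sql.sources filters. A hedged sketch of the connector-side decision this path depends on: whether the source can handle every converted filter. The FilterCapableSourceSketch object below is hypothetical and only mimics the shape of a SupportsDelete-style check; it assumes spark-catalyst is on the classpath for the sources.Filter classes.

```scala
import org.apache.spark.sql.sources.{EqualTo, Filter, GreaterThan, IsNotNull}

// hypothetical connector-side check: can every pushed filter be evaluated by the source?
object FilterCapableSourceSketch {
  def canDeleteWhere(filters: Array[Filter]): Boolean =
    filters.forall {
      case _: EqualTo | _: GreaterThan | _: IsNotNull => true
      case _ => false // an unsupported filter forces the group-based ReplaceData plan instead
    }

  def main(args: Array[String]): Unit = {
    val converted: Array[Filter] = Array(EqualTo("region", "EU"), GreaterThan("ts", 100L))
    // if this returns true, the optimizer can discard the rewritten plan and execute the
    // DELETE as DeleteFromTableWithFilters(table, converted)
    println(s"source can handle all filters: ${canDeleteWhere(converted)}")
  }
}
```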
Review comment on the use of CaseInsensitiveStringMap.empty() in RewriteDeleteFromTable:
  Reviewer: Should we pass options from the V2 relation instead of just using empty?
  Author: I am not sure. These are options passed into newRowLevelOperationBuilder and I thought they should come from the SQL operation. For example, if Spark adds an OPTIONS clause to its SQL for DELETE, UPDATE, MERGE, then these values will be propagated here.
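To illustrate the option flow being discussed (a hedged sketch, not the connector API itself): whatever map reaches buildOperationTable is wrapped into RowLevelOperationInfoImpl, so a connector could read it when building its row-level operation. The configureDelete helper and the "isolation-level" key below are made up; only CaseInsensitiveStringMap is the real Spark class, assumed to be on the classpath.

```scala
import scala.collection.JavaConverters._

import org.apache.spark.sql.util.CaseInsensitiveStringMap

object RowLevelOptionsSketch {
  // hypothetical connector-side handling of the options carried by the row-level operation info
  def configureDelete(options: CaseInsensitiveStringMap): String = {
    val isolation = options.getOrDefault("isolation-level", "serializable")
    s"building DELETE operation with isolation-level=$isolation"
  }

  def main(args: Array[String]): Unit = {
    // today the rewrite rule passes CaseInsensitiveStringMap.empty(); a future OPTIONS clause
    // on DELETE/UPDATE/MERGE could populate this map instead
    println(configureDelete(CaseInsensitiveStringMap.empty()))

    val fromSql = new CaseInsensitiveStringMap(Map("isolation-level" -> "snapshot").asJava)
    println(configureDelete(fromSql))
  }
}
```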