-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-46378][SQL][FOLLOWUP] Do not rely on TreeNodeTag in Project #44429
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -211,7 +211,8 @@ abstract class Optimizer(catalogManager: CatalogManager) | |
| Batch("Join Reorder", FixedPoint(1), | ||
| CostBasedJoinReorder) :+ | ||
| Batch("Eliminate Sorts", Once, | ||
| EliminateSorts) :+ | ||
| EliminateSorts, | ||
| RemoveRedundantSorts) :+ | ||
| Batch("Decimal Optimizations", fixedPoint, | ||
| DecimalAggregates) :+ | ||
| // This batch must run after "Decimal Optimizations", as that one may change the | ||
|
|
@@ -769,11 +770,11 @@ object LimitPushDown extends Rule[LogicalPlan] { | |
| LocalLimit(exp, project.copy(child = pushLocalLimitThroughJoin(exp, join))) | ||
| // Push down limit 1 through Aggregate and turn Aggregate into Project if it is group only. | ||
| case Limit(le @ IntegerLiteral(1), a: Aggregate) if a.groupOnly => | ||
| val project = Project(a.aggregateExpressions, LocalLimit(le, a.child)) | ||
| project.setTagValue(Project.dataOrderIrrelevantTag, ()) | ||
| Limit(le, project) | ||
| val newAgg = EliminateSorts(a.copy(child = LocalLimit(le, a.child))).asInstanceOf[Aggregate] | ||
| Limit(le, Project(newAgg.aggregateExpressions, newAgg.child)) | ||
| case Limit(le @ IntegerLiteral(1), p @ Project(_, a: Aggregate)) if a.groupOnly => | ||
| Limit(le, p.copy(child = Project(a.aggregateExpressions, LocalLimit(le, a.child)))) | ||
| val newAgg = EliminateSorts(a.copy(child = LocalLimit(le, a.child))).asInstanceOf[Aggregate] | ||
| Limit(le, p.copy(child = Project(newAgg.aggregateExpressions, newAgg.child))) | ||
| // Merge offset value and limit value into LocalLimit and pushes down LocalLimit through Offset. | ||
| case LocalLimit(le, Offset(oe, grandChild)) => | ||
| Offset(oe, LocalLimit(Add(le, oe), grandChild)) | ||
|
|
@@ -1555,38 +1556,30 @@ object CombineFilters extends Rule[LogicalPlan] with PredicateHelper { | |
| * Note that changes in the final output ordering may affect the file size (SPARK-32318). | ||
| * This rule handles the following cases: | ||
| * 1) if the sort order is empty or the sort order does not have any reference | ||
| * 2) if the Sort operator is a local sort and the child is already sorted | ||
| * 3) if there is another Sort operator separated by 0...n Project, Filter, Repartition or | ||
| * 2) if there is another Sort operator separated by 0...n Project, Filter, Repartition or | ||
| * RepartitionByExpression, RebalancePartitions (with deterministic expressions) operators | ||
| * 4) if the Sort operator is within Join separated by 0...n Project, Filter, Repartition or | ||
| * 3) if the Sort operator is within Join separated by 0...n Project, Filter, Repartition or | ||
| * RepartitionByExpression, RebalancePartitions (with deterministic expressions) operators only | ||
| * and the Join condition is deterministic | ||
| * 5) if the Sort operator is within GroupBy separated by 0...n Project, Filter, Repartition or | ||
| * 4) if the Sort operator is within GroupBy separated by 0...n Project, Filter, Repartition or | ||
| * RepartitionByExpression, RebalancePartitions (with deterministic expressions) operators only | ||
| * and the aggregate function is order irrelevant | ||
| */ | ||
| object EliminateSorts extends Rule[LogicalPlan] { | ||
| def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning( | ||
| _.containsPattern(SORT))(applyLocally) | ||
|
|
||
| private val applyLocally: PartialFunction[LogicalPlan, LogicalPlan] = { | ||
| def apply(plan: LogicalPlan): LogicalPlan = plan.transformUpWithPruning(_.containsPattern(SORT)) { | ||
| case s @ Sort(orders, _, child) if orders.isEmpty || orders.exists(_.child.foldable) => | ||
| val newOrders = orders.filterNot(_.child.foldable) | ||
| if (newOrders.isEmpty) { | ||
| applyLocally.lift(child).getOrElse(child) | ||
| child | ||
| } else { | ||
| s.copy(order = newOrders) | ||
| } | ||
| case Sort(orders, false, child) if SortOrder.orderingSatisfies(child.outputOrdering, orders) => | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is the expensive part, as it needs to calculate the ordering of children. |
||
| applyLocally.lift(child).getOrElse(child) | ||
| case s @ Sort(_, global, child) => s.copy(child = recursiveRemoveSort(child, global)) | ||
| case j @ Join(originLeft, originRight, _, cond, _) if cond.forall(_.deterministic) => | ||
| j.copy(left = recursiveRemoveSort(originLeft, true), | ||
| right = recursiveRemoveSort(originRight, true)) | ||
| case g @ Aggregate(_, aggs, originChild) if isOrderIrrelevantAggs(aggs) => | ||
| g.copy(child = recursiveRemoveSort(originChild, true)) | ||
| case p: Project if p.getTagValue(Project.dataOrderIrrelevantTag).isDefined => | ||
| p.copy(child = recursiveRemoveSort(p.child, true)) | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -1602,12 +1595,6 @@ object EliminateSorts extends Rule[LogicalPlan] { | |
| plan match { | ||
| case Sort(_, global, child) if canRemoveGlobalSort || !global => | ||
| recursiveRemoveSort(child, canRemoveGlobalSort) | ||
| case Sort(sortOrder, true, child) => | ||
| // For this case, the upper sort is local so the ordering of present sort is unnecessary, | ||
| // so here we only preserve its output partitioning using `RepartitionByExpression`. | ||
| // We should use `None` as the optNumPartitions so AQE can coalesce shuffle partitions. | ||
| // This behavior is same with original global sort. | ||
| RepartitionByExpression(sortOrder, recursiveRemoveSort(child, true), None) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, previously this rule looks into this global Sort's child to remove local and global Sort recursively without condition. But in the new case s @ Sort(orders, true, child) =>
val newChild = recursiveRemoveSort(child, optimizeGlobalSort = false)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Say there are Sorts like We reach: case s @ Sort(_, global, child) => s.copy(child = recursiveRemoveSort(child, global)) Previously we could get rid of the middle global Sort and the bottom local Sort by case Sort(_, global, child) if canRemoveGlobalSort || !global =>
recursiveRemoveSort(child, canRemoveGlobalSort) How does case s @ Sort(_, global, child) => s.copy(child = recursiveRemoveSort(child, global)) But in Then the bottom local Sort under the rewritten Am I missing or misreading something?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After running These two rules are in the same batch |
||
| case other if canEliminateSort(other) => | ||
| other.withNewChildren(other.children.map(c => recursiveRemoveSort(c, canRemoveGlobalSort))) | ||
| case other if canEliminateGlobalSort(other) => | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.catalyst.optimizer | ||
|
|
||
| import org.apache.spark.sql.catalyst.expressions.SortOrder | ||
| import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, RepartitionByExpression, Sort} | ||
| import org.apache.spark.sql.catalyst.rules.Rule | ||
| import org.apache.spark.sql.catalyst.trees.TreePattern.SORT | ||
|
|
||
| /** | ||
| * Remove redundant local [[Sort]] from the logical plan if its child is already sorted, and also | ||
| * rewrite global [[Sort]] under local [[Sort]] into [[RepartitionByExpression]]. | ||
| */ | ||
| object RemoveRedundantSorts extends Rule[LogicalPlan] { | ||
| override def apply(plan: LogicalPlan): LogicalPlan = { | ||
| recursiveRemoveSort(plan, optimizeGlobalSort = false) | ||
| } | ||
|
|
||
| private def recursiveRemoveSort(plan: LogicalPlan, optimizeGlobalSort: Boolean): LogicalPlan = { | ||
| if (!plan.containsPattern(SORT)) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shall we pull out this to
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should put it here to skip some children of a plan node.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When we traverse down a tree, we still need to apply the skipping for each plan node that has more than one child.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh I see, that makes sense. Here we traverse the tree manually. |
||
| return plan | ||
| } | ||
| plan match { | ||
| case s @ Sort(orders, false, child) => | ||
| if (SortOrder.orderingSatisfies(child.outputOrdering, orders)) { | ||
| recursiveRemoveSort(child, optimizeGlobalSort = false) | ||
| } else { | ||
| s.withNewChildren(Seq(recursiveRemoveSort(child, optimizeGlobalSort = true))) | ||
| } | ||
|
|
||
| case s @ Sort(orders, true, child) => | ||
| val newChild = recursiveRemoveSort(child, optimizeGlobalSort = false) | ||
| if (optimizeGlobalSort) { | ||
| // For this case, the upper sort is local so the ordering of present sort is unnecessary, | ||
| // so here we only preserve its output partitioning using `RepartitionByExpression`. | ||
| // We should use `None` as the optNumPartitions so AQE can coalesce shuffle partitions. | ||
| // This behavior is same with original global sort. | ||
| RepartitionByExpression(orders, newChild, None) | ||
| } else { | ||
| s.withNewChildren(Seq(newChild)) | ||
| } | ||
|
|
||
| case _ => | ||
| plan.withNewChildren(plan.children.map(recursiveRemoveSort(_, optimizeGlobalSort = false))) | ||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This part is still in `EliminateSorts`, so `EliminateSorts` is good enough for `LimitPushDown`.