-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-21743][SQL][follow-up] top-most limit should not cause memory leak #18993
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -63,29 +63,24 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { | |
| */ | ||
| object SpecialLimits extends Strategy { | ||
| override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { | ||
| case logical.ReturnAnswer(rootPlan) => rootPlan match { | ||
| case logical.Limit(IntegerLiteral(limit), logical.Sort(order, true, child)) => | ||
| execution.TakeOrderedAndProjectExec(limit, order, child.output, planLater(child)) :: Nil | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. kinda unrelated, remove these |
||
| case logical.Limit( | ||
| IntegerLiteral(limit), | ||
| logical.Project(projectList, logical.Sort(order, true, child))) => | ||
| execution.TakeOrderedAndProjectExec( | ||
| limit, order, projectList, planLater(child)) :: Nil | ||
| case logical.Limit(IntegerLiteral(limit), child) => | ||
| // Normally wrapping child with `LocalLimitExec` here is a no-op, because | ||
| // `CollectLimitExec.executeCollect` will call `LocalLimitExec.executeTake`, which | ||
| // calls `child.executeTake`. If child supports whole stage codegen, adding this | ||
| // `LocalLimitExec` can stop the processing of whole stage codegen and trigger the | ||
| // resource releasing work, after we consume `limit` rows. | ||
| execution.CollectLimitExec(limit, LocalLimitExec(limit, planLater(child))) :: Nil | ||
| case ReturnAnswer(rootPlan) => rootPlan match { | ||
| case Limit(IntegerLiteral(limit), Sort(order, true, child)) => | ||
| TakeOrderedAndProjectExec(limit, order, child.output, planLater(child)) :: Nil | ||
| case Limit(IntegerLiteral(limit), Project(projectList, Sort(order, true, child))) => | ||
| TakeOrderedAndProjectExec(limit, order, projectList, planLater(child)) :: Nil | ||
| case Limit(IntegerLiteral(limit), child) => | ||
| // With whole stage codegen, Spark releases resources only when all the output data of the | ||
| // query plan are consumed. It's possible that `CollectLimitExec` only consumes a little | ||
| // data from child plan and finishes the query without releasing resources. Here we wrap | ||
| // the child plan with `LocalLimitExec`, to stop the processing of whole stage codegen and | ||
| // trigger the resource releasing work, after we consume `limit` rows. | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comments updated. |
||
| CollectLimitExec(limit, LocalLimitExec(limit, planLater(child))) :: Nil | ||
| case other => planLater(other) :: Nil | ||
| } | ||
| case logical.Limit(IntegerLiteral(limit), logical.Sort(order, true, child)) => | ||
| execution.TakeOrderedAndProjectExec(limit, order, child.output, planLater(child)) :: Nil | ||
| case logical.Limit( | ||
| IntegerLiteral(limit), logical.Project(projectList, logical.Sort(order, true, child))) => | ||
| execution.TakeOrderedAndProjectExec( | ||
| limit, order, projectList, planLater(child)) :: Nil | ||
| case Limit(IntegerLiteral(limit), Sort(order, true, child)) => | ||
| TakeOrderedAndProjectExec(limit, order, child.output, planLater(child)) :: Nil | ||
| case Limit(IntegerLiteral(limit), Project(projectList, Sort(order, true, child))) => | ||
| TakeOrderedAndProjectExec(limit, order, projectList, planLater(child)) :: Nil | ||
| case _ => Nil | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,14 +54,6 @@ trait BaseLimitExec extends UnaryExecNode with CodegenSupport { | |
| val limit: Int | ||
| override def output: Seq[Attribute] = child.output | ||
|
|
||
| // Do not enable whole stage codegen for a single limit. | ||
| override def supportCodegen: Boolean = child match { | ||
| case plan: CodegenSupport => plan.supportCodegen | ||
| case _ => false | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is wrong, we may have more operators above |
||
| } | ||
|
|
||
| override def executeTake(n: Int): Array[InternalRow] = child.executeTake(math.min(n, limit)) | ||
|
|
||
| protected override def doExecute(): RDD[InternalRow] = child.execute().mapPartitions { iter => | ||
| iter.take(limit) | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is to fix
SQLQuerySuite.SPARK-19650: An action on a Command should not trigger a Spark job, limit over local relation should not trigger a spark job.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This kinda violates the idea that we shouldn't rely on optimization for correctness, but I suppose this is ok.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
technically this is not about correctness,
An action on a Command should not trigger a Spark jobis also kind of optimization.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, you are right about that.