-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[WIP][SPARK-21896][SQL] Fix Stack Overflow when window function is nested inside an aggregate function #19193
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1727,16 +1727,23 @@ class Analyzer( | |
| * it into the plan tree. | ||
| */ | ||
| object ExtractWindowExpressions extends Rule[LogicalPlan] { | ||
| private def hasWindowFunction(projectList: Seq[NamedExpression]): Boolean = | ||
| projectList.exists(hasWindowFunction) | ||
| private def hasWindowFunction(exprs: Seq[Expression]): Boolean = | ||
| exprs.exists(hasWindowFunction) | ||
|
|
||
| private def hasWindowFunction(expr: NamedExpression): Boolean = { | ||
| private def hasWindowFunction(expr: Expression): Boolean = { | ||
| expr.find { | ||
| case window: WindowExpression => true | ||
| case _ => false | ||
| }.isDefined | ||
| } | ||
|
|
||
| private def containsAggregateFunctionWithWindowExpression(exprs: Seq[Expression]): Boolean = { | ||
| exprs.exists(expr => expr.find { | ||
| case AggregateExpression(aggFunc, _, _, _) if hasWindowFunction(aggFunc.children) => true | ||
| case _ => false | ||
| }.isDefined) | ||
| } | ||
|
|
||
| /** | ||
| * From a Seq of [[NamedExpression]]s, extract expressions containing window expressions and | ||
| * other regular expressions that do not contain any window expression. For example, for | ||
|
|
@@ -1920,7 +1927,34 @@ class Analyzer( | |
|
|
||
| case p: LogicalPlan if !p.childrenResolved => p | ||
|
|
||
| // Aggregate without Having clause. | ||
| // Extract window expressions from aggregate functions. There might be an aggregate whose | ||
| // aggregate function contains a window expression as a child, which we need to extract. | ||
| // e.g., df.groupBy().agg(max(rank().over(window)) | ||
| case a @ Aggregate(groupingExprs, aggregateExprs, child) | ||
| if containsAggregateFunctionWithWindowExpression(aggregateExprs) && | ||
| a.expressions.forall(_.resolved) => | ||
|
|
||
| val windowExprAliases = new ArrayBuffer[NamedExpression]() | ||
| val newAggregateExprs = aggregateExprs.map { expr => | ||
| expr.transform { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The code below assumes that there are no window aggregates on top of a regular aggregate, and it will push the regular aggregate into the underlying window. An example of this:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for looking into this. I am not sure I fully understood "it will push the regular aggregate into the underlying window". Could you, please, elaborate? This is what I tried: It produced the following plans: The result was: So, we have a window expression on top of a regular aggregate in
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, do you actually mean smth like this? |
||
| case aggExpr @ AggregateExpression(func, _, _, _) if hasWindowFunction(func.children) => | ||
| val newFuncChildren = func.children.map { funcExpr => | ||
| funcExpr.transform { | ||
| case we: WindowExpression => | ||
| // Replace window expressions with aliases to them | ||
| val windowExprAlias = Alias(we, s"_we${windowExprAliases.length}")() | ||
| windowExprAliases += windowExprAlias | ||
| windowExprAlias.toAttribute | ||
| } | ||
| } | ||
| val newFunc = func.withNewChildren(newFuncChildren).asInstanceOf[AggregateFunction] | ||
| aggExpr.copy(aggregateFunction = newFunc) | ||
| }.asInstanceOf[NamedExpression] | ||
| } | ||
| val window = addWindow(windowExprAliases, child) | ||
| // TODO do we also need a projection here? | ||
| Aggregate(groupingExprs, newAggregateExprs, window) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No you don't need a Project. |
||
|
|
||
| case a @ Aggregate(groupingExprs, aggregateExprs, child) | ||
| if hasWindowFunction(aggregateExprs) && | ||
| a.expressions.forall(_.resolved) => | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To be sure: What happens if there is a window function on-top of the aggregate function? This gets resolved in two passes right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a test case for this scenario?