|
17 | 17 |
|
18 | 18 | package org.apache.spark.sql.catalyst.analysis |
19 | 19 |
|
| 20 | +import scala.collection.mutable.ArrayBuffer |
| 21 | + |
20 | 22 | import org.apache.spark.util.collection.OpenHashSet |
21 | 23 | import org.apache.spark.sql.AnalysisException |
22 | 24 | import org.apache.spark.sql.catalyst.expressions._ |
@@ -61,6 +63,7 @@ class Analyzer( |
61 | 63 | ResolveGenerate :: |
62 | 64 | ImplicitGenerate :: |
63 | 65 | ResolveFunctions :: |
| 66 | + ResolveWindowFunction :: |
64 | 67 | GlobalAggregates :: |
65 | 68 | UnresolvedHavingClauseAttributes :: |
66 | 69 | TrimGroupingAliases :: |
@@ -529,6 +532,165 @@ class Analyzer( |
529 | 532 | makeGeneratorOutput(p.generator, p.generatorOutput), p.child) |
530 | 533 | } |
531 | 534 | } |
| 535 | + |
| 536 | + object ResolveWindowFunction extends Rule[LogicalPlan] { |
/** Returns true when at least one expression in `projectList` contains a window expression. */
def hasWindowFunction(projectList: Seq[NamedExpression]): Boolean =
  projectList.exists(candidate => hasWindowFunction(candidate))
| 539 | + |
/** Returns true when a [[WindowExpression]] appears anywhere in the tree rooted at `expr`. */
def hasWindowFunction(expr: NamedExpression): Boolean = {
  val firstWindowExpression = expr.find {
    case _: WindowExpression => true
    case _ => false
  }
  firstWindowExpression.nonEmpty
}
| 546 | + |
/**
 * Splits a Seq of [[NamedExpression]]s into the expressions that contain window
 * expressions and the remaining regular expressions.
 *
 * The inputs of every window function, window spec and aggregate found inside a window
 * expression are replaced by attribute references. Whatever those references need is
 * appended to the regular expressions so that an operator below can produce it.
 *
 * @return a pair of (rewritten window expressions, regular expressions followed by the
 *         expressions extracted from the window expressions)
 */
def extract(
    expressions: Seq[NamedExpression]): (Seq[NamedExpression], Seq[NamedExpression]) = {
  val (windowExpressions, regularExpressions) = expressions.partition(hasWindowFunction)
  // Expressions required by the window expressions that regularExpressions does not
  // already provide. Generated names depend on this buffer's length, so insertion
  // order matters.
  val extracted = ArrayBuffer.empty[NamedExpression]

  // Wraps `e` in a generated `_w<N>` alias, records the alias and returns its attribute.
  def aliasAndRegister(e: Expression): Expression = {
    val aliased = Alias(e, s"_w${extracted.length}")()
    extracted += aliased
    aliased.toAttribute
  }

  def extractExpr(expr: Expression): Expression = expr match {
    case named: NamedExpression =>
      // Record the named expression when it references attributes that neither
      // regularExpressions nor the buffer provides yet; always refer to it by attribute.
      val unavailable = AttributeSet(Seq(expr)) -- (regularExpressions ++ extracted)
      if (unavailable.nonEmpty) {
        extracted += named
      }
      named.toAttribute
    case constant: Expression if constant.foldable =>
      // Foldable expressions are left in place; no attribute reference is created.
      constant
    case computed: Expression =>
      aliasAndRegister(computed)
  }

  val newWindowExpressions = windowExpressions.map { windowExpr =>
    val rewritten = windowExpr.transform {
      case wf: WindowFunction =>
        // Replace the window function's children with extracted references.
        wf.withNewChildren(wf.children.map(child => extractExpr(child)))
      case wsc @ WindowSpecDefinition(partitionSpec, orderSpec, _) =>
        // Replace the partition and ordering expressions with extracted references.
        val newPartitionSpec = partitionSpec.map(p => extractExpr(p))
        val newOrderSpec = orderSpec.map(so => so.copy(child = extractExpr(so.child)))
        wsc.copy(partitionSpec = newPartitionSpec, orderSpec = newOrderSpec)
      case agg: AggregateExpression =>
        // Aggregates met during the rewrite are aliased and recorded as well.
        aliasAndRegister(agg)
    }
    rewritten.asInstanceOf[NamedExpression]
  }

  (newWindowExpressions, regularExpressions ++ extracted)
}
| 596 | + |
/**
 * Adds Window operators on top of `child`. Every Window operator handles a single
 * window spec, so the expressions are grouped by spec and the operators are stacked
 * one above another.
 *
 * An expression is grouped by the spec of the first [[WindowExpression]] found in it.
 * Analysis fails, naming the offending expression, when an expression contains no
 * [[WindowExpression]] at all.
 *
 * @param windowExpressions expressions that each contain a window expression
 * @param child the plan the Window operators are stacked on
 * @return `child` wrapped in one Window operator per distinct window spec
 */
def addWindow(windowExpressions: Seq[NamedExpression], child: LogicalPlan): LogicalPlan = {
  // First, group window expressions based on their window spec.
  val groupedBySpec = windowExpressions.groupBy { expr =>
    expr
      .find {
        case _: WindowExpression => true
        case _ => false
      }
      .collect { case window: WindowExpression => window.windowSpec }
      // Report the single expression that is missing a WindowExpression rather than
      // dumping the whole input list.
      .getOrElse(failAnalysis(s"$expr does not have any WindowExpression."))
  }

  // For every window spec, stack a Window operator on top of the plan built so far.
  groupedBySpec.foldLeft(child) { case (currentChild, (windowSpec, exprsForSpec)) =>
    Window(currentChild.output, exprsForSpec, windowSpec, currentChild)
  }
}
| 623 | + |
/**
 * Resolves named window references and moves window expressions out of Aggregate and
 * Project operators into dedicated Window operators.
 *
 * transformDown is required here so that the "Aggregate with Having clause" case,
 * which matches a Filter sitting on an Aggregate, gets a chance to fire.
 */
def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
  // Replace references to named window specs with their definitions. This case is
  // placed first so that it also applies while children are still unresolved.
  case WithWindowDefinition(windowDefinitions, child) =>
    child.transform {
      case node => node.transformExpressions {
        case UnresolvedWindowExpression(c, WindowSpecReference(windowName)) =>
          val undefinedSpecMessage =
            s"Window specification $windowName is not defined in the WINDOW clause."
          val resolvedSpec =
            windowDefinitions.get(windowName).getOrElse(failAnalysis(undefinedSpecMessage))
          WindowExpression(c, resolvedSpec)
      }
    }

  // Aggregate with Having clause: the Filter stays between the Aggregate and the
  // Window operators.
  case Filter(condition, a @ Aggregate(groupingExprs, aggregateExprs, child))
      if child.resolved &&
        hasWindowFunction(aggregateExprs) &&
        a.expressions.forall(_.resolved) =>
    val (windowExprs, aggregatesOnly) = extract(aggregateExprs)
    // Aggregate evaluates the aggregation functions; Filter applies the Having condition.
    val filteredAggregate = Filter(condition, Aggregate(groupingExprs, aggregatesOnly, child))
    val windowed = addWindow(windowExprs, filteredAggregate)
    // Restore the output columns of the original aggregate list.
    val outputAttributes = aggregateExprs.map(ne => ne.toAttribute)
    Project(outputAttributes, windowed)

  // The cases below only apply once every child has been resolved.
  case unresolvedChildren: LogicalPlan if !unresolvedChildren.childrenResolved =>
    unresolvedChildren

  // Aggregate without Having clause.
  case a @ Aggregate(groupingExprs, aggregateExprs, child)
      if hasWindowFunction(aggregateExprs) && a.expressions.forall(_.resolved) =>
    val (windowExprs, aggregatesOnly) = extract(aggregateExprs)
    // Aggregate evaluates the aggregation functions; Window operators go on top of it.
    val aggregated = Aggregate(groupingExprs, aggregatesOnly, child)
    val windowed = addWindow(windowExprs, aggregated)
    // Restore the output columns of the original aggregate list.
    val outputAttributes = aggregateExprs.map(ne => ne.toAttribute)
    Project(outputAttributes, windowed)

  // Window expressions are only extracted from a Project once all of its expressions
  // have been resolved.
  case p @ Project(projectList, child)
      if hasWindowFunction(projectList) && p.expressions.forall(_.resolved) =>
    val (windowExprs, regularExprs) = extract(projectList)
    // The inner Project supplies every input the window expressions need.
    val inputProject = Project(regularExprs, child)
    val windowed = addWindow(windowExprs, inputProject)
    // Restore the output columns of the original projectList.
    val outputAttributes = projectList.map(ne => ne.toAttribute)
    Project(outputAttributes, windowed)
}
| 693 | + } |
532 | 694 | } |
533 | 695 |
|
534 | 696 | /** |
|
0 commit comments