CodingCat
diff --git a/‎sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate2/aggregates.scala‎
Lines changed: 4 additions & 0 deletions b/‎sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate2/aggregates.scala‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala‎
Lines changed: 9 additions & 2 deletions b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala‎
Lines changed: 9 additions & 2 deletions
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala‎
Lines changed: 28 additions & 22 deletions b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala‎
Lines changed: 28 additions & 22 deletions
diff --git a/‎sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate2/aggregateOperators.scala‎
Lines changed: 38 additions & 26 deletions b/‎sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate2/aggregateOperators.scala‎
Lines changed: 38 additions & 26 deletions
@@ -58,6 +58,10 @@ private[sql] case object Final extends AggregateMode
  */
 private[sql] case object Complete extends AggregateMode
 
+/**
+ * A placeholder expression used in code-gen; it does not change the corresponding value
+ * in the row.
+ */
 private[sql] case object NoOp extends Expression with Unevaluable {
   override def nullable: Boolean = true
   override def eval(input: InternalRow): Any = {
 
@@ -281,8 +281,15 @@ private[sql] case class EnsureRequirements(sqlContext: SQLContext) extends Rule[
         }
 
         def addSortIfNecessary(child: SparkPlan): SparkPlan = {
-          if (rowOrdering.nonEmpty && child.outputOrdering != rowOrdering) {
-            sqlContext.planner.BasicOperators.getSortOperator(rowOrdering, global = false, child)
+
+          if (rowOrdering.nonEmpty) {
+            // Skip the sort only if child.outputOrdering starts with rowOrdering, e.g. [a, b]
+            // satisfies a required [a], but a child sorted by [a] does not satisfy [a, b].
+            if (child.outputOrdering.take(rowOrdering.size) != rowOrdering) {
+              sqlContext.planner.BasicOperators.getSortOperator(rowOrdering, global = false, child)
+            } else {
+              child
+            }
           } else {
             child
           }
 
@@ -203,7 +203,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
     private def planAggregateWithoutDistinct(
         groupingExpressions: Seq[Expression],
         aggregateExpressions: Seq[AggregateExpression2],
-        aggregateFunctionMap: Map[AggregateFunction2, Attribute],
+        aggregateFunctionMap: Map[(AggregateFunction2, Boolean), Attribute],
         resultExpressions: Seq[NamedExpression],
         child: SparkPlan): Seq[SparkPlan] = {
       // 1. Create an Aggregate Operator for partial aggregations.
@@ -241,12 +241,12 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
       }
       val finalAggregateAttributes =
         finalAggregateExpressions.map {
-          expr => aggregateFunctionMap(expr.aggregateFunction)
+          expr => aggregateFunctionMap(expr.aggregateFunction, expr.isDistinct)
         }
       val rewrittenResultExpressions = resultExpressions.map { expr =>
         expr.transform {
           case agg: AggregateExpression2 =>
-            aggregateFunctionMap(agg.aggregateFunction).toAttribute
+            aggregateFunctionMap(agg.aggregateFunction, agg.isDistinct).toAttribute
           case expression if groupExpressionMap.contains(expression) =>
             groupExpressionMap(expression).toAttribute
         }.asInstanceOf[NamedExpression]
@@ -266,7 +266,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
       groupingExpressions: Seq[Expression],
       functionsWithDistinct: Seq[AggregateExpression2],
       functionsWithoutDistinct: Seq[AggregateExpression2],
-      aggregateFunctionMap: Map[AggregateFunction2, Attribute],
+      aggregateFunctionMap: Map[(AggregateFunction2, Boolean), Attribute],
       resultExpressions: Seq[NamedExpression],
       child: SparkPlan): Seq[SparkPlan] = {
 
@@ -306,7 +306,6 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
       val partialAggregateAttributes = partialAggregateExpressions.flatMap { agg =>
         agg.aggregateFunction.bufferAttributes
       }
-      println("namedDistinctColumnExpressions " + namedDistinctColumnExpressions)
       val partialAggregate =
         Aggregate2Sort(
           None: Option[Seq[Expression]],
@@ -323,7 +322,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
       }
       val partialMergeAggregateAttributes =
         partialMergeAggregateExpressions.map {
-          expr => aggregateFunctionMap(expr.aggregateFunction)
+          expr => aggregateFunctionMap(expr.aggregateFunction, expr.isDistinct)
         }
       val partialMergeAggregate =
         Aggregate2Sort(
@@ -336,34 +335,41 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
 
       // 3. Create an Aggregate Operator for partial merge aggregations.
       val finalAggregateExpressions = functionsWithoutDistinct.map {
-        Need to replace the children to distinctColumnAttributes
         case AggregateExpression2(aggregateFunction, mode, _) =>
           AggregateExpression2(aggregateFunction, Final, false)
       }
       val finalAggregateAttributes =
         finalAggregateExpressions.map {
-          expr => aggregateFunctionMap(expr.aggregateFunction)
-        }
-      val completeAggregateExpressions = functionsWithDistinct.map {
-        case AggregateExpression2(aggregateFunction, mode, _) =>
-          AggregateExpression2(aggregateFunction, Complete, false)
-      }
-      val completeAggregateAttributes =
-        completeAggregateExpressions.map {
-          expr => aggregateFunctionMap(expr.aggregateFunction)
+          expr => aggregateFunctionMap(expr.aggregateFunction, expr.isDistinct)
         }
+      val (completeAggregateExpressions, completeAggregateAttributes) = functionsWithDistinct.map {
+        // The children of an AggregateFunction with the DISTINCT keyword have already
+        // been evaluated. Here, we need to replace the original children
+        // with AttributeReferences.
+        case agg @ AggregateExpression2(aggregateFunction, mode, isDistinct) =>
+          val rewrittenAggregateFunction = aggregateFunction.transformDown {
+            case expr if distinctColumnExpressionMap.contains(expr) =>
+              distinctColumnExpressionMap(expr).toAttribute
+          }.asInstanceOf[AggregateFunction2]
+          // We rewrite the aggregate function to a non-distinct aggregation because
+          // its input will have distinct arguments.
+          val rewrittenAggregateExpression =
+            AggregateExpression2(rewrittenAggregateFunction, Complete, false)
+
+          val aggregateFunctionAttribute = aggregateFunctionMap(agg.aggregateFunction, isDistinct)
+          (rewrittenAggregateExpression -> aggregateFunctionAttribute)
+      }.unzip
 
       val rewrittenResultExpressions = resultExpressions.map { expr =>
         expr.transform {
           case agg: AggregateExpression2 =>
-            aggregateFunctionMap(agg.aggregateFunction).toAttribute
+            aggregateFunctionMap(agg.aggregateFunction, agg.isDistinct).toAttribute
           case expression if groupExpressionMap.contains(expression) =>
             groupExpressionMap(expression).toAttribute
-          case expression if distinctColumnExpressionMap.contains(expression) =>
-            distinctColumnExpressionMap(expression).toAttribute
         }.asInstanceOf[NamedExpression]
       }
       val finalAndCompleteAggregate = FinalAndCompleteAggregate2Sort(
+        namedGroupingAttributes ++ distinctColumnAttributes,
         namedGroupingAttributes,
         finalAggregateExpressions,
         finalAggregateAttributes,
@@ -378,7 +384,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
     def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
       case logical.Aggregate(groupingExpressions, resultExpressions, child)
         if sqlContext.conf.useSqlAggregate2 =>
-        // 1. Extracts all distinct aggregate expressions from the resultExpressions.
+        // Extracts all distinct aggregate expressions from the resultExpressions.
         val aggregateExpressions = resultExpressions.flatMap { expr =>
           expr.collect {
             case agg: AggregateExpression2 => agg
@@ -388,12 +394,12 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
         // to the corresponding attribute of the function.
         val aggregateFunctionMap = aggregateExpressions.map { agg =>
           val aggregateFunction = agg.aggregateFunction
-          aggregateFunction -> Alias(aggregateFunction, aggregateFunction.toString)().toAttribute
+          (aggregateFunction, agg.isDistinct) ->
+            Alias(aggregateFunction, aggregateFunction.toString)().toAttribute
         }.toMap
 
         val (functionsWithDistinct, functionsWithoutDistinct) =
           aggregateExpressions.partition(_.isDistinct)
-        println("functionsWithDistinct " + functionsWithDistinct)
         if (functionsWithDistinct.map(_.aggregateFunction.children).distinct.length > 1) {
           // This is a sanity check. We should not reach here since we check the same thing in
           // CheckAggregateFunction.
 
@@ -34,9 +34,6 @@ case class Aggregate2Sort(
     child: SparkPlan)
   extends UnaryNode {
 
-  /** Indicates if this operator is for partial aggregations. */
-
-
   override def references: AttributeSet = {
     val referencesInResults =
       AttributeSet(resultExpressions.flatMap(_.references)) -- AttributeSet(aggregateAttributes)
@@ -55,8 +52,18 @@ case class Aggregate2Sort(
     }
   }
 
-  override def requiredChildOrdering: Seq[Seq[SortOrder]] =
+  override def requiredChildOrdering: Seq[Seq[SortOrder]] = {
+    // TODO: We should not sort the input rows if they are just in reversed order.
     groupingExpressions.map(SortOrder(_, Ascending)) :: Nil
+  }
+
+  override def outputOrdering: Seq[SortOrder] = {
+    // It is possible that the child.outputOrdering starts with the required
+    // ordering expressions (e.g. we require [a] as the sort expression and the
+    // child's outputOrdering is [a, b]). We can only guarantee the output rows
+    // are sorted by values of groupingExpressions.
+    groupingExpressions.map(SortOrder(_, Ascending))
+  }
 
   override def output: Seq[Attribute] = resultExpressions.map(_.toAttribute)
 
@@ -69,42 +76,46 @@ case class Aggregate2Sort(
           child.output,
           iter)
       } else {
-        val partialAggregation: Boolean = {
+        val aggregationIterator: SortAggregationIterator = {
           aggregateExpressions.map(_.mode).distinct.toList match {
-            case Partial :: Nil => true
-            case Final :: Nil => false
-            TODO: HANDLE PARTIAL MERGE
+            case Partial :: Nil =>
+              new PartialSortAggregationIterator(
+                groupingExpressions,
+                aggregateExpressions,
+                newMutableProjection,
+                child.output,
+                iter)
+            case PartialMerge :: Nil =>
+              new PartialMergeSortAggregationIterator(
+                groupingExpressions,
+                aggregateExpressions,
+                newMutableProjection,
+                child.output,
+                iter)
+            case Final :: Nil =>
+              new FinalSortAggregationIterator(
+                groupingExpressions,
+                aggregateExpressions,
+                aggregateAttributes,
+                resultExpressions,
+                newMutableProjection,
+                child.output,
+                iter)
             case other =>
               sys.error(
                 s"Could not evaluate ${aggregateExpressions} because we do not support evaluate " +
                   s"modes $other in this operator.")
           }
         }
-        val aggregationIterator =
-          if (partialAggregation) {
-            new PartialSortAggregationIterator(
-              groupingExpressions,
-              aggregateExpressions,
-              newMutableProjection,
-              child.output,
-              iter)
-          } else {
-            new FinalSortAggregationIterator(
-              groupingExpressions,
-              aggregateExpressions,
-              aggregateAttributes,
-              resultExpressions,
-              newMutableProjection,
-              child.output,
-              iter)
-          }
+
         aggregationIterator
       }
     }
   }
 }
 
 case class FinalAndCompleteAggregate2Sort(
+    previousGroupingExpressions: Seq[NamedExpression],
     groupingExpressions: Seq[NamedExpression],
     finalAggregateExpressions: Seq[AggregateExpression2],
     finalAggregateAttributes: Seq[Attribute],
@@ -143,6 +154,7 @@ case class FinalAndCompleteAggregate2Sort(
     child.execute().mapPartitions { iter =>
 
       new FinalAndCompleteSortAggregationIterator(
+        previousGroupingExpressions.length,
         groupingExpressions,
         finalAggregateExpressions,
         finalAggregateAttributes,