Commit 374ec3b

Author: Davies Liu
Commit message: remove prepare

1 parent 0451b00 · commit 374ec3b

6 files changed: +69 −261 lines changed


core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithPreparationRDD.scala

Lines changed: 0 additions & 66 deletions
This file was deleted.
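The deleted class was the mechanism behind the `prepare` step this commit removes. For orientation, here is a minimal sketch of its shape, reconstructed from its call site in the TungstenAggregate diff below; the bookkeeping around queued prepared arguments is an assumption about the deleted internals, not a quote of them:

```scala
package org.apache.spark.rdd

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

import org.apache.spark.{Partition, Partitioner, TaskContext}

// Sketch of the deleted class, inferred from its call site in TungstenAggregate.
private[spark] class MapPartitionsWithPreparationRDD[U: ClassTag, T: ClassTag, M: ClassTag](
    prev: RDD[T],
    preparePartition: () => M,
    executePartition: (TaskContext, Int, M, Iterator[T]) => Iterator[U],
    preservesPartitioning: Boolean = false)
  extends RDD[U](prev) {

  override val partitioner: Option[Partitioner] =
    if (preservesPartitioning) firstParent[T].partitioner else None

  override def getPartitions: Array[Partition] = firstParent[T].partitions

  // Assumed detail: arguments queued by prepare() and consumed by compute() in order.
  private[this] val preparedArguments = new ArrayBuffer[M]

  /** Run the preparation step early, e.g. to reserve execution memory for this task. */
  def prepare(): Unit = {
    preparedArguments += preparePartition()
  }

  override def compute(partition: Partition, context: TaskContext): Iterator[U] = {
    // Use an argument prepared earlier via prepare(), or prepare one on the spot.
    val prepared =
      if (preparedArguments.isEmpty) preparePartition() else preparedArguments.remove(0)
    executePartition(context, partition.index, prepared, firstParent[T].iterator(partition, context))
  }
}
```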

core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala

Lines changed: 0 additions & 13 deletions
@@ -73,16 +73,6 @@ private[spark] abstract class ZippedPartitionsBaseRDD[V: ClassTag](
     super.clearDependencies()
     rdds = null
   }
-
-  /**
-   * Call the prepare method of every parent that has one.
-   * This is needed for reserving execution memory in advance.
-   */
-  protected def tryPrepareParents(): Unit = {
-    rdds.collect {
-      case rdd: MapPartitionsWithPreparationRDD[_, _, _] => rdd.prepare()
-    }
-  }
 }
 
 private[spark] class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag](
@@ -94,7 +84,6 @@ private[spark] class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag]
   extends ZippedPartitionsBaseRDD[V](sc, List(rdd1, rdd2), preservesPartitioning) {
 
   override def compute(s: Partition, context: TaskContext): Iterator[V] = {
-    tryPrepareParents()
     val partitions = s.asInstanceOf[ZippedPartitionsPartition].partitions
     f(rdd1.iterator(partitions(0), context), rdd2.iterator(partitions(1), context))
   }
@@ -118,7 +107,6 @@ private[spark] class ZippedPartitionsRDD3
   extends ZippedPartitionsBaseRDD[V](sc, List(rdd1, rdd2, rdd3), preservesPartitioning) {
 
   override def compute(s: Partition, context: TaskContext): Iterator[V] = {
-    tryPrepareParents()
     val partitions = s.asInstanceOf[ZippedPartitionsPartition].partitions
     f(rdd1.iterator(partitions(0), context),
       rdd2.iterator(partitions(1), context),
@@ -146,7 +134,6 @@ private[spark] class ZippedPartitionsRDD4
   extends ZippedPartitionsBaseRDD[V](sc, List(rdd1, rdd2, rdd3, rdd4), preservesPartitioning) {
 
   override def compute(s: Partition, context: TaskContext): Iterator[V] = {
-    tryPrepareParents()
     val partitions = s.asInstanceOf[ZippedPartitionsPartition].partitions
     f(rdd1.iterator(partitions(0), context),
       rdd2.iterator(partitions(1), context),
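These `ZippedPartitionsRDD*` classes back the public `RDD.zipPartitions` API, which is where `tryPrepareParents()` used to run before any parent partition was computed. A small usage example of that API; the driver program and its names are illustrative, not part of this commit:

```scala
import org.apache.spark.{SparkConf, SparkContext}

object ZipPartitionsExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("zip-example").setMaster("local[2]"))
    val a = sc.parallelize(1 to 8, 4)
    val b = sc.parallelize(101 to 108, 4)
    // zipPartitions constructs a ZippedPartitionsRDD2, whose compute() now reads
    // the two parent iterators directly, with no tryPrepareParents() call first.
    val sums = a.zipPartitions(b) { (ia, ib) =>
      ia.zip(ib).map { case (x, y) => x + y }
    }
    println(sums.collect().mkString(", "))
    sc.stop()
  }
}
```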

core/src/test/scala/org/apache/spark/rdd/MapPartitionsWithPreparationRDDSuite.scala

Lines changed: 0 additions & 66 deletions
This file was deleted.

sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregate.scala

Lines changed: 29 additions & 50 deletions
@@ -17,15 +17,14 @@
 
 package org.apache.spark.sql.execution.aggregate
 
-import org.apache.spark.TaskContext
-import org.apache.spark.rdd.{MapPartitionsWithPreparationRDD, RDD}
+import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.errors._
-import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression2
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression2
 import org.apache.spark.sql.catalyst.plans.physical._
-import org.apache.spark.sql.execution.{UnsafeFixedWidthAggregationMap, UnaryNode, SparkPlan}
 import org.apache.spark.sql.execution.metric.SQLMetrics
+import org.apache.spark.sql.execution.{SparkPlan, UnaryNode, UnsafeFixedWidthAggregationMap}
 import org.apache.spark.sql.types.StructType
 
 case class TungstenAggregate(
@@ -84,59 +83,39 @@ case class TungstenAggregate(
     val dataSize = longMetric("dataSize")
     val spillSize = longMetric("spillSize")
 
-    /**
-     * Set up the underlying unsafe data structures used before computing the parent partition.
-     * This makes sure our iterator is not starved by other operators in the same task.
-     */
-    def preparePartition(): TungstenAggregationIterator = {
-      new TungstenAggregationIterator(
-        groupingExpressions,
-        nonCompleteAggregateExpressions,
-        nonCompleteAggregateAttributes,
-        completeAggregateExpressions,
-        completeAggregateAttributes,
-        initialInputBufferOffset,
-        resultExpressions,
-        newMutableProjection,
-        child.output,
-        testFallbackStartsAt,
-        numInputRows,
-        numOutputRows,
-        dataSize,
-        spillSize)
-    }
+    child.execute().mapPartitions { iter =>
 
-    /** Compute a partition using the iterator already set up previously. */
-    def executePartition(
-        context: TaskContext,
-        partitionIndex: Int,
-        aggregationIterator: TungstenAggregationIterator,
-        parentIterator: Iterator[InternalRow]): Iterator[UnsafeRow] = {
-      val hasInput = parentIterator.hasNext
-      if (!hasInput) {
-        // We're not using the underlying map, so we just can free it here
-        aggregationIterator.free()
-        if (groupingExpressions.isEmpty) {
+      val hasInput = iter.hasNext
+      if (!hasInput && groupingExpressions.nonEmpty) {
+        // This is a grouped aggregate and the input iterator is empty,
+        // so return an empty iterator.
+        Iterator.empty
+      } else {
+        val aggregationIterator =
+          new TungstenAggregationIterator(
+            groupingExpressions,
+            nonCompleteAggregateExpressions,
+            nonCompleteAggregateAttributes,
+            completeAggregateExpressions,
+            completeAggregateAttributes,
+            initialInputBufferOffset,
+            resultExpressions,
+            newMutableProjection,
+            child.output,
+            iter,
+            testFallbackStartsAt,
+            numInputRows,
+            numOutputRows,
+            dataSize,
+            spillSize)
+        if (!hasInput && groupingExpressions.isEmpty) {
           numOutputRows += 1
           Iterator.single[UnsafeRow](aggregationIterator.outputForEmptyGroupingKeyWithoutInput())
         } else {
-          // This is a grouped aggregate and the input iterator is empty,
-          // so return an empty iterator.
-          Iterator.empty
+          aggregationIterator
         }
-      } else {
-        aggregationIterator.start(parentIterator)
-        aggregationIterator
       }
     }
-
-    // Note: we need to set up the iterator in each partition before computing the
-    // parent partition, so we cannot simply use `mapPartitions` here (SPARK-9747).
-    val resultRdd = {
-      new MapPartitionsWithPreparationRDD[UnsafeRow, InternalRow, TungstenAggregationIterator](
-        child.execute(), preparePartition, executePartition, preservesPartitioning = true)
-    }
-    resultRdd.asInstanceOf[RDD[InternalRow]]
   }
 
   override def simpleString: String = {
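With the `prepare`/`start` lifecycle gone, all per-partition setup happens inside the `mapPartitions` closure, and the input iterator is handed straight to the `TungstenAggregationIterator` constructor. A distilled sketch of that pattern; the names here are generic placeholders, and only `RDD.mapPartitions` is a real Spark API:

```scala
import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD

object MapPartitionsSetupPattern {
  // `aggregatePartitions`, `buildIterator`, and `emptyResult` are illustrative
  // names standing in for the TungstenAggregate machinery above.
  def aggregatePartitions[T, U: ClassTag](
      input: RDD[T],
      hasGrouping: Boolean,
      buildIterator: Iterator[T] => Iterator[U],
      emptyResult: () => Iterator[U]): RDD[U] = {
    input.mapPartitions { iter =>
      val hasInput = iter.hasNext // non-consuming peek at the partition
      if (!hasInput && hasGrouping) {
        // Grouped aggregate over an empty partition: no groups, no output rows.
        Iterator.empty
      } else if (!hasInput) {
        // Global aggregate over an empty partition still emits one row,
        // e.g. COUNT(*) returning 0.
        emptyResult()
      } else {
        // Heavy per-partition setup happens here, inside the task; passing the
        // input to the iterator's constructor replaces prepare()/start().
        buildIterator(iter)
      }
    }
  }
}
```

The two empty-input branches mirror the cases in the diff: a grouped aggregate over no rows produces no rows, while a global aggregate still emits its single default row via `outputForEmptyGroupingKeyWithoutInput()`.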
