Fix bug: prepare a fresh argument for each compute() call in MapPartitionsWithPreparationRDD

Davies Liu · Davies Liu · commit d44be2dc3051 · 2015-08-29T10:35:08.000-07:00
diff --git a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithPreparationRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithPreparationRDD.scala
@@ -24,6 +24,8 @@ import org.apache.spark.{Partition, Partitioner, TaskContext}
 /**
  * An RDD that applies a user provided function to every partition of the parent RDD, and
  * additionally allows the user to prepare each partition before computing the parent partition.
+ *
+ * TODO(davies): remove this once SPARK-10342 is fixed
  */
 private[spark] class MapPartitionsWithPreparationRDD[U: ClassTag, T: ClassTag, M: ClassTag](
     prev: RDD[T],
@@ -38,13 +40,24 @@ private[spark] class MapPartitionsWithPreparationRDD[U: ClassTag, T: ClassTag, M
 
   override def getPartitions: Array[Partition] = firstParent[T].partitions
 
-  lazy val preparedArgument: M = preparePartition()
+  private[this] var preparedArgument: Option[M] = None
+
+  def prepare(): Unit = {
+    // This could be called multiple times
+    if (preparedArgument.isEmpty) {
+      preparedArgument = Some(preparePartition())
+    }
+  }
 
   /**
    * Prepare a partition before computing it from its parent.
    */
   override def compute(partition: Partition, context: TaskContext): Iterator[U] = {
-    val prepared = preparedArgument
+    prepare()
+    // The same RDD could be computed multiple times in one task; each call of compute() should
+    // have its own separately prepared argument, so clear the cached one after taking it.
+    val prepared = preparedArgument.get
+    preparedArgument = None
     val parentIterator = firstParent[T].iterator(partition, context)
     executePartition(context, partition.index, prepared, parentIterator)
   }
diff --git a/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala
@@ -76,7 +76,7 @@ private[spark] abstract class ZippedPartitionsBaseRDD[V: ClassTag](
 
   protected def tryPrepareChildren() {
     rdds.foreach {
-      case rdd: MapPartitionsWithPreparationRDD[_, _, _] => rdd.preparedArgument
+      case rdd: MapPartitionsWithPreparationRDD[_, _, _] => rdd.prepare()
       case _ =>
     }
   }

Original file line number	Diff line number	Diff line change
`@@ -76,7 +76,7 @@ private[spark] abstract class ZippedPartitionsBaseRDD[V: ClassTag](`
`76`	`76`
`77`	`77`	`protected def tryPrepareChildren() {`
`78`	`78`	`rdds.foreach {`
`79`		`- case rdd: MapPartitionsWithPreparationRDD[_, _, _] => rdd.preparedArgument`
	`79`	`+ case rdd: MapPartitionsWithPreparationRDD[_, _, _] => rdd.prepare()`
`80`	`80`	`case _ =>`
`81`	`81`	`}`
`82`	`82`	`}`