@@ -225,10 +225,24 @@ abstract class RDD[T: ClassTag](
   /** Get the RDD's current storage level, or StorageLevel.NONE if none is set. */
   def getStorageLevel: StorageLevel = storageLevel
 
+  /**
+   * Lock for all mutable state of this RDD (persistence, partitions, dependencies, etc.). We do
+   * not use `this` because RDDs are user-visible, so users might have added their own locking on
+   * RDDs; sharing that could lead to a deadlock.
+   *
+   * One thread might hold the lock on many of these, for a chain of RDD dependencies; but
+   * because DAGs are acyclic, and we only ever hold locks for one path in that DAG, there is no
+   * chance of deadlock.
+   *
+   * The use of Integer is simply so this is serializable -- executors may reference the shared
+   * fields (though they should never mutate them; that only happens on the driver).
+   */
+  private val stateLock = new Integer(0)
+
   // Our dependencies and partitions will be gotten by calling subclass's methods below, and will
   // be overwritten when we're checkpointed
-  private var dependencies_ : Seq[Dependency[_]] = _
-  @transient private var partitions_ : Array[Partition] = _
+  @volatile private var dependencies_ : Seq[Dependency[_]] = _
+  @volatile @transient private var partitions_ : Array[Partition] = _
 
   /** An Option holding our checkpoint RDD, if we are checkpointed */
   private def checkpointRDD: Option[CheckpointRDD[T]] = checkpointData.flatMap(_.checkpointRDD)
@@ -240,7 +254,11 @@ abstract class RDD[T: ClassTag](
   final def dependencies: Seq[Dependency[_]] = {
     checkpointRDD.map(r => List(new OneToOneDependency(r))).getOrElse {
       if (dependencies_ == null) {
-        dependencies_ = getDependencies
+        stateLock.synchronized {
+          if (dependencies_ == null) {
+            dependencies_ = getDependencies
+          }
+        }
       }
       dependencies_
     }
@@ -253,10 +271,14 @@ abstract class RDD[T: ClassTag](
   final def partitions: Array[Partition] = {
     checkpointRDD.map(_.partitions).getOrElse {
       if (partitions_ == null) {
-        partitions_ = getPartitions
-        partitions_.zipWithIndex.foreach { case (partition, index) =>
-          require(partition.index == index,
-            s"partitions($index).partition == ${partition.index}, but it should equal $index")
+        stateLock.synchronized {
+          if (partitions_ == null) {
+            partitions_ = getPartitions
+            partitions_.zipWithIndex.foreach { case (partition, index) =>
+              require(partition.index == index,
+                s"partitions($index).partition == ${partition.index}, but it should equal $index")
+            }
+          }
         }
       }
       partitions_
@@ -1788,7 +1810,7 @@ abstract class RDD[T: ClassTag](
    * Changes the dependencies of this RDD from its original parents to a new RDD (`newRDD`)
    * created from the checkpoint file, and forgets its old dependencies and partitions.
    */
-  private[spark] def markCheckpointed(): Unit = {
+  private[spark] def markCheckpointed(): Unit = stateLock.synchronized {
     clearDependencies()
     partitions_ = null
     deps = null // Forget the constructor argument for dependencies too
@@ -1800,7 +1822,7 @@ abstract class RDD[T: ClassTag](
    * collected. Subclasses of RDD may override this method for implementing their own cleaning
    * logic. See [[org.apache.spark.rdd.UnionRDD]] for an example.
    */
-  protected def clearDependencies(): Unit = {
+  protected def clearDependencies(): Unit = stateLock.synchronized {
     dependencies_ = null
   }
 
@@ -1959,6 +1981,7 @@ abstract class RDD[T: ClassTag](
       deterministicLevelCandidates.maxBy(_.id)
     }
   }
+
 }
 
 
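The `dependencies` and `partitions` getters above follow the classic double-checked locking idiom: the backing field is `@volatile`, the first null check keeps the common already-initialized path lock-free, and the second check inside `stateLock.synchronized` ensures the expensive `getDependencies`/`getPartitions` call runs at most once even when threads race. A minimal standalone sketch of the idiom (the `LazyValue` class and `compute` method are illustrative names, not part of RDD.scala):

class LazyValue extends Serializable {
  // Dedicated lock object; never `this`, so user code that synchronizes
  // on a LazyValue instance cannot deadlock against our internal locking.
  private val stateLock = new Integer(0)

  // @volatile makes the unsynchronized read on the fast path safe: once
  // written under the lock, the value is visible to all threads.
  @volatile private var value: Seq[Int] = _

  def get: Seq[Int] = {
    if (value == null) {                 // fast path: no lock once initialized
      stateLock.synchronized {
        if (value == null) {             // re-check: another thread may have won
          value = compute()
        }
      }
    }
    value
  }

  private def compute(): Seq[Int] = Seq(1, 2, 3)
}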
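The choice of `new Integer(0)` over a plain `new Object` for the lock matters because RDDs are serialized and shipped to executors, so every non-transient field must itself be Serializable. A quick sketch demonstrating the difference (the class names here are hypothetical):

import java.io.{ByteArrayOutputStream, ObjectOutputStream}

class WithIntegerLock extends Serializable {
  private val lock = new Integer(0)      // java.lang.Integer is Serializable
}

class WithObjectLock extends Serializable {
  private val lock = new Object          // plain Object is NOT Serializable
}

object LockDemo extends App {
  val out = new ObjectOutputStream(new ByteArrayOutputStream())
  out.writeObject(new WithIntegerLock)   // succeeds
  // out.writeObject(new WithObjectLock) // would throw java.io.NotSerializableException
}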