From 7d1b769f943bdc35a410e0225584b047602f68f4 Mon Sep 17 00:00:00 2001
From: Aaron Davidson <aaron@databricks.com>
Date: Mon, 14 Jul 2014 14:58:58 -0700
Subject: [PATCH] SPARK-1097: Do not introduce deadlock while fixing
 concurrency bug

We recently added a lock on 'conf' in order to prevent concurrent
creation of JobConf objects. However, it turns out that this can
introduce a deadlock because Hadoop also synchronizes on Configuration
objects when creating new Configurations (it does so via a static
REGISTRY which contains all created Configurations).

This fix forces all Spark initialization of Configuration objects
to occur serially by using a static lock that we control, and thus
also prevents introducing the deadlock.
---
 core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
index 041028514399..e521612ffc27 100644
--- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -140,8 +140,8 @@ class HadoopRDD[K, V](
       // Create a JobConf that will be cached and used across this RDD's getJobConf() calls in the
       // local process. The local cache is accessed through HadoopRDD.putCachedMetadata().
       // The caching helps minimize GC, since a JobConf can contain ~10KB of temporary objects.
-      // synchronize to prevent ConcurrentModificationException (Spark-1097, Hadoop-10456)
-      conf.synchronized {
+      // Synchronize to prevent ConcurrentModificationException (Spark-1097, Hadoop-10456).
+      HadoopRDD.CONFIGURATION_INSTANTIATION_LOCK.synchronized {
         val newJobConf = new JobConf(conf)
         initLocalJobConfFuncOpt.map(f => f(newJobConf))
         HadoopRDD.putCachedMetadata(jobConfCacheKey, newJobConf)
@@ -246,6 +246,9 @@ class HadoopRDD[K, V](
 }
 
 private[spark] object HadoopRDD {
+  /** Constructing Configuration objects is not thread-safe; use this lock to serialize it. */
+  val CONFIGURATION_INSTANTIATION_LOCK = new Object()
+
   /**
    * The three methods below are helpers for accessing the local map, a property of the SparkEnv of
    * the local process.