@@ -132,24 +132,12 @@ class HadoopRDD[K, V](
132132 // Returns a JobConf that will be used on slaves to obtain input splits for Hadoop reads.
133133 // Returns a JobConf that will be used on slaves to obtain input splits for Hadoop reads.
135- if (conf.isInstanceOf[JobConf]) {
136- // A user-broadcasted JobConf was provided to the HadoopRDD, so always use it.
137- conf.asInstanceOf[JobConf]
138- } else if (HadoopRDD.containsCachedMetadata(jobConfCacheKey)) {
139- // getJobConf() has been called previously, so there is already a local cache of the JobConf
140- // needed by this RDD.
141- HadoopRDD.getCachedMetadata(jobConfCacheKey).asInstanceOf[JobConf]
142- } else {
143- // Create a JobConf that will be cached and used across this RDD's getJobConf() calls in the
144- // local process. The local cache is accessed through HadoopRDD.putCachedMetadata().
145- // The caching helps minimize GC, since a JobConf can contain ~10KB of temporary objects.
146- // Synchronize to prevent ConcurrentModificationException (Spark-1097, Hadoop-10456).
147- HadoopRDD.CONFIGURATION_INSTANTIATION_LOCK.synchronized {
148- val newJobConf = new JobConf(conf)
135+ HadoopRDD.CONFIGURATION_INSTANTIATION_LOCK.synchronized {
136+ val newJobConf = new JobConf(conf)
137+ if (!conf.isInstanceOf[JobConf]) {
149138 initLocalJobConfFuncOpt.map(f => f(newJobConf))
150- HadoopRDD.putCachedMetadata(jobConfCacheKey, newJobConf)
151- newJobConf
152139 }
140+ newJobConf
153141 }
154142 }
155143
@@ -257,7 +245,10 @@ class HadoopRDD[K, V](
257245}
258246
259247private [spark] object HadoopRDD {
260- /** Constructing Configuration objects is not threadsafe, use this lock to serialize. */
248+ /**
249+ * Configuration's constructor is not threadsafe (see SPARK-1097 and HADOOP-10456).
250+ * Therefore, we synchronize on this lock before calling new JobConf() or new Configuration().
251+ */
261252 val CONFIGURATION_INSTANTIATION_LOCK = new Object()
262253
263254 /**