Skip to content

Commit dd25697

Browse files
committed
[SPARK-2546] [1.0 / 1.1 backport] Clone JobConf for each task.
1 parent 964e3aa commit dd25697

File tree

1 file changed

+8
-17
lines changed

1 file changed

+8
-17
lines changed

core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala

Lines changed: 8 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -132,24 +132,12 @@ class HadoopRDD[K, V](
132132
// Returns a JobConf that will be used on slaves to obtain input splits for Hadoop reads.
133133
protected def getJobConf(): JobConf = {
134134
val conf: Configuration = broadcastedConf.value.value
135-
if (conf.isInstanceOf[JobConf]) {
136-
// A user-broadcasted JobConf was provided to the HadoopRDD, so always use it.
137-
conf.asInstanceOf[JobConf]
138-
} else if (HadoopRDD.containsCachedMetadata(jobConfCacheKey)) {
139-
// getJobConf() has been called previously, so there is already a local cache of the JobConf
140-
// needed by this RDD.
141-
HadoopRDD.getCachedMetadata(jobConfCacheKey).asInstanceOf[JobConf]
142-
} else {
143-
// Create a JobConf that will be cached and used across this RDD's getJobConf() calls in the
144-
// local process. The local cache is accessed through HadoopRDD.putCachedMetadata().
145-
// The caching helps minimize GC, since a JobConf can contain ~10KB of temporary objects.
146-
// Synchronize to prevent ConcurrentModificationException (Spark-1097, Hadoop-10456).
147-
HadoopRDD.CONFIGURATION_INSTANTIATION_LOCK.synchronized {
148-
val newJobConf = new JobConf(conf)
135+
HadoopRDD.CONFIGURATION_INSTANTIATION_LOCK.synchronized {
136+
val newJobConf = new JobConf(conf)
137+
if (!conf.isInstanceOf[JobConf]) {
149138
initLocalJobConfFuncOpt.map(f => f(newJobConf))
150-
HadoopRDD.putCachedMetadata(jobConfCacheKey, newJobConf)
151-
newJobConf
152139
}
140+
newJobConf
153141
}
154142
}
155143

@@ -257,7 +245,10 @@ class HadoopRDD[K, V](
257245
}
258246

259247
private[spark] object HadoopRDD {
260-
/** Constructing Configuration objects is not threadsafe, use this lock to serialize. */
248+
/**
249+
* Configuration's constructor is not threadsafe (see SPARK-1097 and HADOOP-10456).
250+
* Therefore, we synchronize on this lock before calling new JobConf() or new Configuration().
251+
*/
261252
val CONFIGURATION_INSTANTIATION_LOCK = new Object()
262253

263254
/**

0 commit comments

Comments
 (0)