
Commit 9a4cce6

@cloudfan's comments
1 parent 343bf8b commit 9a4cce6

4 files changed: 13 additions & 3 deletions

core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala

Lines changed: 1 addition & 1 deletion
@@ -297,7 +297,7 @@ private[spark] class CoarseGrainedExecutorBackend(
       // This config is internal and only used by unit tests to force an executor
       // to hang around for longer when decommissioned.
       val initialSleepMillis = env.conf.getInt(
-        "spark.executor.decommission.initial.sleep.millis", sleep_time)
+        "spark.test.executor.decommission.initial.sleep.millis", sleep_time)
       if (initialSleepMillis > 0) {
         Thread.sleep(initialSleepMillis)
       }
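
For context, a minimal standalone sketch (not part of this commit) of how the renamed, test-only key is read back through SparkConf.getInt; the value here is illustrative:

import org.apache.spark.SparkConf

// A test sets the renamed key; the executor backend's getInt read then
// returns this value instead of the sleep_time default.
val conf = new SparkConf()
  .set("spark.test.executor.decommission.initial.sleep.millis", "2000")
val sleepMillis =
  conf.getInt("spark.test.executor.decommission.initial.sleep.millis", 0)
assert(sleepMillis == 2000)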

core/src/main/scala/org/apache/spark/internal/config/package.scala

Lines changed: 10 additions & 0 deletions
@@ -1877,6 +1877,16 @@ package object config {
       .timeConf(TimeUnit.SECONDS)
       .createOptional
 
+  private[spark] val DECOMMISSIONED_EXECUTORS_REMEMBER_AFTER_REMOVAL_TTL =
+    ConfigBuilder("spark.executor.decommission.removed.infoCacheTTL")
+      .doc("Duration for which a decommissioned executor's information will be kept after its " +
+        "removal. Keeping the decommissioned info after removal helps pinpoint fetch failures to " +
+        "decommissioning even after the mapper executor has been decommissioned. This allows " +
+        "eager recovery from fetch failures caused by decommissioning, increasing job robustness.")
+      .version("3.1.0")
+      .timeConf(TimeUnit.SECONDS)
+      .createWithDefaultString("5m")
+
   private[spark] val STAGING_DIR = ConfigBuilder("spark.yarn.stagingDir")
     .doc("Staging directory used while submitting applications.")
     .version("2.0.0")
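
ConfigBuilder is private[spark], so user code sets the new TTL as a plain string. A minimal sketch (illustrative, not from this commit) of how a duration string resolves through the public SparkConf.getTimeAsSeconds accessor:

import org.apache.spark.SparkConf

// Time-typed configs accept duration strings: "10m" resolves to 600
// seconds; left unset, the "5m" default would resolve to 300.
val conf = new SparkConf()
  .set("spark.executor.decommission.removed.infoCacheTTL", "10m")
val ttlSeconds =
  conf.getTimeAsSeconds("spark.executor.decommission.removed.infoCacheTTL", "5m")
assert(ttlSeconds == 600L)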

core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala

Lines changed: 1 addition & 1 deletion
@@ -148,7 +148,7 @@ private[spark] class TaskSchedulerImpl(
   // decommissioned.
   lazy val decommissionedExecutorsRemoved = CacheBuilder.newBuilder()
     .expireAfterWrite(
-      conf.getLong("spark.decommissioningRememberAfterRemoval.seconds", 60L), TimeUnit.SECONDS)
+      conf.get(DECOMMISSIONED_EXECUTORS_REMEMBER_AFTER_REMOVAL_TTL), TimeUnit.SECONDS)
     .ticker(new Ticker{
       override def read(): Long = TimeUnit.MILLISECONDS.toNanos(clock.getTimeMillis())
     })
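
The cache above combines Guava's expireAfterWrite with a custom Ticker driven by Spark's Clock, so expiry can be exercised in tests without real sleeps. A self-contained sketch of that pattern, using a hypothetical manual clock (nowMillis) in place of Spark's Clock:

import java.util.concurrent.TimeUnit
import com.google.common.base.Ticker
import com.google.common.cache.CacheBuilder

// Hypothetical manual clock standing in for Spark's Clock.
var nowMillis = 0L
val removed = CacheBuilder.newBuilder()
  .expireAfterWrite(300L, TimeUnit.SECONDS)  // TTL, e.g. the 5m default
  .ticker(new Ticker {
    override def read(): Long = TimeUnit.MILLISECONDS.toNanos(nowMillis)
  })
  .build[String, java.lang.Boolean]()

removed.put("executor-1", java.lang.Boolean.TRUE)
nowMillis += TimeUnit.SECONDS.toMillis(301)        // advance past the TTL
assert(removed.getIfPresent("executor-1") == null) // entry has expired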

core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -207,7 +207,7 @@ class DecommissionWorkerSuite
     createWorkers(2)
     sc = createSparkContext(
       config.Tests.TEST_NO_STAGE_RETRY.key -> "false",
-      "spark.executor.decommission.initial.sleep.millis" -> initialSleepMillis.toString,
+      "spark.test.executor.decommission.initial.sleep.millis" -> initialSleepMillis.toString,
       config.UNREGISTER_OUTPUT_ON_HOST_ON_FETCH_FAILURE.key -> "true")
     val executorIdToWorkerInfo = getExecutorToWorkerAssignments
     val executorToDecom = executorIdToWorkerInfo.keysIterator.next
