Skip to content

Commit d79238d

Browse files
sryzapwendell
authored andcommitted
SPARK-3612. Executor shouldn't quit if heartbeat message fails to reach ...
...the driver Author: Sandy Ryza <[email protected]> Closes apache#2487 from sryza/sandy-spark-3612 and squashes the following commits: 2b7353d [Sandy Ryza] SPARK-3612. Executor shouldn't quit if heartbeat message fails to reach the driver
1 parent 8dfe79f commit d79238d

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

core/src/main/scala/org/apache/spark/executor/Executor.scala

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import java.util.concurrent._
2424

2525
import scala.collection.JavaConversions._
2626
import scala.collection.mutable.{ArrayBuffer, HashMap}
27+
import scala.util.control.NonFatal
2728

2829
import org.apache.spark._
2930
import org.apache.spark.deploy.SparkHadoopUtil
@@ -375,12 +376,17 @@ private[spark] class Executor(
375376
}
376377

377378
val message = Heartbeat(executorId, tasksMetrics.toArray, env.blockManager.blockManagerId)
378-
val response = AkkaUtils.askWithReply[HeartbeatResponse](message, heartbeatReceiverRef,
379-
retryAttempts, retryIntervalMs, timeout)
380-
if (response.reregisterBlockManager) {
381-
logWarning("Told to re-register on heartbeat")
382-
env.blockManager.reregister()
379+
try {
380+
val response = AkkaUtils.askWithReply[HeartbeatResponse](message, heartbeatReceiverRef,
381+
retryAttempts, retryIntervalMs, timeout)
382+
if (response.reregisterBlockManager) {
383+
logWarning("Told to re-register on heartbeat")
384+
env.blockManager.reregister()
385+
}
386+
} catch {
387+
case NonFatal(t) => logWarning("Issue communicating with driver in heartbeater", t)
383388
}
389+
384390
Thread.sleep(interval)
385391
}
386392
}

0 commit comments

Comments
 (0)