Skip to content

Commit 6179a94

Browse files
committed
SPARK-4783 [CORE] System.exit() calls in SparkContext disrupt applications embedding Spark
Avoid `System.exit(1)` in `TaskSchedulerImpl` and convert to `SparkException`; ensure scheduler calls `sc.stop()` even when this exception is thrown. CC mateiz aarondav as those who may have last touched this code. Author: Sean Owen <[email protected]> Closes apache#5492 from srowen/SPARK-4783 and squashes the following commits: 60dc682 [Sean Owen] Avoid System.exit(1) in TaskSchedulerImpl and convert to SparkException; ensure scheduler calls sc.stop() even when this exception is thrown
1 parent 8370550 commit 6179a94

File tree

2 files changed

+8
-6
lines changed

2 files changed

+8
-6
lines changed

core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ private[spark] class TaskSchedulerImpl(
394394

395395
def error(message: String) {
396396
synchronized {
397-
if (activeTaskSets.size > 0) {
397+
if (activeTaskSets.nonEmpty) {
398398
// Have each task set throw a SparkException with the error
399399
for ((taskSetId, manager) <- activeTaskSets) {
400400
try {
@@ -407,8 +407,7 @@ private[spark] class TaskSchedulerImpl(
407407
// No task sets are active but we still got an error. Just exit since this
408408
// must mean the error is during registration.
409409
// It might be good to do something smarter here in the future.
410-
logError("Exiting due to error from cluster scheduler: " + message)
411-
System.exit(1)
410+
throw new SparkException(s"Exiting due to error from cluster scheduler: $message")
412411
}
413412
}
414413
}

core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,12 @@ private[spark] class SparkDeploySchedulerBackend(
118118
notifyContext()
119119
if (!stopping) {
120120
logError("Application has been killed. Reason: " + reason)
121-
scheduler.error(reason)
122-
// Ensure the application terminates, as we can no longer run jobs.
123-
sc.stop()
121+
try {
122+
scheduler.error(reason)
123+
} finally {
124+
// Ensure the application terminates, as we can no longer run jobs.
125+
sc.stop()
126+
}
124127
}
125128
}
126129

0 commit comments

Comments
 (0)