[SPARK-8103][core] DAGScheduler should not submit multiple concurrent attempts for a stage #6750
Changes from all commits
```diff
@@ -75,9 +75,9 @@ private[spark] class TaskSchedulerImpl(

   // TaskSetManagers are not thread safe, so any access to one should be synchronized
   // on this class.
-  val activeTaskSets = new HashMap[String, TaskSetManager]
+  private val taskSetsByStageIdAndAttempt = new HashMap[Int, HashMap[Int, TaskSetManager]]

-  val taskIdToTaskSetId = new HashMap[Long, String]
+  private[scheduler] val taskIdToTaskSetManager = new HashMap[Long, TaskSetManager]
   val taskIdToExecutorId = new HashMap[Long, String]

   @volatile private var hasReceivedTask = false
```
```diff
@@ -162,7 +162,17 @@ private[spark] class TaskSchedulerImpl(
     logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks")
     this.synchronized {
       val manager = createTaskSetManager(taskSet, maxTaskFailures)
-      activeTaskSets(taskSet.id) = manager
+      val stage = taskSet.stageId
+      val stageTaskSets =
+        taskSetsByStageIdAndAttempt.getOrElseUpdate(stage, new HashMap[Int, TaskSetManager])
+      stageTaskSets(taskSet.stageAttemptId) = manager
+      val conflictingTaskSet = stageTaskSets.exists { case (_, ts) =>
+        ts.taskSet != taskSet && !ts.isZombie
+      }
+      if (conflictingTaskSet) {
+        throw new IllegalStateException(s"more than one active taskSet for stage $stage:" +
+          s" ${stageTaskSets.toSeq.map{_._2.taskSet.id}.mkString(",")}")
+      }
```
restoring the comments from the old diff b/c they are still relevant:

from mark: […]

from marcelo: good point, there isn't any need to do the […]

I'd really rather leave the check in place. In fact I think this fail-fast behavior is especially important in a production environment -- that's much better than an infinite loop of failures.

Yes, I totally agree that it is valuable to catch the failure in production. I'm only suggesting that at some point the check becomes a big enough performance hit that it makes sense to compromise on the fail-fast desiderata in order to maintain production performance while trying to ensure in development that the failure can never occur. I doubt that this check is that costly, but my expectation is that Kay has a better sense of how much more we can afford to do within this synchronized block.

What if you change activeTaskSets to be keyed on the stageId? That seems pretty easy to do and would make this check O(1) rather than O(N).
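To make the cost question concrete, here is a minimal, self-contained sketch (not PR code: `ConflictCheckCostSketch`, `FakeTaskSetManager`, `hasConflictFlat`, and `hasConflictByStage` are assumed names, while the two map layouts mirror the old `activeTaskSets` and new `taskSetsByStageIdAndAttempt` fields in the diff above). A duplicate-attempt check against the flat map has to scan every active task set in the scheduler, whereas keying by stage id confines the scan to that one stage's handful of attempts.

```scala
import scala.collection.mutable.HashMap

// Illustrative sketch only -- stand-in types so the comparison compiles on its own.
object ConflictCheckCostSketch {
  case class FakeTaskSetManager(stageId: Int, stageAttemptId: Int, isZombie: Boolean)

  // Old layout: one flat map keyed by the task set id string.
  val activeTaskSets = new HashMap[String, FakeTaskSetManager]

  // New layout: stageId -> (stageAttemptId -> manager).
  val taskSetsByStageIdAndAttempt = new HashMap[Int, HashMap[Int, FakeTaskSetManager]]

  // Scans every active task set across all stages: O(total active task sets).
  def hasConflictFlat(stageId: Int, newAttempt: Int): Boolean =
    activeTaskSets.values.exists { ts =>
      ts.stageId == stageId && ts.stageAttemptId != newAttempt && !ts.isZombie
    }

  // Touches only the attempts registered for this one stage.
  def hasConflictByStage(stageId: Int, newAttempt: Int): Boolean =
    taskSetsByStageIdAndAttempt.get(stageId).exists { attempts =>
      attempts.exists { case (attempt, ts) => attempt != newAttempt && !ts.isZombie }
    }
}
```

The check the diff above ends up adding is the keyed-by-stage form: a `getOrElseUpdate` on the stage id followed by an `exists` over just that stage's attempts.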
```diff
       schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties)

       if (!isLocal && !hasReceivedTask) {
```
```diff
@@ -192,19 +202,21 @@ private[spark] class TaskSchedulerImpl(

   override def cancelTasks(stageId: Int, interruptThread: Boolean): Unit = synchronized {
     logInfo("Cancelling stage " + stageId)
-    activeTaskSets.find(_._2.stageId == stageId).foreach { case (_, tsm) =>
-      // There are two possible cases here:
-      // 1. The task set manager has been created and some tasks have been scheduled.
-      //    In this case, send a kill signal to the executors to kill the task and then abort
-      //    the stage.
-      // 2. The task set manager has been created but no tasks has been scheduled. In this case,
-      //    simply abort the stage.
-      tsm.runningTasksSet.foreach { tid =>
-        val execId = taskIdToExecutorId(tid)
-        backend.killTask(tid, execId, interruptThread)
-      }
-      tsm.abort("Stage %s cancelled".format(stageId))
-      logInfo("Stage %d was cancelled".format(stageId))
-    }
+    taskSetsByStageIdAndAttempt.get(stageId).foreach { attempts =>
+      attempts.foreach { case (_, tsm) =>
+        // There are two possible cases here:
+        // 1. The task set manager has been created and some tasks have been scheduled.
+        //    In this case, send a kill signal to the executors to kill the task and then abort
+        //    the stage.
+        // 2. The task set manager has been created but no tasks has been scheduled. In this case,
+        //    simply abort the stage.
+        tsm.runningTasksSet.foreach { tid =>
+          val execId = taskIdToExecutorId(tid)
+          backend.killTask(tid, execId, interruptThread)
+        }
+        tsm.abort("Stage %s cancelled".format(stageId))
+        logInfo("Stage %d was cancelled".format(stageId))
+      }
+    }
   }

```
```diff
@@ -214,7 +226,12 @@ private[spark] class TaskSchedulerImpl(
    * cleaned up.
    */
   def taskSetFinished(manager: TaskSetManager): Unit = synchronized {
-    activeTaskSets -= manager.taskSet.id
+    taskSetsByStageIdAndAttempt.get(manager.taskSet.stageId).foreach { taskSetsForStage =>
+      taskSetsForStage -= manager.taskSet.stageAttemptId
+      if (taskSetsForStage.isEmpty) {
+        taskSetsByStageIdAndAttempt -= manager.taskSet.stageId
+      }
+    }
     manager.parent.removeSchedulable(manager)
     logInfo("Removed TaskSet %s, whose tasks have all completed, from pool %s"
       .format(manager.taskSet.id, manager.parent.name))
```
```diff
@@ -235,7 +252,7 @@ private[spark] class TaskSchedulerImpl(
         for (task <- taskSet.resourceOffer(execId, host, maxLocality)) {
           tasks(i) += task
           val tid = task.taskId
-          taskIdToTaskSetId(tid) = taskSet.taskSet.id
+          taskIdToTaskSetManager(tid) = taskSet
           taskIdToExecutorId(tid) = execId
           executorsByHost(host) += execId
           availableCpus(i) -= CPUS_PER_TASK
```
```diff
@@ -319,26 +336,24 @@ private[spark] class TaskSchedulerImpl(
             failedExecutor = Some(execId)
           }
         }
-        taskIdToTaskSetId.get(tid) match {
-          case Some(taskSetId) =>
+        taskIdToTaskSetManager.get(tid) match {
+          case Some(taskSet) =>
             if (TaskState.isFinished(state)) {
-              taskIdToTaskSetId.remove(tid)
+              taskIdToTaskSetManager.remove(tid)
               taskIdToExecutorId.remove(tid)
             }
-            activeTaskSets.get(taskSetId).foreach { taskSet =>
-              if (state == TaskState.FINISHED) {
-                taskSet.removeRunningTask(tid)
-                taskResultGetter.enqueueSuccessfulTask(taskSet, tid, serializedData)
-              } else if (Set(TaskState.FAILED, TaskState.KILLED, TaskState.LOST).contains(state)) {
-                taskSet.removeRunningTask(tid)
-                taskResultGetter.enqueueFailedTask(taskSet, tid, state, serializedData)
-              }
-            }
+            if (state == TaskState.FINISHED) {
+              taskSet.removeRunningTask(tid)
+              taskResultGetter.enqueueSuccessfulTask(taskSet, tid, serializedData)
+            } else if (Set(TaskState.FAILED, TaskState.KILLED, TaskState.LOST).contains(state)) {
+              taskSet.removeRunningTask(tid)
+              taskResultGetter.enqueueFailedTask(taskSet, tid, state, serializedData)
+            }
           case None =>
             logError(
```
there is a small change in logic here. Before, we'd hit this case only if […]. This wasn't exactly intentional, it just fell out from an attempt at refactoring a helper method -- but it does seem to more accurately correspond to what is in the error msg. Still I'm not certain about it. Perhaps if nobody else has high confidence in this change I should just go back to the old behavior.

This is fine!
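As a reading aid, a condensed, self-contained sketch of the two flows being compared (assumed names: `StatusUpdateLookupSketch`, `Manager`, `process`, `handleOld`, `handleNew`; the map names come from the diff). In the old flow the "task set is gone" error fires only when the tid-to-taskSetId entry is missing, and an update whose manager has already left `activeTaskSets` is dropped silently; the new single-map lookup logs the error whenever no manager is found.

```scala
import scala.collection.mutable.HashMap

// Condensed sketch of the behavioral difference -- not the actual Spark code.
object StatusUpdateLookupSketch {
  case class Manager(name: String)

  val taskIdToTaskSetId = new HashMap[Long, String]        // old layout
  val activeTaskSets = new HashMap[String, Manager]        // old layout
  val taskIdToTaskSetManager = new HashMap[Long, Manager]  // new layout

  def logError(msg: String): Unit = println(s"ERROR: $msg")
  def process(m: Manager, tid: Long): Unit = println(s"processing $tid via ${m.name}")

  // Old flow: if the manager is already gone, the foreach does nothing -- no error logged.
  def handleOld(tid: Long): Unit = taskIdToTaskSetId.get(tid) match {
    case Some(taskSetId) => activeTaskSets.get(taskSetId).foreach(process(_, tid))
    case None => logError(s"Ignoring update for TID $tid because its task set is gone")
  }

  // New flow: a single lookup, so every update without a manager is logged.
  def handleNew(tid: Long): Unit = taskIdToTaskSetManager.get(tid) match {
    case Some(manager) => process(manager, tid)
    case None => logError(s"Ignoring update for TID $tid because its task set is gone")
  }
}
```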
```diff
               ("Ignoring update with state %s for TID %s because its task set is gone (this is " +
-                "likely the result of receiving duplicate task finished status updates)")
-                .format(state, tid))
+              "likely the result of receiving duplicate task finished status updates)")
+              .format(state, tid))
         }
       } catch {
         case e: Exception => logError("Exception in statusUpdate", e)
```
```diff
@@ -363,9 +378,9 @@ private[spark] class TaskSchedulerImpl(

     val metricsWithStageIds: Array[(Long, Int, Int, TaskMetrics)] = synchronized {
       taskMetrics.flatMap { case (id, metrics) =>
-        taskIdToTaskSetId.get(id)
-          .flatMap(activeTaskSets.get)
-          .map(taskSetMgr => (id, taskSetMgr.stageId, taskSetMgr.taskSet.attempt, metrics))
+        taskIdToTaskSetManager.get(id).map { taskSetMgr =>
+          (id, taskSetMgr.stageId, taskSetMgr.taskSet.stageAttemptId, metrics)
+        }
       }
     }
     dagScheduler.executorHeartbeatReceived(execId, metricsWithStageIds, blockManagerId)
```
```diff
@@ -397,9 +412,12 @@ private[spark] class TaskSchedulerImpl(

   def error(message: String) {
     synchronized {
-      if (activeTaskSets.nonEmpty) {
+      if (taskSetsByStageIdAndAttempt.nonEmpty) {
         // Have each task set throw a SparkException with the error
-        for ((taskSetId, manager) <- activeTaskSets) {
+        for {
+          attempts <- taskSetsByStageIdAndAttempt.values
+          manager <- attempts.values
+        } {
           try {
             manager.abort(message)
           } catch {
```
```diff
@@ -520,6 +538,17 @@ private[spark] class TaskSchedulerImpl(

   override def applicationAttemptId(): Option[String] = backend.applicationAttemptId()

+  private[scheduler] def taskSetManagerForAttempt(
+      stageId: Int,
+      stageAttemptId: Int): Option[TaskSetManager] = {
+    for {
+      attempts <- taskSetsByStageIdAndAttempt.get(stageId)
+      manager <- attempts.get(stageAttemptId)
+    } yield {
+      manager
+    }
+  }
+
 }
```
@kayousterhout How much of a concern should the extra overhead be here? Just wondering whether this (let's hope rare) condition might better be handled only in a non-production environment and behind an if (debug) kind of flag.

Perhaps the code could just look for an existing task set that matches the stage ID of the task set being added? That should be a little better than the filter / groupBy. Something like:
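The snippet that originally followed "Something like:" did not survive extraction. Below is a hedged reconstruction of the suggestion, not the reviewer's exact code: `PerStageConflictCheck`, `TaskSetStub`, and `ManagerStub` are assumed stand-ins for Spark's TaskSet and TaskSetManager, while the per-stage map layout and the fail-fast exception mirror what the diff above ends up doing.

```scala
import scala.collection.mutable.HashMap

// Hypothetical reconstruction of the suggested per-stage check.
object PerStageConflictCheck {
  case class TaskSetStub(stageId: Int, stageAttemptId: Int)
  case class ManagerStub(taskSet: TaskSetStub, isZombie: Boolean)

  val taskSetsByStageIdAndAttempt = new HashMap[Int, HashMap[Int, ManagerStub]]

  /** Register a new attempt, failing fast if another live attempt exists for the stage. */
  def register(taskSet: TaskSetStub, manager: ManagerStub): Unit = {
    val stageTaskSets =
      taskSetsByStageIdAndAttempt.getOrElseUpdate(taskSet.stageId, new HashMap[Int, ManagerStub])
    // Only this stage's attempts are scanned -- no filter/groupBy over all active task sets.
    val conflicting = stageTaskSets.values.exists { ts =>
      ts.taskSet != taskSet && !ts.isZombie
    }
    if (conflicting) {
      throw new IllegalStateException(
        s"more than one active taskSet for stage ${taskSet.stageId}")
    }
    stageTaskSets(taskSet.stageAttemptId) = manager
  }
}
```

Give or take naming, this is the shape the `submitTasks` hunk at the top of the diff adopts.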