Skip to content

Commit 116c553

Browse files
jongyoul authored and srowen committed
[SPARK-6286][Mesos][minor] Handle missing Mesos case TASK_ERROR
- Made TaskState.isFailed for handling TASK_LOST and TASK_ERROR and synchronizing CoarseMesosSchedulerBackend and MesosSchedulerBackend - This is related apache#5000 Author: Jongyoul Lee <[email protected]> Closes apache#5088 from jongyoul/SPARK-6286-1 and squashes the following commits: 4f2362f [Jongyoul Lee] [SPARK-6286][Mesos][minor] Handle missing Mesos case TASK_ERROR - Fixed scalastyle ac4336a [Jongyoul Lee] [SPARK-6286][Mesos][minor] Handle missing Mesos case TASK_ERROR - Made TaskState.isFailed for handling TASK_LOST and TASK_ERROR and synchronizing CoarseMesosSchedulerBackend and MesosSchedulerBackend
1 parent 0745a30 commit 116c553

File tree

3 files changed

+5
-2
lines changed

3 files changed

+5
-2
lines changed

core/src/main/scala/org/apache/spark/TaskState.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ private[spark] object TaskState extends Enumeration {
2727

2828
type TaskState = Value
2929

30+
/**
 * Returns true when `state` is one of the failure states, i.e. `LOST` or `FAILED`.
 *
 * @param state the task state to classify
 * @return whether `state` equals `LOST` or `FAILED`
 */
def isFailed(state: TaskState): Boolean = (LOST == state) || (FAILED == state)
31+
3032
/**
 * Returns true when `state` is a member of `FINISHED_STATES`.
 *
 * @param state the task state to classify
 * @return whether `FINISHED_STATES` contains `state`
 */
def isFinished(state: TaskState): Boolean = FINISHED_STATES.contains(state)
3133

3234
def toMesos(state: TaskState): MesosTaskState = state match {

core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ private[spark] class CoarseMesosSchedulerBackend(
277277
coresByTaskId -= taskId
278278
}
279279
// If it was a failure, mark the slave as failed for blacklisting purposes
280-
if (state == MesosTaskState.TASK_FAILED || state == MesosTaskState.TASK_LOST) {
280+
if (TaskState.isFailed(TaskState.fromMesos(state))) {
281281
failuresBySlaveId(slaveId) = failuresBySlaveId.getOrElse(slaveId, 0) + 1
282282
if (failuresBySlaveId(slaveId) >= MAX_SLAVE_FAILURES) {
283283
logInfo("Blacklisting Mesos slave " + slaveId + " due to too many failures; " +

core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,8 @@ private[spark] class MesosSchedulerBackend(
318318
val tid = status.getTaskId.getValue.toLong
319319
val state = TaskState.fromMesos(status.getState)
320320
synchronized {
321-
if (status.getState == MesosTaskState.TASK_LOST && taskIdToSlaveId.contains(tid)) {
321+
if (TaskState.isFailed(TaskState.fromMesos(status.getState))
322+
&& taskIdToSlaveId.contains(tid)) {
322323
// We lost the executor on this slave, so remember that it's gone
323324
removeExecutor(taskIdToSlaveId(tid), "Lost executor")
324325
}

0 commit comments

Comments
 (0)