@@ -101,6 +101,10 @@ private[spark] class CoarseMesosSchedulerBackend(
   private val slaveOfferConstraints =
     parseConstraintString(sc.conf.get("spark.mesos.constraints", ""))

+  // How long (in seconds) to reject offers with unmet constraints
+  private val rejectOfferDurationForUnmetConstraints =
+    getRejectOfferDurationForUnmetConstraints(sc)
+
   // A client for talking to the external shuffle service, if it is enabled
   private val mesosExternalShuffleClient: Option[MesosExternalShuffleClient] = {
     if (shuffleServiceEnabled) {
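The new field is initialized from a helper on the scheduler-utils side. A minimal sketch of what that lookup might look like is below; the config key `spark.mesos.rejectOfferDurationForUnmetConstraints` and the `120s` default are assumptions for illustration, not confirmed by this hunk:

```scala
import org.apache.spark.SparkContext

trait MesosSchedulerUtilsSketch {
  // Sketch only: how long (in seconds) to refuse re-offers that fail the
  // constraint check. The key name and the 120s default are assumptions.
  def getRejectOfferDurationForUnmetConstraints(sc: SparkContext): Long = {
    sc.getConf.getTimeAsSeconds("spark.mesos.rejectOfferDurationForUnmetConstraints", "120s")
  }
}
```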
@@ -249,48 +253,56 @@ private[spark] class CoarseMesosSchedulerBackend(
         val mem = getResource(offer.getResourcesList, "mem")
         val cpus = getResource(offer.getResourcesList, "cpus").toInt
         val id = offer.getId.getValue
-        if (taskIdToSlaveId.size < executorLimit &&
-            totalCoresAcquired < maxCores &&
-            meetsConstraints &&
-            mem >= calculateTotalMemory(sc) &&
-            cpus >= 1 &&
-            failuresBySlaveId.getOrElse(slaveId, 0) < MAX_SLAVE_FAILURES &&
-            !slaveIdsWithExecutors.contains(slaveId)) {
-          // Launch an executor on the slave
-          val cpusToUse = math.min(cpus, maxCores - totalCoresAcquired)
-          totalCoresAcquired += cpusToUse
-          val taskId = newMesosTaskId()
-          taskIdToSlaveId.put(taskId, slaveId)
-          slaveIdsWithExecutors += slaveId
-          coresByTaskId(taskId) = cpusToUse
-          // Gather cpu resources from the available resources and use them in the task.
-          val (remainingResources, cpuResourcesToUse) =
-            partitionResources(offer.getResourcesList, "cpus", cpusToUse)
-          val (_, memResourcesToUse) =
-            partitionResources(remainingResources.asJava, "mem", calculateTotalMemory(sc))
-          val taskBuilder = MesosTaskInfo.newBuilder()
-            .setTaskId(TaskID.newBuilder().setValue(taskId.toString).build())
-            .setSlaveId(offer.getSlaveId)
-            .setCommand(createCommand(offer, cpusToUse + extraCoresPerSlave, taskId))
-            .setName("Task " + taskId)
-            .addAllResources(cpuResourcesToUse.asJava)
-            .addAllResources(memResourcesToUse.asJava)
-
-          sc.conf.getOption("spark.mesos.executor.docker.image").foreach { image =>
-            MesosSchedulerBackendUtil
-              .setupContainerBuilderDockerInfo(image, sc.conf, taskBuilder.getContainerBuilder())
+        if (meetsConstraints) {
+          if (taskIdToSlaveId.size < executorLimit &&
+              totalCoresAcquired < maxCores &&
+              mem >= calculateTotalMemory(sc) &&
+              cpus >= 1 &&
+              failuresBySlaveId.getOrElse(slaveId, 0) < MAX_SLAVE_FAILURES &&
+              !slaveIdsWithExecutors.contains(slaveId)) {
+            // Launch an executor on the slave
+            val cpusToUse = math.min(cpus, maxCores - totalCoresAcquired)
+            totalCoresAcquired += cpusToUse
+            val taskId = newMesosTaskId()
+            taskIdToSlaveId.put(taskId, slaveId)
+            slaveIdsWithExecutors += slaveId
+            coresByTaskId(taskId) = cpusToUse
+            // Gather cpu resources from the available resources and use them in the task.
+            val (remainingResources, cpuResourcesToUse) =
+              partitionResources(offer.getResourcesList, "cpus", cpusToUse)
+            val (_, memResourcesToUse) =
+              partitionResources(remainingResources.asJava, "mem", calculateTotalMemory(sc))
+            val taskBuilder = MesosTaskInfo.newBuilder()
+              .setTaskId(TaskID.newBuilder().setValue(taskId.toString).build())
+              .setSlaveId(offer.getSlaveId)
+              .setCommand(createCommand(offer, cpusToUse + extraCoresPerSlave, taskId))
+              .setName("Task " + taskId)
+              .addAllResources(cpuResourcesToUse.asJava)
+              .addAllResources(memResourcesToUse.asJava)
+
+            sc.conf.getOption("spark.mesos.executor.docker.image").foreach { image =>
+              MesosSchedulerBackendUtil
+                .setupContainerBuilderDockerInfo(image, sc.conf, taskBuilder.getContainerBuilder())
+            }
+
+            // Accept the offer and launch the task
+            logDebug(s"Accepting offer: $id with attributes: $offerAttributes mem: $mem cpu: $cpus")
+            slaveIdToHost(offer.getSlaveId.getValue) = offer.getHostname
+            d.launchTasks(
+              Collections.singleton(offer.getId),
+              Collections.singleton(taskBuilder.build()), filters)
+          } else {
+            // Decline the offer
+            logDebug(s"Declining offer: $id with attributes: $offerAttributes mem: $mem cpu: $cpus")
+            d.declineOffer(offer.getId)
           }
-
-          // accept the offer and launch the task
-          logDebug(s"Accepting offer: $id with attributes: $offerAttributes mem: $mem cpu: $cpus")
-          slaveIdToHost(offer.getSlaveId.getValue) = offer.getHostname
-          d.launchTasks(
-            Collections.singleton(offer.getId),
-            Collections.singleton(taskBuilder.build()), filters)
         } else {
-          // Decline the offer
-          logDebug(s"Declining offer: $id with attributes: $offerAttributes mem: $mem cpu: $cpus")
-          d.declineOffer(offer.getId)
+          // This offer does not meet constraints. We don't need to see it again.
+          // Decline the offer for a long period of time.
+          logDebug(s"Declining offer: $id with attributes: $offerAttributes mem: $mem cpu: $cpus"
+            + s" for $rejectOfferDurationForUnmetConstraints seconds")
+          d.declineOffer(offer.getId, Filters.newBuilder()
+            .setRefuseSeconds(rejectOfferDurationForUnmetConstraints).build())
         }
       }
     }
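Net effect of the new branch: a constraint mismatch is a static property of an agent, so instead of re-evaluating the same offer every few seconds, the backend declines it with a `Filters` message whose `refuse_seconds` tells the Mesos master to withhold those resources from this framework for the whole duration. A hypothetical user-side configuration reusing the key names above is sketched below; the attribute names and values are invented, and the `attr:value` pairs separated by `;` follow the documented `spark.mesos.constraints` format:

```scala
import org.apache.spark.SparkConf

// Hypothetical setup: only launch executors on agents advertising these
// Mesos attributes, and refuse mismatching offers for two minutes.
val conf = new SparkConf()
  .setAppName("constraints-example")
  .set("spark.mesos.constraints", "os:centos7;zone:us-east-1a")
  .set("spark.mesos.rejectOfferDurationForUnmetConstraints", "120s")
```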