@@ -76,7 +76,7 @@ private[spark] class MesosClusterTaskState(
  */
 private[spark] class MesosClusterSchedulerState(
     val frameworkId: String,
-    val masterUrl: String,
+    val masterUrl: Option[String],
     val queuedDrivers: Iterable[MesosDriverDescription],
     val launchedDrivers: Iterable[MesosClusterTaskState],
     val finishedDrivers: Iterable[MesosClusterTaskState],
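For orientation, a minimal sketch of how a consumer might handle the now-optional masterUrl; SchedulerStateView and renderMasterUrl are invented names for illustration and are not part of this patch:

// Hypothetical stand-ins, not the actual Spark classes. With masterUrl as an
// Option[String], callers must handle the "framework not yet registered" case
// explicitly instead of reading a possibly-uninitialized string.
case class SchedulerStateView(frameworkId: String, masterUrl: Option[String])

object SchedulerStateViewExample {
  def renderMasterUrl(state: SchedulerStateView): String =
    state.masterUrl.getOrElse("<master not yet registered>")

  def main(args: Array[String]): Unit = {
    println(renderMasterUrl(SchedulerStateView("fw-1", None)))
    println(renderMasterUrl(SchedulerStateView("fw-1", Some("http://10.0.0.1:5050"))))
  }
}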
@@ -135,7 +135,7 @@ private[spark] class MesosClusterSchedulerDriver(
   // All supervised drivers that are waiting to retry after termination.
   var superviseRetryList: SuperviseRetryList = _
 
-  private var masterInfo: MasterInfo = _
+  private var masterInfo: Option[MasterInfo] = None
 
   private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs
   private def newDriverId(submitDate: Date): String = {
@@ -252,7 +252,7 @@ private[spark] class MesosClusterSchedulerDriver(
     markRegistered()
 
     stateLock.synchronized {
-      this.masterInfo = masterInfo
+      this.masterInfo = Some(masterInfo)
       if (!launchedDrivers.pendingRecover.isEmpty) {
         // Start task reconciliation if we need to recover.
         val statuses = launchedDrivers.pendingRecover.collect {
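A hedged sketch of the registration pattern the two hunks above introduce: masterInfo stays None until Mesos invokes the registered() callback, and it is published and read under the same lock. RegistrationExample and its String stand-in for Protos.MasterInfo are illustrative only:

// Illustrative sketch, not the Spark class itself.
class RegistrationExample {
  private val stateLock = new Object
  private var masterInfo: Option[String] = None // stand-in for Protos.MasterInfo

  // Invoked by the Mesos driver once the framework registers with a master.
  def registered(info: String): Unit = stateLock.synchronized {
    masterInfo = Some(info)
  }

  // Readers derive values through the Option instead of trusting a nullable field,
  // so they observe either None or a fully published value.
  def masterUrl: Option[String] = stateLock.synchronized {
    masterInfo.map(host => s"http://$host:5050")
  }
}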
@@ -350,20 +350,14 @@ private[spark] class MesosClusterSchedulerDriver(
         getResource(o.getResourcesList, "cpus"),
         getResource(o.getResourcesList, "mem"))
     }
-    logTrace(s"Received offers from Mesos: ${printOffers(currentOffers)}")
+    logTrace(s"Received offers from Mesos: \n${printOffers(currentOffers)}")
     val tasks = new mutable.HashMap[OfferID, ArrayBuffer[TaskInfo]]()
     val currentTime = new Date()
-
     def scheduleTasks(
-        taskFunc: () => (Option[MesosDriverDescription], Option[RetryState]),
+        tasksFunc: () => Seq[(MesosDriverDescription, Option[RetryState])],
         scheduledCallback: (String) => Unit): Unit = {
-      var nextItem = taskFunc()
-      // TODO: We should not stop scheduling at the very first task
-      // that cannot be scheduled. Instead we should exhaust the
-      // candidate list and remove drivers that cannot scheduled
-      // over a configurable period of time.
-      while (nextItem._1.isDefined) {
-        val (submission, retryState) = (nextItem._1.get, nextItem._2)
+      val candidates = tasksFunc()
+      for ((submission, retryState) <- candidates) {
         val driverCpu = submission.cores
         val driverMem = submission.mem
         logTrace(s"Finding offer to launch driver with cpu: $driverCpu, mem: $driverMem")
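The new tasksFunc signature hands the whole candidate list to scheduleTasks, so a driver that does not fit any current offer is skipped rather than ending the pass with an early return, which is what the removed TODO asked for. A simplified sketch of that control flow, using invented Candidate and Offer types rather than the real Mesos resource handling:

// Simplified, invented types to show the control flow only.
case class Candidate(id: String, cores: Double, mem: Double)
class Offer(var cores: Double, var mem: Double)

object ExhaustCandidatesExample {
  // Walk the whole candidate list; a candidate that fits no current offer is
  // skipped (and logged in the real scheduler) instead of aborting the pass.
  def schedule(candidates: Seq[Candidate], offers: Seq[Offer])(launched: String => Unit): Unit = {
    for (c <- candidates) {
      offers.find(o => o.cores >= c.cores && o.mem >= c.mem) match {
        case Some(offer) =>
          offer.cores -= c.cores
          offer.mem -= c.mem
          launched(c.id)
        case None =>
          () // no fitting offer for this candidate; try the next one
      }
    }
  }
}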
@@ -374,56 +368,54 @@ private[spark] class MesosClusterSchedulerDriver(
         if (offerOption.isEmpty) {
           logDebug(s"Unable to find offer to launch driver id: ${submission.submissionId.get}, " +
             s"cpu: $driverCpu, mem: $driverMem")
-          return
-        }
-
-        val offer = offerOption.get
-        offer.cpu -= driverCpu
-        offer.mem -= driverMem
-        val taskId = TaskID.newBuilder().setValue(submission.submissionId.get).build()
-        val cpuResource = Resource.newBuilder()
-          .setName("cpus").setType(Value.Type.SCALAR)
-          .setScalar(Value.Scalar.newBuilder().setValue(driverCpu)).build()
-        val memResource = Resource.newBuilder()
-          .setName("mem").setType(Value.Type.SCALAR)
-          .setScalar(Value.Scalar.newBuilder().setValue(driverMem)).build()
-        val commandInfo = buildCommand(submission)
-        val appName = submission.schedulerProperties("spark.app.name")
-        val taskInfo = TaskInfo.newBuilder()
-          .setTaskId(taskId)
-          .setName(s"Driver for $appName")
-          .setSlaveId(offer.offer.getSlaveId)
-          .setCommand(commandInfo)
-          .addResources(cpuResource)
-          .addResources(memResource)
-          .build
-        val queuedTasks = if (!tasks.contains(offer.offer.getId)) {
-          val buffer = new ArrayBuffer[TaskInfo]
-          tasks(offer.offer.getId) = buffer
-          buffer
         } else {
-          tasks(offer.offer.getId)
+          val offer = offerOption.get
+          offer.cpu -= driverCpu
+          offer.mem -= driverMem
+          val taskId = TaskID.newBuilder().setValue(submission.submissionId.get).build()
+          val cpuResource = Resource.newBuilder()
+            .setName("cpus").setType(Value.Type.SCALAR)
+            .setScalar(Value.Scalar.newBuilder().setValue(driverCpu)).build()
+          val memResource = Resource.newBuilder()
+            .setName("mem").setType(Value.Type.SCALAR)
+            .setScalar(Value.Scalar.newBuilder().setValue(driverMem)).build()
+          val commandInfo = buildCommand(submission)
+          val appName = submission.schedulerProperties("spark.app.name")
+          val taskInfo = TaskInfo.newBuilder()
+            .setTaskId(taskId)
+            .setName(s"Driver for $appName")
+            .setSlaveId(offer.offer.getSlaveId)
+            .setCommand(commandInfo)
+            .addResources(cpuResource)
+            .addResources(memResource)
+            .build
+          val queuedTasks = if (!tasks.contains(offer.offer.getId)) {
+            val buffer = new ArrayBuffer[TaskInfo]
+            tasks(offer.offer.getId) = buffer
+            buffer
+          } else {
+            tasks(offer.offer.getId)
+          }
+          queuedTasks += taskInfo
+          logTrace(s"Using offer ${offer.offer.getId.getValue} to launch driver " +
+            submission.submissionId.get)
+
+          launchedDrivers.set(
+            submission.submissionId.get,
+            new MesosClusterTaskState(submission, taskId, offer.offer.getSlaveId,
+              None, new Date(), retryState))
+          scheduledCallback(submission.submissionId.get)
         }
-        queuedTasks += taskInfo
-        logTrace(s"Using offer ${offer.offer.getId.getValue} to launch driver " +
-          submission.submissionId.get)
-
-        launchedDrivers.set(
-          submission.submissionId.get,
-          new MesosClusterTaskState(submission, taskId, offer.offer.getSlaveId,
-            None, new Date(), retryState))
-        scheduledCallback(submission.submissionId.get)
-        nextItem = taskFunc()
       }
     }
 
     stateLock.synchronized {
       scheduleTasks(() => {
-        superviseRetryList.getNextRetry(currentTime)
+        superviseRetryList.getNextRetries(currentTime)
       }, (id: String) => {
         superviseRetryList.remove(id)
       })
-      scheduleTasks(() => (queue.peek(), None), (_) => queue.poll())
+      scheduleTasks(() => queue.drivers.map(d => (d, None)), (id) => queue.remove(id))
     }
 
     tasks.foreach { case (offerId, tasks) =>
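A side note on the queuedTasks lookup in the hunk above: mutable.HashMap.getOrElseUpdate expresses the same get-or-create accumulation more compactly. A small hedged sketch, with String keys standing in for OfferID and TaskInfo:

import scala.collection.mutable

// Hypothetical sketch; keys and values are Strings instead of Mesos protobuf types.
object PerOfferTaskGroupingExample {
  // getOrElseUpdate creates the per-offer buffer on first use, equivalent to the
  // explicit if (!tasks.contains(offer.offer.getId)) branch in the patch.
  def group(assignments: Seq[(String, String)]): Map[String, Seq[String]] = {
    val tasks = new mutable.HashMap[String, mutable.ArrayBuffer[String]]()
    for ((offerId, taskId) <- assignments) {
      tasks.getOrElseUpdate(offerId, new mutable.ArrayBuffer[String]) += taskId
    }
    tasks.map { case (offerId, buf) => offerId -> buf.toSeq }.toMap
  }
}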
@@ -439,8 +431,8 @@ private[spark] class MesosClusterSchedulerDriver(
     stateLock.synchronized {
       new MesosClusterSchedulerState(
         frameworkId,
-        s"http://${masterInfo.getIp}:${masterInfo.getPort}",
-        queue.drivers,
+        masterInfo.map(m => s"http://${m.getIp}:${m.getPort}"),
+        queue.copyDrivers,
         launchedDrivers.states,
         finishedDrivers.collect { case s => s.copy() },
         superviseRetryList.retries)
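Finally, a hedged sketch of the snapshot pattern in this last hunk: the reported URL is derived with masterInfo.map, so an unregistered scheduler reports None, and the driver collections are copied instead of exposing live internal state. DriverDesc, StateSnapshot and SnapshotExample are invented for illustration:

// Illustrative only, not the Spark implementation.
final case class DriverDesc(id: String)
final case class StateSnapshot(masterUrl: Option[String], queuedDrivers: Seq[DriverDesc])

class SnapshotExample {
  private val stateLock = new Object
  private var masterInfo: Option[(String, Int)] = None // (ip, port) stand-in
  private val queue = scala.collection.mutable.ArrayBuffer[DriverDesc]()

  def getSchedulerState(): StateSnapshot = stateLock.synchronized {
    StateSnapshot(
      masterInfo.map { case (ip, port) => s"http://$ip:$port" }, // None until registered
      queue.toList)                                              // defensive copy
  }
}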