@@ -252,15 +252,12 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
252252 try {
253253 logInfo(" Allocating " + args.numExecutors + " executors." )
254254 // Wait until all containers have finished
255- // TODO: This is a bit ugly. Can we make it nicer?
256- // TODO: Handle container failure
257255 yarnAllocator.addResourceRequests(args.numExecutors)
256+ yarnAllocator.allocateResources()
258257 // Exits the loop if the user thread exits.
259258 while (yarnAllocator.getNumExecutorsRunning < args.numExecutors && userThread.isAlive) {
260- if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
261- finishApplicationMaster(FinalApplicationStatus .FAILED ,
262- " max number of executor failures reached" )
263- }
259+ checkNumExecutorsFailed()
260+ allocateMissingExecutor()
264261 yarnAllocator.allocateResources()
265262 ApplicationMaster .incrementAllocatorLoop(1 )
266263 Thread .sleep(100 )
@@ -289,23 +286,31 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
289286 }
290287 }
291288
/**
 * Tops up the outstanding container requests when the executors that are
 * running plus those still pending allocation fall short of
 * `args.numExecutors`.
 *
 * Does nothing when the shortfall is zero or negative.
 */
private def allocateMissingExecutor(): Unit = {
  // Read the three counters in the same order as the subtraction chain they replace.
  val requested = args.numExecutors
  val running = yarnAllocator.getNumExecutorsRunning
  val pending = yarnAllocator.getNumPendingAllocate
  val shortfall = requested - running - pending

  if (shortfall > 0) {
    // NOTE(review): the message text is reproduced exactly as found, leading space included.
    logInfo(
      " Allocating %d containers to make up for (potentially) lost containers".format(shortfall))
    yarnAllocator.addResourceRequests(shortfall)
  }
}
298+
/**
 * Finishes the application master with a FAILED status once the allocator
 * reports at least `maxNumExecutorFailures` failed executors.
 *
 * Does nothing while the failure count is below that limit.
 */
private def checkNumExecutorsFailed(): Unit = {
  val failedSoFar = yarnAllocator.getNumExecutorsFailed
  val limitReached = failedSoFar >= maxNumExecutorFailures

  if (limitReached) {
    // NOTE(review): the message text is reproduced exactly as found, leading space included.
    finishApplicationMaster(
      FinalApplicationStatus.FAILED,
      " max number of executor failures reached")
  }
}
305+
292306 private def launchReporterThread (_sleepTime : Long ): Thread = {
293307 val sleepTime = if (_sleepTime <= 0 ) 0 else _sleepTime
294308
295309 val t = new Thread {
296310 override def run () {
297311 while (userThread.isAlive) {
298- if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
299- finishApplicationMaster(FinalApplicationStatus .FAILED ,
300- " max number of executor failures reached" )
301- }
302- val missingExecutorCount = args.numExecutors - yarnAllocator.getNumExecutorsRunning -
303- yarnAllocator.getNumPendingAllocate
304- if (missingExecutorCount > 0 ) {
305- logInfo(" Allocating %d containers to make up for (potentially) lost containers" .
306- format(missingExecutorCount))
307- yarnAllocator.addResourceRequests(missingExecutorCount)
308- }
312+ checkNumExecutorsFailed()
313+ allocateMissingExecutor()
309314 sendProgress()
310315 Thread .sleep(sleepTime)
311316 }
0 commit comments