@@ -252,26 +252,14 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
252252 try {
253253 logInfo(" Allocating " + args.numExecutors + " executors." )
254254 // Wait until all containers have finished
255- // TODO: This is a bit ugly. Can we make it nicer?
256- // TODO: Handle container failure
257255 yarnAllocator.addResourceRequests(args.numExecutors)
258256 // Exits the loop if the user thread exits.
259- val startTime = System .currentTimeMillis()
260- var usedTime = 0L
261- while ((yarnAllocator.getNumExecutorsRunning < args.numExecutors
262- && userThread.isAlive) && (usedTime < 1000L * 60 * 10 )) {
263- if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
264- finishApplicationMaster(FinalApplicationStatus .FAILED ,
265- " max number of executor failures reached" )
266- }
257+ while (yarnAllocator.getNumExecutorsRunning < args.numExecutors && userThread.isAlive) {
267258 yarnAllocator.allocateResources()
268- val numExecutorsFailed = yarnAllocator.getNumExecutorsFailed
269- if (numExecutorsFailed > 0 ) {
270- yarnAllocator.addResourceRequests(numExecutorsFailed)
271- }
259+ checkNumExecutorsFailed()
260+ allocateMissingExecutor()
272261 ApplicationMaster .incrementAllocatorLoop(1 )
273262 Thread .sleep(100 )
274- usedTime = System .currentTimeMillis() - startTime
275263 }
276264 } finally {
277265 // In case of exceptions, etc - ensure that count is at least ALLOCATOR_LOOP_WAIT_COUNT,
@@ -297,23 +285,31 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
297285 }
298286 }
299287
/**
 * Tops up the outstanding container requests so that running plus pending
 * executors add up to the requested `args.numExecutors`.
 *
 * Does nothing when the running and pending counts already cover the target.
 */
private def allocateMissingExecutor(): Unit = {
  // Read the three counters in a fixed order: target, running, then pending.
  val wanted = args.numExecutors
  val running = yarnAllocator.getNumExecutorsRunning
  val pending = yarnAllocator.getNumPendingAllocate

  // Executors that are neither running nor already requested.
  val shortfall = wanted - running - pending
  if (shortfall > 0) {
    logInfo(
      " Allocating %d containers to make up for (potentially) lost containers".format(shortfall))
    yarnAllocator.addResourceRequests(shortfall)
  }
}
297+
/**
 * Finishes the application master with a FAILED status once the allocator's
 * failed-executor count reaches `maxNumExecutorFailures`.
 *
 * Returns normally in either case; callers keep running after this check.
 */
private def checkNumExecutorsFailed(): Unit = {
  val failedSoFar = yarnAllocator.getNumExecutorsFailed
  // The limit is inclusive: hitting it exactly already counts as failure.
  val limitReached = failedSoFar >= maxNumExecutorFailures
  if (limitReached) {
    finishApplicationMaster(
      FinalApplicationStatus.FAILED,
      " max number of executor failures reached")
  }
}
304+
300305 private def launchReporterThread (_sleepTime : Long ): Thread = {
301306 val sleepTime = if (_sleepTime <= 0 ) 0 else _sleepTime
302307
303308 val t = new Thread {
304309 override def run () {
305310 while (userThread.isAlive) {
306- if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
307- finishApplicationMaster(FinalApplicationStatus .FAILED ,
308- " max number of executor failures reached" )
309- }
310- val missingExecutorCount = args.numExecutors - yarnAllocator.getNumExecutorsRunning -
311- yarnAllocator.getNumPendingAllocate
312- if (missingExecutorCount > 0 ) {
313- logInfo(" Allocating %d containers to make up for (potentially) lost containers" .
314- format(missingExecutorCount))
315- yarnAllocator.addResourceRequests(missingExecutorCount)
316- }
311+ checkNumExecutorsFailed()
312+ allocateMissingExecutor()
317313 sendProgress()
318314 Thread .sleep(sleepTime)
319315 }
0 commit comments