Skip to content

Commit 3c464bd

Browse files
committed
add time limit to allocateExecutors
1 parent e00b656 commit 3c464bd

File tree

2 files changed

+11
-3
lines changed

2 files changed

+11
-3
lines changed

yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,18 +256,22 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
256256
// TODO: Handle container failure
257257
yarnAllocator.addResourceRequests(args.numExecutors)
258258
// Exits the loop if the user thread exits.
259-
while (yarnAllocator.getNumExecutorsRunning < args.numExecutors && userThread.isAlive) {
259+
val startTime = System.currentTimeMillis()
260+
var usedTime = 0L
261+
while ((yarnAllocator.getNumExecutorsRunning < args.numExecutors
262+
&& userThread.isAlive) && (usedTime < 1000L * 60 * 10)) {
260263
if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
261264
finishApplicationMaster(FinalApplicationStatus.FAILED,
262265
"max number of executor failures reached")
263266
}
267+
yarnAllocator.allocateResources()
264268
val numExecutorsFailed = yarnAllocator.getNumExecutorsFailed
265269
if (numExecutorsFailed > 0) {
266270
yarnAllocator.addResourceRequests(numExecutorsFailed)
267271
}
268-
yarnAllocator.allocateResources()
269272
ApplicationMaster.incrementAllocatorLoop(1)
270273
Thread.sleep(100)
274+
usedTime = System.currentTimeMillis() - startTime
271275
}
272276
} finally {
273277
// In case of exceptions, etc - ensure that count is at least ALLOCATOR_LOOP_WAIT_COUNT,

yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,13 +204,17 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp
204204
// TODO: Handle container failure
205205

206206
yarnAllocator.addResourceRequests(args.numExecutors)
207-
while ((yarnAllocator.getNumExecutorsRunning < args.numExecutors) && (!driverClosed)) {
207+
val startTime = System.currentTimeMillis()
208+
var usedTime = 0L
209+
while (((yarnAllocator.getNumExecutorsRunning < args.numExecutors) && (!driverClosed))
210+
&& (usedTime < 1000L * 60 * 10)) {
208211
yarnAllocator.allocateResources()
209212
val numExecutorsFailed = yarnAllocator.getNumExecutorsFailed
210213
if (numExecutorsFailed > 0) {
211214
yarnAllocator.addResourceRequests(numExecutorsFailed)
212215
}
213216
Thread.sleep(100)
217+
usedTime = System.currentTimeMillis() - startTime
214218
}
215219

216220
logInfo("All executors have launched.")

0 commit comments

Comments
 (0)