Skip to content

Commit 917600a

Browse files
committed
solve conflicts
2 parents f7064d0 + 3aed305 commit 917600a

File tree

130 files changed

+3513
-2258
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

130 files changed

+3513
-2258
lines changed

assembly/pom.xml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,5 +354,25 @@
354354
</dependency>
355355
</dependencies>
356356
</profile>
357+
358+
<!-- Profiles that disable inclusion of certain dependencies. -->
359+
<profile>
360+
<id>hadoop-provided</id>
361+
<properties>
362+
<hadoop.deps.scope>provided</hadoop.deps.scope>
363+
</properties>
364+
</profile>
365+
<profile>
366+
<id>hive-provided</id>
367+
<properties>
368+
<hive.deps.scope>provided</hive.deps.scope>
369+
</properties>
370+
</profile>
371+
<profile>
372+
<id>parquet-provided</id>
373+
<properties>
374+
<parquet.deps.scope>provided</parquet.deps.scope>
375+
</properties>
376+
</profile>
357377
</profiles>
358378
</project>

bagel/pom.xml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,6 @@
4040
<artifactId>spark-core_${scala.binary.version}</artifactId>
4141
<version>${project.version}</version>
4242
</dependency>
43-
<dependency>
44-
<groupId>org.eclipse.jetty</groupId>
45-
<artifactId>jetty-server</artifactId>
46-
</dependency>
4743
<dependency>
4844
<groupId>org.scalacheck</groupId>
4945
<artifactId>scalacheck_${scala.binary.version}</artifactId>

bin/compute-classpath.cmd

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,13 @@ if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
109109
set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
110110
:no_yarn_conf_dir
111111

112+
rem To allow for distributions to append needed libraries to the classpath (e.g. when
113+
rem using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and
114+
rem append it to the final classpath.
115+
if not "x%SPARK_DIST_CLASSPATH%"=="x" (
116+
set CLASSPATH=%CLASSPATH%;%SPARK_DIST_CLASSPATH%
117+
)
118+
112119
rem A bit of a hack to allow calling this script within run2.cmd without seeing output
113120
if "%DONT_PRINT_CLASSPATH%"=="1" goto exit
114121

bin/compute-classpath.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,11 @@ if [ -n "$YARN_CONF_DIR" ]; then
146146
CLASSPATH="$CLASSPATH:$YARN_CONF_DIR"
147147
fi
148148

149+
# To allow for distributions to append needed libraries to the classpath (e.g. when
150+
# using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and
151+
# append it to the final classpath.
152+
if [ -n "$SPARK_DIST_CLASSPATH" ]; then
153+
CLASSPATH="$CLASSPATH:$SPARK_DIST_CLASSPATH"
154+
fi
155+
149156
echo "$CLASSPATH"

bin/spark-submit

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,19 @@ while (($#)); do
3838
export SPARK_SUBMIT_CLASSPATH=$2
3939
elif [ "$1" = "--driver-java-options" ]; then
4040
export SPARK_SUBMIT_OPTS=$2
41+
elif [ "$1" = "--master" ]; then
42+
export MASTER=$2
4143
fi
4244
shift
4345
done
4446

45-
DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf"
47+
if [ -z "$SPARK_CONF_DIR" ]; then
48+
export SPARK_CONF_DIR="$SPARK_HOME/conf"
49+
fi
50+
DEFAULT_PROPERTIES_FILE="$SPARK_CONF_DIR/spark-defaults.conf"
51+
if [ "$MASTER" == "yarn-cluster" ]; then
52+
SPARK_SUBMIT_DEPLOY_MODE=cluster
53+
fi
4654
export SPARK_SUBMIT_DEPLOY_MODE=${SPARK_SUBMIT_DEPLOY_MODE:-"client"}
4755
export SPARK_SUBMIT_PROPERTIES_FILE=${SPARK_SUBMIT_PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"}
4856

bin/spark-submit2.cmd

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,11 @@ set ORIG_ARGS=%*
2424

2525
rem Reset the values of all variables used
2626
set SPARK_SUBMIT_DEPLOY_MODE=client
27-
set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_HOME%\conf\spark-defaults.conf
27+
28+
if not defined SPARK_CONF_DIR (
29+
set SPARK_CONF_DIR=%SPARK_HOME%\conf
30+
)
31+
set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_CONF_DIR%\spark-defaults.conf
2832
set SPARK_SUBMIT_DRIVER_MEMORY=
2933
set SPARK_SUBMIT_LIBRARY_PATH=
3034
set SPARK_SUBMIT_CLASSPATH=
@@ -45,11 +49,17 @@ if [%1] == [] goto continue
4549
set SPARK_SUBMIT_CLASSPATH=%2
4650
) else if [%1] == [--driver-java-options] (
4751
set SPARK_SUBMIT_OPTS=%2
52+
) else if [%1] == [--master] (
53+
set MASTER=%2
4854
)
4955
shift
5056
goto loop
5157
:continue
5258

59+
if [%MASTER%] == [yarn-cluster] (
60+
set SPARK_SUBMIT_DEPLOY_MODE=cluster
61+
)
62+
5363
rem For client mode, the driver will be launched in the same JVM that launches
5464
rem SparkSubmit, so we may need to read the properties file for any extra class
5565
rem paths, library paths, java options and memory early on. Otherwise, it will

core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala

Lines changed: 77 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ private[spark] class ExecutorAllocationManager(
6565
listenerBus: LiveListenerBus,
6666
conf: SparkConf)
6767
extends Logging {
68+
69+
allocationManager =>
70+
6871
import ExecutorAllocationManager._
6972

7073
// Lower and upper bounds on the number of executors. These are required.
@@ -121,7 +124,7 @@ private[spark] class ExecutorAllocationManager(
121124
private var clock: Clock = new RealClock
122125

123126
// Listener for Spark events that impact the allocation policy
124-
private val listener = new ExecutorAllocationListener(this)
127+
private val listener = new ExecutorAllocationListener
125128

126129
/**
127130
* Verify that the settings specified through the config are valid.
@@ -209,11 +212,12 @@ private[spark] class ExecutorAllocationManager(
209212
addTime += sustainedSchedulerBacklogTimeout * 1000
210213
}
211214

212-
removeTimes.foreach { case (executorId, expireTime) =>
213-
if (now >= expireTime) {
215+
removeTimes.retain { case (executorId, expireTime) =>
216+
val expired = now >= expireTime
217+
if (expired) {
214218
removeExecutor(executorId)
215-
removeTimes.remove(executorId)
216219
}
220+
!expired
217221
}
218222
}
219223

@@ -291,7 +295,7 @@ private[spark] class ExecutorAllocationManager(
291295
// Do not kill the executor if we have already reached the lower bound
292296
val numExistingExecutors = executorIds.size - executorsPendingToRemove.size
293297
if (numExistingExecutors - 1 < minNumExecutors) {
294-
logInfo(s"Not removing idle executor $executorId because there are only " +
298+
logDebug(s"Not removing idle executor $executorId because there are only " +
295299
s"$numExistingExecutors executor(s) left (limit $minNumExecutors)")
296300
return false
297301
}
@@ -315,7 +319,11 @@ private[spark] class ExecutorAllocationManager(
315319
private def onExecutorAdded(executorId: String): Unit = synchronized {
316320
if (!executorIds.contains(executorId)) {
317321
executorIds.add(executorId)
318-
executorIds.foreach(onExecutorIdle)
322+
// If an executor (call this executor X) is not removed because the lower bound
323+
// has been reached, it will no longer be marked as idle. When new executors join,
324+
// however, we are no longer at the lower bound, and so we must mark executor X
325+
// as idle again so as not to forget that it is a candidate for removal. (see SPARK-4951)
326+
executorIds.filter(listener.isExecutorIdle).foreach(onExecutorIdle)
319327
logInfo(s"New executor $executorId has registered (new total is ${executorIds.size})")
320328
if (numExecutorsPending > 0) {
321329
numExecutorsPending -= 1
@@ -373,10 +381,14 @@ private[spark] class ExecutorAllocationManager(
373381
* the executor is not already marked as idle.
374382
*/
375383
private def onExecutorIdle(executorId: String): Unit = synchronized {
376-
if (!removeTimes.contains(executorId) && !executorsPendingToRemove.contains(executorId)) {
377-
logDebug(s"Starting idle timer for $executorId because there are no more tasks " +
378-
s"scheduled to run on the executor (to expire in $executorIdleTimeout seconds)")
379-
removeTimes(executorId) = clock.getTimeMillis + executorIdleTimeout * 1000
384+
if (executorIds.contains(executorId)) {
385+
if (!removeTimes.contains(executorId) && !executorsPendingToRemove.contains(executorId)) {
386+
logDebug(s"Starting idle timer for $executorId because there are no more tasks " +
387+
s"scheduled to run on the executor (to expire in $executorIdleTimeout seconds)")
388+
removeTimes(executorId) = clock.getTimeMillis + executorIdleTimeout * 1000
389+
}
390+
} else {
391+
logWarning(s"Attempted to mark unknown executor $executorId idle")
380392
}
381393
}
382394

@@ -396,25 +408,24 @@ private[spark] class ExecutorAllocationManager(
396408
* and consistency of events returned by the listener. For simplicity, it does not account
397409
* for speculated tasks.
398410
*/
399-
private class ExecutorAllocationListener(allocationManager: ExecutorAllocationManager)
400-
extends SparkListener {
411+
private class ExecutorAllocationListener extends SparkListener {
401412

402413
private val stageIdToNumTasks = new mutable.HashMap[Int, Int]
403414
private val stageIdToTaskIndices = new mutable.HashMap[Int, mutable.HashSet[Int]]
404415
private val executorIdToTaskIds = new mutable.HashMap[String, mutable.HashSet[Long]]
405416

406417
override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = {
407-
synchronized {
408-
val stageId = stageSubmitted.stageInfo.stageId
409-
val numTasks = stageSubmitted.stageInfo.numTasks
418+
val stageId = stageSubmitted.stageInfo.stageId
419+
val numTasks = stageSubmitted.stageInfo.numTasks
420+
allocationManager.synchronized {
410421
stageIdToNumTasks(stageId) = numTasks
411422
allocationManager.onSchedulerBacklogged()
412423
}
413424
}
414425

415426
override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = {
416-
synchronized {
417-
val stageId = stageCompleted.stageInfo.stageId
427+
val stageId = stageCompleted.stageInfo.stageId
428+
allocationManager.synchronized {
418429
stageIdToNumTasks -= stageId
419430
stageIdToTaskIndices -= stageId
420431

@@ -426,47 +437,62 @@ private[spark] class ExecutorAllocationManager(
426437
}
427438
}
428439

429-
override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = synchronized {
440+
override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = {
430441
val stageId = taskStart.stageId
431442
val taskId = taskStart.taskInfo.taskId
432443
val taskIndex = taskStart.taskInfo.index
433444
val executorId = taskStart.taskInfo.executorId
434445

435-
// If this is the last pending task, mark the scheduler queue as empty
436-
stageIdToTaskIndices.getOrElseUpdate(stageId, new mutable.HashSet[Int]) += taskIndex
437-
val numTasksScheduled = stageIdToTaskIndices(stageId).size
438-
val numTasksTotal = stageIdToNumTasks.getOrElse(stageId, -1)
439-
if (numTasksScheduled == numTasksTotal) {
440-
// No more pending tasks for this stage
441-
stageIdToNumTasks -= stageId
442-
if (stageIdToNumTasks.isEmpty) {
443-
allocationManager.onSchedulerQueueEmpty()
446+
allocationManager.synchronized {
447+
// This guards against the race condition in which the `SparkListenerTaskStart`
448+
// event is posted before the `SparkListenerBlockManagerAdded` event, which is
449+
// possible because these events are posted in different threads. (see SPARK-4951)
450+
if (!allocationManager.executorIds.contains(executorId)) {
451+
allocationManager.onExecutorAdded(executorId)
452+
}
453+
454+
// If this is the last pending task, mark the scheduler queue as empty
455+
stageIdToTaskIndices.getOrElseUpdate(stageId, new mutable.HashSet[Int]) += taskIndex
456+
val numTasksScheduled = stageIdToTaskIndices(stageId).size
457+
val numTasksTotal = stageIdToNumTasks.getOrElse(stageId, -1)
458+
if (numTasksScheduled == numTasksTotal) {
459+
// No more pending tasks for this stage
460+
stageIdToNumTasks -= stageId
461+
if (stageIdToNumTasks.isEmpty) {
462+
allocationManager.onSchedulerQueueEmpty()
463+
}
444464
}
445-
}
446465

447-
// Mark the executor on which this task is scheduled as busy
448-
executorIdToTaskIds.getOrElseUpdate(executorId, new mutable.HashSet[Long]) += taskId
449-
allocationManager.onExecutorBusy(executorId)
466+
// Mark the executor on which this task is scheduled as busy
467+
executorIdToTaskIds.getOrElseUpdate(executorId, new mutable.HashSet[Long]) += taskId
468+
allocationManager.onExecutorBusy(executorId)
469+
}
450470
}
451471

452-
override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = synchronized {
472+
override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
453473
val executorId = taskEnd.taskInfo.executorId
454474
val taskId = taskEnd.taskInfo.taskId
455-
456-
// If the executor is no longer running scheduled any tasks, mark it as idle
457-
if (executorIdToTaskIds.contains(executorId)) {
458-
executorIdToTaskIds(executorId) -= taskId
459-
if (executorIdToTaskIds(executorId).isEmpty) {
460-
executorIdToTaskIds -= executorId
461-
allocationManager.onExecutorIdle(executorId)
475+
allocationManager.synchronized {
476+
// If the executor is no longer running any scheduled tasks, mark it as idle
477+
if (executorIdToTaskIds.contains(executorId)) {
478+
executorIdToTaskIds(executorId) -= taskId
479+
if (executorIdToTaskIds(executorId).isEmpty) {
480+
executorIdToTaskIds -= executorId
481+
allocationManager.onExecutorIdle(executorId)
482+
}
462483
}
463484
}
464485
}
465486

466487
override def onBlockManagerAdded(blockManagerAdded: SparkListenerBlockManagerAdded): Unit = {
467488
val executorId = blockManagerAdded.blockManagerId.executorId
468489
if (executorId != SparkContext.DRIVER_IDENTIFIER) {
469-
allocationManager.onExecutorAdded(executorId)
490+
// This guards against the race condition in which the `SparkListenerTaskStart`
491+
// event is posted before the `SparkListenerBlockManagerAdded` event, which is
492+
// possible because these events are posted in different threads. (see SPARK-4951)
493+
if (!allocationManager.executorIds.contains(executorId)) {
494+
allocationManager.onExecutorAdded(executorId)
495+
}
470496
}
471497
}
472498

@@ -478,12 +504,23 @@ private[spark] class ExecutorAllocationManager(
478504
/**
479505
* An estimate of the total number of pending tasks remaining for currently running stages. Does
480506
* not account for tasks which may have failed and been resubmitted.
507+
*
508+
* Note: This is not thread-safe without the caller owning the `allocationManager` lock.
481509
*/
482510
def totalPendingTasks(): Int = {
483511
stageIdToNumTasks.map { case (stageId, numTasks) =>
484512
numTasks - stageIdToTaskIndices.get(stageId).map(_.size).getOrElse(0)
485513
}.sum
486514
}
515+
516+
/**
517+
* Return true if an executor is not currently running a task, and false otherwise.
518+
*
519+
* Note: This is not thread-safe without the caller owning the `allocationManager` lock.
520+
*/
521+
def isExecutorIdle(executorId: String): Boolean = {
522+
!executorIdToTaskIds.contains(executorId)
523+
}
487524
}
488525

489526
}

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
229229
// An asynchronous listener bus for Spark events
230230
private[spark] val listenerBus = new LiveListenerBus
231231

232-
conf.set("spark.executor.id", "driver")
232+
conf.set("spark.executor.id", SparkContext.DRIVER_IDENTIFIER)
233233

234234
// Create the Spark execution environment (cache, map output tracker, etc)
235235
private[spark] val env = SparkEnv.createDriverEnv(conf, isLocal, listenerBus)
@@ -458,7 +458,6 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
458458
Option(localProperties.get).map(_.getProperty(key)).getOrElse(null)
459459

460460
/** Set a human readable description of the current job. */
461-
@deprecated("use setJobGroup", "0.8.1")
462461
def setJobDescription(value: String) {
463462
setLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION, value)
464463
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark
19+
20+
import org.apache.spark.annotation.DeveloperApi
21+
22+
/**
23+
* Exception thrown when a task cannot be serialized.
24+
*/
25+
private[spark] class TaskNotSerializableException(error: Throwable) extends Exception(error)

core/src/main/scala/org/apache/spark/deploy/Client.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ object Client {
160160
val (actorSystem, _) = AkkaUtils.createActorSystem(
161161
"driverClient", Utils.localHostName(), 0, conf, new SecurityManager(conf))
162162

163+
// Verify driverArgs.master is a valid url so that we can use it in ClientActor safely
164+
Master.toAkkaUrl(driverArgs.master)
163165
actorSystem.actorOf(Props(classOf[ClientActor], driverArgs, conf))
164166

165167
actorSystem.awaitTermination()

0 commit comments

Comments
 (0)