
Commit 19406cc

Merge branch 'master' of https://github.com/apache/spark into kafkaRdd
2 parents: 2e67117 + d743732

323 files changed (+12732 / -7998 lines)


assembly/pom.xml

Lines changed: 2 additions & 0 deletions
@@ -142,8 +142,10 @@
         </includes>
         <excludes>
           <exclude>com/google/common/base/Absent*</exclude>
+          <exclude>com/google/common/base/Function</exclude>
           <exclude>com/google/common/base/Optional*</exclude>
           <exclude>com/google/common/base/Present*</exclude>
+          <exclude>com/google/common/base/Supplier</exclude>
         </excludes>
       </relocation>
     </relocations>
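Function and Supplier join Absent, Optional, and Present in the un-relocated set, presumably because both appear in Optional's public signatures (Optional.transform takes a Function, Optional.or takes a Supplier), so shading them would break the Optional API that Spark deliberately leaves at its original coordinates. The matching <include> entries appear in the core/pom.xml filter below.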

bin/compute-classpath.sh

Lines changed: 15 additions & 12 deletions
@@ -72,22 +72,25 @@ else
   assembly_folder="$ASSEMBLY_DIR"
 fi

-num_jars="$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar$" | wc -l)"
-if [ "$num_jars" -eq "0" ]; then
-  echo "Failed to find Spark assembly in $assembly_folder"
-  echo "You need to build Spark before running this program."
-  exit 1
-fi
+num_jars=0
+
+for f in ${assembly_folder}/spark-assembly*hadoop*.jar; do
+  if [[ ! -e "$f" ]]; then
+    echo "Failed to find Spark assembly in $assembly_folder" 1>&2
+    echo "You need to build Spark before running this program." 1>&2
+    exit 1
+  fi
+  ASSEMBLY_JAR="$f"
+  num_jars=$((num_jars+1))
+done
+
 if [ "$num_jars" -gt "1" ]; then
-  jars_list=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*.jar$")
-  echo "Found multiple Spark assembly jars in $assembly_folder:"
-  echo "$jars_list"
-  echo "Please remove all but one jar."
+  echo "Found multiple Spark assembly jars in $assembly_folder:" 1>&2
+  ls ${assembly_folder}/spark-assembly*hadoop*.jar 1>&2
+  echo "Please remove all but one jar." 1>&2
   exit 1
 fi

-ASSEMBLY_JAR="$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)"
-
 # Verify that versions of java used to build the jars and run Spark are compatible
 jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
 if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then

bin/run-example

Lines changed: 21 additions & 6 deletions
@@ -35,17 +35,32 @@ else
 fi

 if [ -f "$FWDIR/RELEASE" ]; then
-  export SPARK_EXAMPLES_JAR="`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`"
-elif [ -e "$EXAMPLES_DIR"/target/scala-$SPARK_SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
-  export SPARK_EXAMPLES_JAR="`ls "$EXAMPLES_DIR"/target/scala-$SPARK_SCALA_VERSION/spark-examples-*hadoop*.jar`"
+  JAR_PATH="${FWDIR}/lib"
+else
+  JAR_PATH="${EXAMPLES_DIR}/target/scala-${SPARK_SCALA_VERSION}"
 fi

-if [[ -z "$SPARK_EXAMPLES_JAR" ]]; then
-  echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
-  echo "You need to build Spark before running this program" 1>&2
+JAR_COUNT=0
+
+for f in ${JAR_PATH}/spark-examples-*hadoop*.jar; do
+  if [[ ! -e "$f" ]]; then
+    echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
+    echo "You need to build Spark before running this program" 1>&2
+    exit 1
+  fi
+  SPARK_EXAMPLES_JAR="$f"
+  JAR_COUNT=$((JAR_COUNT+1))
+done
+
+if [ "$JAR_COUNT" -gt "1" ]; then
+  echo "Found multiple Spark examples assembly jars in ${JAR_PATH}" 1>&2
+  ls ${JAR_PATH}/spark-examples-*hadoop*.jar 1>&2
+  echo "Please remove all but one jar." 1>&2
   exit 1
 fi

+export SPARK_EXAMPLES_JAR
+
 EXAMPLE_MASTER=${MASTER:-"local[*]"}

 if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
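bin/run-example adopts the same glob-loop pattern as compute-classpath.sh above: fail fast when no examples jar exists, complain when more than one does, and export SPARK_EXAMPLES_JAR only once a single match is confirmed.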

bin/spark-class

Lines changed: 6 additions & 3 deletions
@@ -29,6 +29,7 @@ FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
+export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"$SPARK_HOME/conf"}"

 . "$FWDIR"/bin/load-spark-env.sh

@@ -71,6 +72,8 @@ case "$1" in
   'org.apache.spark.executor.MesosExecutorBackend')
     OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
     OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
+    export PYTHONPATH="$FWDIR/python:$PYTHONPATH"
+    export PYTHONPATH="$FWDIR/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
     ;;

   # Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_OPTS +
@@ -118,8 +121,8 @@ fi
 JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"

 # Load extra JAVA_OPTS from conf/java-opts, if it exists
-if [ -e "$FWDIR/conf/java-opts" ] ; then
-  JAVA_OPTS="$JAVA_OPTS `cat "$FWDIR"/conf/java-opts`"
+if [ -e "$SPARK_CONF_DIR/java-opts" ] ; then
+  JAVA_OPTS="$JAVA_OPTS `cat "$SPARK_CONF_DIR"/java-opts`"
 fi

 # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!
@@ -148,7 +151,7 @@ fi
 if [[ "$1" =~ org.apache.spark.tools.* ]]; then
   if test -z "$SPARK_TOOLS_JAR"; then
     echo "Failed to find Spark Tools Jar in $FWDIR/tools/target/scala-$SPARK_SCALA_VERSION/" 1>&2
-    echo "You need to build Spark before running $1." 1>&2
+    echo "You need to run \"build/sbt tools/package\" before running $1." 1>&2
     exit 1
   fi
   CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR"
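Two behavioral changes here: java-opts is now looked up under $SPARK_CONF_DIR (defaulting to $SPARK_HOME/conf) instead of being hard-wired to $FWDIR/conf, so a relocated configuration directory is honored; and the Mesos executor backend now puts the Python sources and the bundled py4j zip on PYTHONPATH, presumably so PySpark workers launched through Mesos can find them.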

build/mvn

Lines changed: 4 additions & 4 deletions
@@ -68,10 +68,10 @@ install_app() {
 # Install maven under the build/ folder
 install_mvn() {
   install_app \
-    "http://apache.claz.org/maven/maven-3/3.2.3/binaries" \
-    "apache-maven-3.2.3-bin.tar.gz" \
-    "apache-maven-3.2.3/bin/mvn"
-  MVN_BIN="${_DIR}/apache-maven-3.2.3/bin/mvn"
+    "http://archive.apache.org/dist/maven/maven-3/3.2.5/binaries" \
+    "apache-maven-3.2.5-bin.tar.gz" \
+    "apache-maven-3.2.5/bin/mvn"
+  MVN_BIN="${_DIR}/apache-maven-3.2.5/bin/mvn"
 }

 # Install zinc under the build/ folder
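Besides the 3.2.3 → 3.2.5 version bump, the download URL moves from a volunteer mirror (apache.claz.org) to archive.apache.org, which hosts every Apache release permanently, so the bootstrap keeps working after mirrors age out old versions.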

core/pom.xml

Lines changed: 2 additions & 0 deletions
@@ -372,8 +372,10 @@
               <artifact>com.google.guava:guava</artifact>
               <includes>
                 <include>com/google/common/base/Absent*</include>
+                <include>com/google/common/base/Function</include>
                 <include>com/google/common/base/Optional*</include>
                 <include>com/google/common/base/Present*</include>
+                <include>com/google/common/base/Supplier</include>
               </includes>
             </filter>
           </filters>
core/src/main/java/org/apache/spark/JavaSparkListener.java

Lines changed: 97 additions & 0 deletions

@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark;
+
+import org.apache.spark.scheduler.SparkListener;
+import org.apache.spark.scheduler.SparkListenerApplicationEnd;
+import org.apache.spark.scheduler.SparkListenerApplicationStart;
+import org.apache.spark.scheduler.SparkListenerBlockManagerAdded;
+import org.apache.spark.scheduler.SparkListenerBlockManagerRemoved;
+import org.apache.spark.scheduler.SparkListenerEnvironmentUpdate;
+import org.apache.spark.scheduler.SparkListenerExecutorAdded;
+import org.apache.spark.scheduler.SparkListenerExecutorMetricsUpdate;
+import org.apache.spark.scheduler.SparkListenerExecutorRemoved;
+import org.apache.spark.scheduler.SparkListenerJobEnd;
+import org.apache.spark.scheduler.SparkListenerJobStart;
+import org.apache.spark.scheduler.SparkListenerStageCompleted;
+import org.apache.spark.scheduler.SparkListenerStageSubmitted;
+import org.apache.spark.scheduler.SparkListenerTaskEnd;
+import org.apache.spark.scheduler.SparkListenerTaskGettingResult;
+import org.apache.spark.scheduler.SparkListenerTaskStart;
+import org.apache.spark.scheduler.SparkListenerUnpersistRDD;
+
+/**
+ * Java clients should extend this class instead of implementing
+ * SparkListener directly. This is to prevent java clients
+ * from breaking when new events are added to the SparkListener
+ * trait.
+ *
+ * This is a concrete class instead of abstract to enforce
+ * new events get added to both the SparkListener and this adapter
+ * in lockstep.
+ */
+public class JavaSparkListener implements SparkListener {
+
+  @Override
+  public void onStageCompleted(SparkListenerStageCompleted stageCompleted) { }
+
+  @Override
+  public void onStageSubmitted(SparkListenerStageSubmitted stageSubmitted) { }
+
+  @Override
+  public void onTaskStart(SparkListenerTaskStart taskStart) { }
+
+  @Override
+  public void onTaskGettingResult(SparkListenerTaskGettingResult taskGettingResult) { }
+
+  @Override
+  public void onTaskEnd(SparkListenerTaskEnd taskEnd) { }
+
+  @Override
+  public void onJobStart(SparkListenerJobStart jobStart) { }
+
+  @Override
+  public void onJobEnd(SparkListenerJobEnd jobEnd) { }
+
+  @Override
+  public void onEnvironmentUpdate(SparkListenerEnvironmentUpdate environmentUpdate) { }
+
+  @Override
+  public void onBlockManagerAdded(SparkListenerBlockManagerAdded blockManagerAdded) { }
+
+  @Override
+  public void onBlockManagerRemoved(SparkListenerBlockManagerRemoved blockManagerRemoved) { }
+
+  @Override
+  public void onUnpersistRDD(SparkListenerUnpersistRDD unpersistRDD) { }
+
+  @Override
+  public void onApplicationStart(SparkListenerApplicationStart applicationStart) { }
+
+  @Override
+  public void onApplicationEnd(SparkListenerApplicationEnd applicationEnd) { }
+
+  @Override
+  public void onExecutorMetricsUpdate(SparkListenerExecutorMetricsUpdate executorMetricsUpdate) { }
+
+  @Override
+  public void onExecutorAdded(SparkListenerExecutorAdded executorAdded) { }
+
+  @Override
+  public void onExecutorRemoved(SparkListenerExecutorRemoved executorRemoved) { }
+}
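Every callback above has an empty body, so a Java client extends JavaSparkListener and overrides only the events it cares about; making the class concrete rather than abstract forces a compile failure here whenever SparkListener gains a new method, which is the lockstep the class comment describes. A minimal usage sketch, assuming Spark's Java API of this era (the JobEndLogger class and the local-mode setup are illustrative, not part of this commit):

import java.util.Arrays;

import org.apache.spark.JavaSparkListener;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.scheduler.SparkListenerJobEnd;

// Hypothetical client: report each job's outcome, ignore every other event.
public class JobEndLogger extends JavaSparkListener {
  @Override
  public void onJobEnd(SparkListenerJobEnd jobEnd) {
    System.out.println("Job " + jobEnd.jobId() + " ended: " + jobEnd.jobResult());
  }

  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("listener-demo").setMaster("local[*]");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    // Register on the underlying SparkContext; callbacks fire on the listener bus thread.
    jsc.sc().addSparkListener(new JobEndLogger());
    jsc.parallelize(Arrays.asList(1, 2, 3)).count(); // runs one job, triggering onJobEnd
    jsc.stop();
  }
}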

core/src/main/resources/org/apache/spark/log4j-defaults.properties

Lines changed: 1 addition & 0 deletions
@@ -10,3 +10,4 @@ log4j.logger.org.eclipse.jetty=WARN
 log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR
 log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
 log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
+log4j.logger.org.apache.hadoop.yarn.util.RackResolver=WARN
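Capping org.apache.hadoop.yarn.util.RackResolver at WARN presumably silences the INFO line it logs for each host it resolves, which can flood driver logs on YARN deployments.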

core/src/main/resources/org/apache/spark/ui/static/webui.css

Lines changed: 14 additions & 4 deletions
@@ -19,6 +19,7 @@
   height: 50px;
   font-size: 15px;
   margin-bottom: 15px;
+  min-width: 1200px
 }

 .navbar .navbar-inner {
@@ -39,12 +40,12 @@

 .navbar .nav > li a {
   height: 30px;
-  line-height: 30px;
+  line-height: 2;
 }

 .navbar-text {
   height: 50px;
-  line-height: 50px;
+  line-height: 3.3;
 }

 table.sortable thead {
@@ -120,6 +121,14 @@ pre {
   border: none;
 }

+.description-input {
+  overflow: hidden;
+  text-overflow: ellipsis;
+  width: 100%;
+  white-space: nowrap;
+  display: block;
+}
+
 .stacktrace-details {
   max-height: 300px;
   overflow-y: auto;
@@ -170,7 +179,7 @@ span.additional-metric-title {
 }

 .version {
-  line-height: 30px;
+  line-height: 2.5;
   vertical-align: bottom;
   font-size: 12px;
   padding: 0;
@@ -181,6 +190,7 @@ span.additional-metric-title {

 /* Hide all additional metrics by default. This is done here rather than using JavaScript to
  * avoid slow page loads for stage pages with large numbers (e.g., thousands) of tasks. */
-.scheduler_delay, .deserialization_time, .serialization_time, .getting_result_time {
+.scheduler_delay, .deserialization_time, .fetch_wait_time, .serialization_time,
+.getting_result_time {
   display: none;
 }
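The switch from pixel line-heights (30px, 50px) to unitless values (2, 3.3, 2.5) makes leading scale with each element's computed font-size instead of staying fixed, and .fetch_wait_time joins the set of additional task metrics hidden by default.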

core/src/main/scala/org/apache/spark/Aggregator.scala

Lines changed: 4 additions & 4 deletions
@@ -61,8 +61,8 @@ case class Aggregator[K, V, C] (
     // Update task metrics if context is not null
     // TODO: Make context non optional in a future release
     Option(context).foreach { c =>
-      c.taskMetrics.memoryBytesSpilled += combiners.memoryBytesSpilled
-      c.taskMetrics.diskBytesSpilled += combiners.diskBytesSpilled
+      c.taskMetrics.incMemoryBytesSpilled(combiners.memoryBytesSpilled)
+      c.taskMetrics.incDiskBytesSpilled(combiners.diskBytesSpilled)
     }
     combiners.iterator
   }
@@ -95,8 +95,8 @@ case class Aggregator[K, V, C] (
     // Update task metrics if context is not null
     // TODO: Make context non-optional in a future release
     Option(context).foreach { c =>
-      c.taskMetrics.memoryBytesSpilled += combiners.memoryBytesSpilled
-      c.taskMetrics.diskBytesSpilled += combiners.diskBytesSpilled
+      c.taskMetrics.incMemoryBytesSpilled(combiners.memoryBytesSpilled)
+      c.taskMetrics.incDiskBytesSpilled(combiners.diskBytesSpilled)
     }
     combiners.iterator
   }
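Both spill-accounting sites switch from += on TaskMetrics fields to incMemoryBytesSpilled/incDiskBytesSpilled calls, presumably matching an encapsulation of TaskMetrics behind accessor methods made elsewhere in this merge.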
