Commit 2d9e23e

Merge remote-tracking branch 'upstream/master' into security-branch-0.9-with-client-rebase_rework

Conflicts:
    core/src/main/scala/org/apache/spark/SparkEnv.scala
    core/src/main/scala/org/apache/spark/network/ConnectionManager.scala
    core/src/main/scala/org/apache/spark/ui/JettyUtils.scala
    repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala

2 parents: 5721c5a + f74ae0e

323 files changed: 7053 additions, 2559 deletions


.gitignore
Lines changed: 1 addition & 1 deletion

@@ -44,4 +44,4 @@ derby.log
 dist/
 spark-*-bin.tar.gz
 unit-tests.log
-lib/
+/lib/

assembly/pom.xml
Lines changed: 6 additions & 1 deletion

@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
-    <version>0.9.0-incubating-SNAPSHOT</version>
+    <version>1.0.0-incubating-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

@@ -71,6 +71,11 @@
      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-graphx_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
    <dependency>
      <groupId>net.sf.py4j</groupId>
      <artifactId>py4j</artifactId>
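
With spark-graphx now bundled into the assembly, GraphX is available on the classpath of the packaged distribution. A minimal sketch of what that enables, not taken from this commit: the object name, the local master URL and the edges.txt path are illustrative assumptions; GraphLoader.edgeListFile expects one "srcId dstId" pair per line.

import org.apache.spark.SparkContext
import org.apache.spark.graphx.GraphLoader

object GraphXSmokeTest {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "GraphXSmokeTest")
    // "edges.txt" is a hypothetical edge-list file: one "srcId dstId" pair per line.
    val graph = GraphLoader.edgeListFile(sc, "edges.txt")
    println("vertices: " + graph.vertices.count() + ", edges: " + graph.edges.count())
    sc.stop()
  }
}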

bagel/pom.xml
Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
-    <version>0.9.0-incubating-SNAPSHOT</version>
+    <version>1.0.0-incubating-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala
Lines changed: 33 additions & 23 deletions

@@ -28,21 +28,23 @@ object Bagel extends Logging {
   /**
    * Runs a Bagel program.
    * @param sc [[org.apache.spark.SparkContext]] to use for the program.
-   * @param vertices vertices of the graph represented as an RDD of (Key, Vertex) pairs. Often the Key will be
-   * the vertex id.
-   * @param messages initial set of messages represented as an RDD of (Key, Message) pairs. Often this will be an
-   * empty array, i.e. sc.parallelize(Array[K, Message]()).
-   * @param combiner [[org.apache.spark.bagel.Combiner]] combines multiple individual messages to a given vertex into one
-   * message before sending (which often involves network I/O).
-   * @param aggregator [[org.apache.spark.bagel.Aggregator]] performs a reduce across all vertices after each superstep,
-   * and provides the result to each vertex in the next superstep.
+   * @param vertices vertices of the graph represented as an RDD of (Key, Vertex) pairs. Often the
+   *                 Key will be the vertex id.
+   * @param messages initial set of messages represented as an RDD of (Key, Message) pairs. Often
+   *                 this will be an empty array, i.e. sc.parallelize(Array[K, Message]()).
+   * @param combiner [[org.apache.spark.bagel.Combiner]] combines multiple individual messages to a
+   *                 given vertex into one message before sending (which often involves network
+   *                 I/O).
+   * @param aggregator [[org.apache.spark.bagel.Aggregator]] performs a reduce across all vertices
+   *                   after each superstep and provides the result to each vertex in the next
+   *                   superstep.
    * @param partitioner [[org.apache.spark.Partitioner]] partitions values by key
    * @param numPartitions number of partitions across which to split the graph.
    *                      Default is the default parallelism of the SparkContext
-   * @param storageLevel [[org.apache.spark.storage.StorageLevel]] to use for caching of intermediate RDDs in each superstep.
-   * Defaults to caching in memory.
-   * @param compute function that takes a Vertex, optional set of (possibly combined) messages to the Vertex,
-   * optional Aggregator and the current superstep,
+   * @param storageLevel [[org.apache.spark.storage.StorageLevel]] to use for caching of
+   *                     intermediate RDDs in each superstep. Defaults to caching in memory.
+   * @param compute function that takes a Vertex, optional set of (possibly combined) messages to
+   *                the Vertex, optional Aggregator and the current superstep,
    * and returns a set of (Vertex, outgoing Messages) pairs
    * @tparam K key
    * @tparam V vertex type

@@ -71,7 +73,7 @@ object Bagel extends Logging {
     var msgs = messages
     var noActivity = false
     do {
-      logInfo("Starting superstep "+superstep+".")
+      logInfo("Starting superstep " + superstep + ".")
       val startTime = System.currentTimeMillis

       val aggregated = agg(verts, aggregator)

@@ -97,7 +99,8 @@ object Bagel extends Logging {
     verts
   }

-  /** Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]] and the default storage level */
+  /** Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]] and the default
+    * storage level */
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest, C: Manifest](
     sc: SparkContext,
     vertices: RDD[(K, V)],

@@ -106,8 +109,8 @@ object Bagel extends Logging {
     partitioner: Partitioner,
     numPartitions: Int
   )(
-    compute: (V, Option[C], Int) => (V, Array[M])
-  ): RDD[(K, V)] = run(sc, vertices, messages, combiner, numPartitions, DEFAULT_STORAGE_LEVEL)(compute)
+    compute: (V, Option[C], Int) => (V, Array[M])): RDD[(K, V)] = run(sc, vertices, messages,
+      combiner, numPartitions, DEFAULT_STORAGE_LEVEL)(compute)

   /** Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]] */
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest, C: Manifest](

@@ -127,8 +130,8 @@ object Bagel extends Logging {
   }

   /**
-   * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]], default [[org.apache.spark.HashPartitioner]]
-   * and default storage level
+   * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]], default
+   * [[org.apache.spark.HashPartitioner]] and default storage level
    */
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest, C: Manifest](
     sc: SparkContext,

@@ -138,9 +141,13 @@ object Bagel extends Logging {
     numPartitions: Int
   )(
     compute: (V, Option[C], Int) => (V, Array[M])
-  ): RDD[(K, V)] = run(sc, vertices, messages, combiner, numPartitions, DEFAULT_STORAGE_LEVEL)(compute)
+  ): RDD[(K, V)] = run(sc, vertices, messages, combiner, numPartitions,
+    DEFAULT_STORAGE_LEVEL)(compute)

-  /** Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]] and the default [[org.apache.spark.HashPartitioner]]*/
+  /**
+   * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]] and the
+   * default [[org.apache.spark.HashPartitioner]]
+   */
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest, C: Manifest](
     sc: SparkContext,
     vertices: RDD[(K, V)],

@@ -158,7 +165,8 @@ object Bagel extends Logging {
   }

   /**
-   * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]], default [[org.apache.spark.HashPartitioner]],
+   * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]],
+   * default [[org.apache.spark.HashPartitioner]],
    * [[org.apache.spark.bagel.DefaultCombiner]] and the default storage level
    */
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest](

@@ -171,7 +179,8 @@ object Bagel extends Logging {
   ): RDD[(K, V)] = run(sc, vertices, messages, numPartitions, DEFAULT_STORAGE_LEVEL)(compute)

   /**
-   * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]], the default [[org.apache.spark.HashPartitioner]]
+   * Runs a Bagel program with no [[org.apache.spark.bagel.Aggregator]],
+   * the default [[org.apache.spark.HashPartitioner]]
    * and [[org.apache.spark.bagel.DefaultCombiner]]
    */
   def run[K: Manifest, V <: Vertex : Manifest, M <: Message[K] : Manifest](

@@ -227,8 +236,9 @@ object Bagel extends Logging {
       })

       numMsgs += newMsgs.size
-      if (newVert.active)
+      if (newVert.active) {
         numActiveVerts += 1
+      }

       Some((newVert, newMsgs))
     }.persist(storageLevel)
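
As a hedged illustration of the entry points documented above, the sketch below drives the no-Aggregator, default-HashPartitioner, DefaultCombiner variant of Bagel.run. It is not part of this commit: CounterVertex, CounterMessage and BagelSketch are hypothetical names, and it assumes the 0.9-era Bagel traits in which Vertex exposes an active flag (used in the last hunk above) and Message[K] exposes a targetId. With DefaultCombiner the compute closure receives the raw Option[Array[M]] of incoming messages.

import org.apache.spark.SparkContext
import org.apache.spark.bagel.{Bagel, Vertex, Message}
import org.apache.spark.rdd.RDD

// Hypothetical vertex and message types for illustration only.
class CounterVertex(val value: Int, val active: Boolean) extends Vertex with Serializable
class CounterMessage(val targetId: String, val value: Int) extends Message[String] with Serializable

object BagelSketch {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "BagelSketch")
    val vertices: RDD[(String, CounterVertex)] =
      sc.parallelize(Seq("a" -> new CounterVertex(0, true), "b" -> new CounterVertex(0, true)))
    // Start with an empty message RDD, as the Scaladoc above suggests.
    val messages: RDD[(String, CounterMessage)] =
      sc.parallelize(Array[(String, CounterMessage)]())

    // No Aggregator, default HashPartitioner and DefaultCombiner: compute sees the
    // vertex, the (uncombined) incoming messages and the current superstep number.
    val result = Bagel.run(sc, vertices, messages, 2) {
      (v: CounterVertex, msgs: Option[Array[CounterMessage]], superstep: Int) =>
        val incoming = msgs.map(_.map(_.value).sum).getOrElse(0)
        // Deactivate from superstep 2 onward so the run terminates.
        (new CounterVertex(v.value + incoming, superstep < 2), Array[CounterMessage]())
    }
    result.collect().foreach { case (id, v) => println(id + " -> " + v.value) }
    sc.stop()
  }
}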

bin/run-example
Lines changed: 11 additions & 2 deletions

@@ -76,11 +76,20 @@ else
   fi
 fi

+# Set JAVA_OPTS to be able to load native libraries and to set heap size
+JAVA_OPTS="$SPARK_JAVA_OPTS"
+JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH"
+# Load extra JAVA_OPTS from conf/java-opts, if it exists
+if [ -e "$FWDIR/conf/java-opts" ] ; then
+  JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
+fi
+export JAVA_OPTS
+
 if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
   echo -n "Spark Command: "
-  echo "$RUNNER" -cp "$CLASSPATH" "$@"
+  echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
   echo "========================================"
   echo
 fi

-exec "$RUNNER" -cp "$CLASSPATH" "$@"
+exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"

bin/run-example2.cmd
Lines changed: 1 addition & 1 deletion

@@ -49,7 +49,7 @@ if "x%SPARK_EXAMPLES_JAR%"=="x" (

 rem Compute Spark classpath using external script
 set DONT_PRINT_CLASSPATH=1
-call "%FWDIR%sbin\compute-classpath.cmd"
+call "%FWDIR%bin\compute-classpath.cmd"
 set DONT_PRINT_CLASSPATH=0
 set CLASSPATH=%SPARK_EXAMPLES_JAR%;%CLASSPATH%
bin/spark-shell
Lines changed: 50 additions & 9 deletions

@@ -21,8 +21,6 @@
 # Shell script for starting the Spark Shell REPL
 # Note that it will set MASTER to spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}
 # if those two env vars are set in spark-env.sh but MASTER is not.
-# Options:
-#    -c <cores>    Set the number of cores for REPL to use

 cygwin=false
 case "`uname`" in

@@ -32,26 +30,69 @@ esac
 # Enter posix mode for bash
 set -o posix

+CORE_PATTERN="^[0-9]+$"
+MEM_PATTERN="^[0-9]+[m|g|M|G]$"
+
 FWDIR="$(cd `dirname $0`/..; pwd)"

+if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
+  echo "Usage: spark-shell [OPTIONS]"
+  echo "OPTIONS:"
+  echo "-c --cores num, the maximum number of cores to be used by the spark shell"
+  echo "-em --execmem num[m|g], the memory used by each executor of spark shell"
+  echo "-dm --drivermem num[m|g], the memory used by the spark shell and driver"
+  echo "-h --help, print this help information"
+  exit
+fi
+
+SPARK_SHELL_OPTS=""
+
 for o in "$@"; do
   if [ "$1" = "-c" -o "$1" = "--cores" ]; then
     shift
-    if [ -n "$1" ]; then
-      OPTIONS="-Dspark.cores.max=$1"
+    if [[ "$1" =~ $CORE_PATTERN ]]; then
+      SPARK_SHELL_OPTS="$SPARK_SHELL_OPTS -Dspark.cores.max=$1"
       shift
+    else
+      echo "ERROR: wrong format for -c/--cores"
+      exit 1
+    fi
+  fi
+  if [ "$1" = "-em" -o "$1" = "--execmem" ]; then
+    shift
+    if [[ $1 =~ $MEM_PATTERN ]]; then
+      SPARK_SHELL_OPTS="$SPARK_SHELL_OPTS -Dspark.executor.memory=$1"
+      shift
+    else
+      echo "ERROR: wrong format for --execmem/-em"
+      exit 1
+    fi
+  fi
+  if [ "$1" = "-dm" -o "$1" = "--drivermem" ]; then
+    shift
+    if [[ $1 =~ $MEM_PATTERN ]]; then
+      export SPARK_MEM=$1
+      shift
+    else
+      echo "ERROR: wrong format for --drivermem/-dm"
+      exit 1
     fi
   fi
 done

 # Set MASTER from spark-env if possible
+DEFAULT_SPARK_MASTER_PORT=7077
 if [ -z "$MASTER" ]; then
   if [ -e "$FWDIR/conf/spark-env.sh" ]; then
     . "$FWDIR/conf/spark-env.sh"
   fi
-  if [[ "x" != "x$SPARK_MASTER_IP" && "y" != "y$SPARK_MASTER_PORT" ]]; then
-    MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
-    export MASTER
+  if [ "x" != "x$SPARK_MASTER_IP" ]; then
+    if [ "y" != "y$SPARK_MASTER_PORT" ]; then
+      SPARK_MASTER_PORT="${SPARK_MASTER_PORT}"
+    else
+      SPARK_MASTER_PORT=$DEFAULT_SPARK_MASTER_PORT
+    fi
+    export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
   fi
 fi

@@ -90,10 +131,10 @@ if $cygwin; then
   # "Backspace sends ^H" setting in "Keys" section of the Mintty options
   # (see https://github.com/sbt/sbt/issues/562).
   stty -icanon min 1 -echo > /dev/null 2>&1
-  $FWDIR/bin/spark-class -Djline.terminal=unix $OPTIONS org.apache.spark.repl.Main "$@"
+  $FWDIR/bin/spark-class -Djline.terminal=unix $SPARK_SHELL_OPTS org.apache.spark.repl.Main "$@"
   stty icanon echo > /dev/null 2>&1
 else
-  $FWDIR/bin/spark-class $OPTIONS org.apache.spark.repl.Main "$@"
+  $FWDIR/bin/spark-class $SPARK_SHELL_OPTS org.apache.spark.repl.Main "$@"
 fi

 # record the exit status lest it be overwritten:

conf/metrics.properties.template
Lines changed: 1 addition & 1 deletion

@@ -67,7 +67,7 @@
 #   period    10         Poll period
 #   unit      seconds    Units of poll period
 #   ttl       1          TTL of messages sent by Ganglia
-#   mode      multicast  Ganglia network mode ('unicast' or 'mulitcast')
+#   mode      multicast  Ganglia network mode ('unicast' or 'multicast')

 # org.apache.spark.metrics.sink.JmxSink

core/pom.xml
Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
-    <version>0.9.0-incubating-SNAPSHOT</version>
+    <version>1.0.0-incubating-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

core/src/main/scala/org/apache/spark/CacheManager.scala
Lines changed: 2 additions & 2 deletions

@@ -31,8 +31,8 @@ private[spark] class CacheManager(blockManager: BlockManager) extends Logging {
   private val loading = new HashSet[RDDBlockId]()

   /** Gets or computes an RDD split. Used by RDD.iterator() when an RDD is cached. */
-  def getOrCompute[T](rdd: RDD[T], split: Partition, context: TaskContext, storageLevel: StorageLevel)
-      : Iterator[T] = {
+  def getOrCompute[T](rdd: RDD[T], split: Partition, context: TaskContext,
+      storageLevel: StorageLevel): Iterator[T] = {
     val key = RDDBlockId(rdd.id, split.index)
     logDebug("Looking for partition " + key)
     blockManager.get(key) match {
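
CacheManager.getOrCompute is internal, but it is the code path taken whenever a persisted RDD's iterator() runs on a cached partition. Below is a minimal user-level sketch that would exercise it, assuming a local master; CacheSketch is an illustrative name, not code from this commit.

import org.apache.spark.SparkContext
import org.apache.spark.storage.StorageLevel

object CacheSketch {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "CacheSketch")
    val data = sc.parallelize(1 to 1000).map(_ * 2)
    // persist() marks the RDD so that RDD.iterator() goes through
    // CacheManager.getOrCompute with the requested storage level.
    data.persist(StorageLevel.MEMORY_ONLY)
    println(data.count())  // first action: partitions are computed and cached
    println(data.count())  // second action: partitions are served from the block manager
    sc.stop()
  }
}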
