Skip to content

Commit 5f3105a

Browse files
committed
Merge remote-tracking branch 'upstream/master'
Conflicts: core/src/main/scala/org/apache/spark/rdd/RDD.scala streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala
2 parents 70f494f + b715aa0 commit 5f3105a

File tree

907 files changed

+41461
-7026
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

907 files changed

+41461
-7026
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ conf/spark-env.sh
1919
conf/streaming-env.sh
2020
conf/log4j.properties
2121
conf/spark-defaults.conf
22+
conf/hive-site.xml
2223
docs/_site
2324
docs/api
2425
target/
@@ -50,9 +51,11 @@ unit-tests.log
5051
rat-results.txt
5152
scalastyle.txt
5253
conf/*.conf
54+
scalastyle-output.xml
5355

5456
# For Hive
5557
metastore_db/
5658
metastore/
5759
warehouse/
5860
TempStatsStore/
61+
sql/hive-thriftserver/test_warehouses

.rat-excludes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,4 @@ dist/*
5555
.*ipr
5656
.*iws
5757
logs
58+
.*scalastyle-output.xml

LICENSE

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
272272

273273

274274
========================================================================
275-
For Py4J (python/lib/py4j0.7.egg and files in assembly/lib/net/sf/py4j):
275+
For Py4J (python/lib/py4j-0.8.2.1-src.zip):
276276
========================================================================
277277

278278
Copyright (c) 2009-2011, Barthelemy Dagenais All rights reserved.
@@ -442,7 +442,7 @@ Written by Pavel Binko, Dino Ferrero Merlino, Wolfgang Hoschek, Tony Johnson, An
442442

443443

444444
========================================================================
445-
Fo SnapTree:
445+
For SnapTree:
446446
========================================================================
447447

448448
SNAPTREE LICENSE
@@ -482,6 +482,24 @@ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
482482
SUCH DAMAGE.
483483

484484

485+
========================================================================
486+
For Timsort (core/src/main/java/org/apache/spark/util/collection/Sorter.java):
487+
========================================================================
488+
Copyright (C) 2008 The Android Open Source Project
489+
490+
Licensed under the Apache License, Version 2.0 (the "License");
491+
you may not use this file except in compliance with the License.
492+
You may obtain a copy of the License at
493+
494+
http://www.apache.org/licenses/LICENSE-2.0
495+
496+
Unless required by applicable law or agreed to in writing, software
497+
distributed under the License is distributed on an "AS IS" BASIS,
498+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
499+
See the License for the specific language governing permissions and
500+
limitations under the License.
501+
502+
485503
========================================================================
486504
BSD-style licenses
487505
========================================================================
@@ -514,7 +532,7 @@ The following components are provided under a BSD-style license. See project lin
514532
(New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
515533
(The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
516534
(The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
517-
(The New BSD License) Py4J (net.sf.py4j:py4j:0.8.1 - http://py4j.sourceforge.net/)
535+
(The New BSD License) Py4J (net.sf.py4j:py4j:0.8.2.1 - http://py4j.sourceforge.net/)
518536
(Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/)
519537
(ISC/BSD License) jbcrypt (org.mindrot:jbcrypt:0.3m - http://www.mindrot.org/)
520538

@@ -531,3 +549,4 @@ The following components are provided under the MIT License. See project link fo
531549
(MIT License) pyrolite (org.spark-project:pyrolite:2.0.1 - http://pythonhosted.org/Pyro4/)
532550
(MIT License) scopt (com.github.scopt:scopt_2.10:3.2.0 - https://github.com/scopt/scopt)
533551
(The MIT License) Mockito (org.mockito:mockito-all:1.8.5 - http://www.mockito.org)
552+
(MIT License) jquery (https://jquery.org/license/)

assembly/pom.xml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
<deb.pkg.name>spark</deb.pkg.name>
4040
<deb.install.path>/usr/share/spark</deb.install.path>
4141
<deb.user>root</deb.user>
42+
<deb.bin.filemode>744</deb.bin.filemode>
4243
</properties>
4344

4445
<dependencies>
@@ -164,6 +165,16 @@
164165
</dependency>
165166
</dependencies>
166167
</profile>
168+
<profile>
169+
<id>hive-thriftserver</id>
170+
<dependencies>
171+
<dependency>
172+
<groupId>org.apache.spark</groupId>
173+
<artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>
174+
<version>${project.version}</version>
175+
</dependency>
176+
</dependencies>
177+
</profile>
167178
<profile>
168179
<id>spark-ganglia-lgpl</id>
169180
<dependencies>
@@ -276,7 +287,7 @@
276287
<user>${deb.user}</user>
277288
<group>${deb.user}</group>
278289
<prefix>${deb.install.path}/bin</prefix>
279-
<filemode>744</filemode>
290+
<filemode>${deb.bin.filemode}</filemode>
280291
</mapper>
281292
</data>
282293
<data>

bagel/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
<groupId>org.apache.spark</groupId>
2929
<artifactId>spark-bagel_2.10</artifactId>
3030
<properties>
31-
<sbt.project.name>bagel</sbt.project.name>
31+
<sbt.project.name>bagel</sbt.project.name>
3232
</properties>
3333
<packaging>jar</packaging>
3434
<name>Spark Project Bagel</name>

bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ object Bagel extends Logging {
7272
var verts = vertices
7373
var msgs = messages
7474
var noActivity = false
75+
var lastRDD: RDD[(K, (V, Array[M]))] = null
7576
do {
7677
logInfo("Starting superstep " + superstep + ".")
7778
val startTime = System.currentTimeMillis
@@ -83,6 +84,10 @@ object Bagel extends Logging {
8384
val superstep_ = superstep // Create a read-only copy of superstep for capture in closure
8485
val (processed, numMsgs, numActiveVerts) =
8586
comp[K, V, M, C](sc, grouped, compute(_, _, aggregated, superstep_), storageLevel)
87+
if (lastRDD != null) {
88+
lastRDD.unpersist(false)
89+
}
90+
lastRDD = processed
8691

8792
val timeTaken = System.currentTimeMillis - startTime
8893
logInfo("Superstep %d took %d s".format(superstep, timeTaken / 1000))

bin/beeline

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/usr/bin/env bash
2+
3+
#
4+
# Licensed to the Apache Software Foundation (ASF) under one or more
5+
# contributor license agreements. See the NOTICE file distributed with
6+
# this work for additional information regarding copyright ownership.
7+
# The ASF licenses this file to You under the Apache License, Version 2.0
8+
# (the "License"); you may not use this file except in compliance with
9+
# the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
#
19+
20+
#
21+
# Shell script for starting BeeLine
22+
23+
# Enter posix mode for bash
24+
set -o posix
25+
26+
# Figure out where Spark is installed
27+
# Resolve the Spark install root (the parent of this script's directory).
# $0 and the dirname result are quoted so paths containing spaces or glob
# characters are passed to cd as a single, literal argument.
FWDIR="$(cd "$(dirname "$0")"/..; pwd)"
28+
29+
CLASS="org.apache.hive.beeline.BeeLine"
30+
# Replace this shell with spark-class running the BeeLine main class; every
# expansion is quoted so no argument is word-split or glob-expanded.
exec "$FWDIR/bin/spark-class" "$CLASS" "$@"

bin/compute-classpath.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ if [ -n "$SPARK_PREPEND_CLASSES" ]; then
5252
CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/classes"
5353
CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
5454
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"
55+
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive-thriftserver/target/scala-$SCALA_VERSION/classes"
5556
CLASSPATH="$CLASSPATH:$FWDIR/yarn/stable/target/scala-$SCALA_VERSION/classes"
5657
fi
5758

bin/pyspark

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,18 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
2323
# Export this as SPARK_HOME
2424
export SPARK_HOME="$FWDIR"
2525

26+
# Load the shared helper script; the path is quoted so a Spark home that
# contains spaces is still sourced as one file.
source "$FWDIR/bin/utils.sh"
27+
2628
SCALA_VERSION=2.10
2729

28-
if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
30+
function usage() {
2931
echo "Usage: ./bin/pyspark [options]" 1>&2
3032
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
3133
exit 0
34+
}
35+
36+
if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
37+
usage
3238
fi
3339

3440
# Exit if the user hasn't compiled Spark
@@ -52,7 +58,7 @@ export PYSPARK_PYTHON
5258

5359
# Add the PySpark classes to the Python path:
5460
export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH
55-
export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.1-src.zip:$PYTHONPATH
61+
export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH
5662

5763
# Load the PySpark shell.py script when ./pyspark is used interactively:
5864
export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
@@ -66,10 +72,11 @@ fi
6672
# Build up arguments list manually to preserve quotes and backslashes.
6773
# We export Spark submit arguments as an environment variable because shell.py must run as a
6874
# PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
69-
75+
SUBMIT_USAGE_FUNCTION=usage
76+
gatherSparkSubmitOpts "$@"
7077
PYSPARK_SUBMIT_ARGS=""
7178
whitespace="[[:space:]]"
72-
for i in "$@"; do
79+
for i in "${SUBMISSION_OPTS[@]}"; do
7380
# Backslash-escape every double quote in the argument. Parameter expansion is
# used instead of `echo $i | sed` so the value is never word-split or
# glob-expanded (which collapsed whitespace and expanded `*`).
if [[ $i =~ \" ]]; then i=${i//\"/\\\"}; fi
7481
if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
7582
PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
@@ -90,7 +97,10 @@ fi
9097
if [[ "$1" =~ \.py$ ]]; then
9198
echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
9299
echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
93-
exec $FWDIR/bin/spark-submit "$@"
100+
primary=$1
101+
shift
102+
gatherSparkSubmitOpts "$@"
103+
# Hand off to spark-submit: submission options first, then the Python file,
# then the application's own arguments. The launcher path and the primary
# file are quoted so paths containing spaces stay single arguments.
exec "$FWDIR/bin/spark-submit" "${SUBMISSION_OPTS[@]}" "$primary" "${APPLICATION_OPTS[@]}"
94104
else
95105
# Only use ipython if no command line arguments were provided [SPARK-1134]
96106
if [[ "$IPYTHON" = "1" ]]; then

bin/pyspark2.cmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ rem Figure out which Python to use.
4545
if [%PYSPARK_PYTHON%] == [] set PYSPARK_PYTHON=python
4646

4747
set PYTHONPATH=%FWDIR%python;%PYTHONPATH%
48-
set PYTHONPATH=%FWDIR%python\lib\py4j-0.8.1-src.zip;%PYTHONPATH%
48+
set PYTHONPATH=%FWDIR%python\lib\py4j-0.8.2.1-src.zip;%PYTHONPATH%
4949

5050
set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
5151
set PYTHONSTARTUP=%FWDIR%python\pyspark\shell.py

0 commit comments

Comments
 (0)