Commit 623a5f0: merge master
2 parents f06d5ba + 9a5d482
266 files changed, 10861 insertions(+), 3524 deletions(-)


.gitignore

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 sbt/*.jar
 .settings
 .cache
-.mima-excludes
+.generated-mima-excludes
 /build/
 work/
 out/

.rat-excludes

Lines changed: 7 additions & 0 deletions
@@ -3,6 +3,7 @@ target
 .project
 .classpath
 .mima-excludes
+.generated-mima-excludes
 .rat-excludes
 .*md
 derby.log
@@ -43,3 +44,9 @@ test.out/*
 .*iml
 service.properties
 db.lck
+build/*
+dist/*
+.*out
+.*ipr
+.*iws
+logs

README.md

Lines changed: 4 additions & 3 deletions
@@ -9,13 +9,14 @@ You can find the latest Spark documentation, including a programming
 guide, on the project webpage at <http://spark.apache.org/documentation.html>.
 This README file only contains basic setup instructions.
 
-
 ## Building Spark
 
 Spark is built on Scala 2.10. To build Spark and its example programs, run:
 
     ./sbt/sbt assembly
 
+(You do not need to do this if you downloaded a pre-built package.)
+
 ## Interactive Scala Shell
 
 The easiest way to start using Spark is through the Scala shell:
@@ -41,9 +42,9 @@ And run the following command, which should also return 1000:
 Spark also comes with several sample programs in the `examples` directory.
 To run one of them, use `./bin/run-example <class> [params]`. For example:
 
-    ./bin/run-example org.apache.spark.examples.SparkLR
+    ./bin/run-example SparkPi
 
-will run the Logistic Regression example locally.
+will run the Pi example locally.
 
 You can set the MASTER environment variable when running examples to submit
 examples to a cluster. This can be a mesos:// or spark:// URL,
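
As a concrete illustration of the MASTER variable mentioned in the README text above, the following sketch shows both invocation styles; the spark://host:7077 URL is a placeholder, not a value taken from this commit:

    # Run the Pi example locally (the script picks a default master)
    ./bin/run-example SparkPi

    # Submit the same example to a standalone cluster instead;
    # spark://host:7077 is a placeholder cluster URL
    MASTER=spark://host:7077 ./bin/run-example SparkPi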

assembly/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -96,7 +96,7 @@
       <filter>
         <artifact>*:*</artifact>
         <excludes>
-          <exclude>org.datanucleus:*</exclude>
+          <exclude>org/datanucleus/**</exclude>
           <exclude>META-INF/*.SF</exclude>
           <exclude>META-INF/*.DSA</exclude>
           <exclude>META-INF/*.RSA</exclude>
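
A note on this one-line change: in maven-shade-plugin filters, the <artifact> element takes Maven coordinates, while <excludes> patterns match entry paths inside the jar (as the neighboring META-INF patterns do), so org.datanucleus:* in the excludes list would not have matched the datanucleus class files; org/datanucleus/** does. The compute-classpath changes below add the standalone datanucleus jars instead, since their plugin.xml metadata is lost inside an uber jar. One way to verify the result, assuming a locally built assembly whose exact file name may differ:

    # List any org/datanucleus entries that slipped into the shaded jar;
    # no output from grep means the exclude pattern took effect.
    jar tf assembly/target/scala-2.10/spark-assembly-*hadoop*.jar | grep '^org/datanucleus'
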
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Bagel: An implementation of Pregel in Spark. THIS IS DEPRECATED - use Spark's GraphX library.
+ */
+package org.apache.spark.bagel;
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark
+
+/**
+ * Bagel: An implementation of Pregel in Spark. THIS IS DEPRECATED - use Spark's GraphX library.
+ */
+package object bagel

bin/compute-classpath.cmd

Lines changed: 23 additions & 1 deletion
@@ -20,6 +20,13 @@ rem
 rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
 rem script and the ExecutorRunner in standalone cluster mode.
 
+rem If we're called from spark-class2.cmd, it already set enabledelayedexpansion and setting
+rem it here would stop us from affecting its copy of the CLASSPATH variable; otherwise we
+rem need to set it here because we use !datanucleus_jars! below.
+if "%DONT_PRINT_CLASSPATH%"=="1" goto skip_delayed_expansion
+setlocal enabledelayedexpansion
+:skip_delayed_expansion
+
 set SCALA_VERSION=2.10
 
 rem Figure out where the Spark framework is installed
@@ -31,7 +38,7 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
 rem Build up classpath
 set CLASSPATH=%FWDIR%conf
 if exist "%FWDIR%RELEASE" (
-  for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
+  for %%d in ("%FWDIR%lib\spark-assembly*.jar") do (
     set ASSEMBLY_JAR=%%d
   )
 ) else (
@@ -42,6 +49,21 @@ if exist "%FWDIR%RELEASE" (
 
 set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
 
+rem When Hive support is needed, Datanucleus jars must be included on the classpath.
+rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+rem built with Hive, so look for them there.
+if exist "%FWDIR%RELEASE" (
+  set datanucleus_dir=%FWDIR%lib
+) else (
+  set datanucleus_dir=%FWDIR%lib_managed\jars
+)
+set "datanucleus_jars="
+for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
+  set datanucleus_jars=!datanucleus_jars!;%%d
+)
+set CLASSPATH=%CLASSPATH%;%datanucleus_jars%
+
 set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes

bin/pyspark

Lines changed: 32 additions & 5 deletions
@@ -17,14 +17,20 @@
 # limitations under the License.
 #
 
-# Figure out where the Scala framework is installed
+# Figure out where Spark is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
 
 SCALA_VERSION=2.10
 
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  echo "Usage: ./bin/pyspark [options]"
+  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  exit 0
+fi
+
 # Exit if the user hasn't compiled Spark
 if [ ! -f "$FWDIR/RELEASE" ]; then
   # Exit if the user hasn't compiled Spark
@@ -52,13 +58,34 @@ export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.1-src.zip:$PYTHONPATH
 export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
 export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py
 
+# If IPython options are specified, assume user wants to run IPython
 if [ -n "$IPYTHON_OPTS" ]; then
   IPYTHON=1
 fi
 
-# Only use ipython if no command line arguments were provided [SPARK-1134]
-if [[ "$IPYTHON" = "1" && $# = 0 ]] ; then
-  exec ipython $IPYTHON_OPTS
+# Build up arguments list manually to preserve quotes and backslashes.
+# We export Spark submit arguments as an environment variable because shell.py must run as a
+# PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
+
+PYSPARK_SUBMIT_ARGS=""
+whitespace="[[:space:]]"
+for i in "$@"; do
+  if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
+  if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
+  PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
+done
+export PYSPARK_SUBMIT_ARGS
+
+# If a python file is provided, directly run spark-submit.
+if [[ "$1" =~ \.py$ ]]; then
+  echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
+  echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
+  exec $FWDIR/bin/spark-submit "$@"
 else
-  exec "$PYSPARK_PYTHON" "$@"
+  # Only use ipython if no command line arguments were provided [SPARK-1134]
+  if [[ "$IPYTHON" = "1" ]]; then
+    exec ipython $IPYTHON_OPTS
+  else
+    exec "$PYSPARK_PYTHON"
+  fi
 fi
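
The quoting loop above is the subtle part of this change, so here is a self-contained sketch of what it does; the sample arguments are invented for illustration:

    #!/usr/bin/env bash
    # Rebuild an argument list the way bin/pyspark now does, so that an
    # argument containing spaces survives the round trip through a single
    # environment variable (PYSPARK_SUBMIT_ARGS in the real script).
    ARGS=""
    whitespace="[[:space:]]"
    for i in --master local --name "my app"; do
      # Escape embedded double quotes first...
      if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
      # ...then re-quote any argument that contains whitespace.
      if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
      ARGS="$ARGS $i"
    done
    echo "$ARGS"   # prints (with a leading space): --master local --name "my app"

The same pattern shows up in pyspark2.cmd below, which simply forwards %* as PYSPARK_SUBMIT_ARGS.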

bin/pyspark2.cmd

Lines changed: 18 additions & 3 deletions
@@ -31,7 +31,7 @@ set FOUND_JAR=0
 for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
   set FOUND_JAR=1
 )
-if "%FOUND_JAR%"=="0" (
+if [%FOUND_JAR%] == [0] (
   echo Failed to find Spark assembly JAR.
   echo You need to build Spark with sbt\sbt assembly before running this program.
   goto exit
@@ -42,15 +42,30 @@ rem Load environment variables from conf\spark-env.cmd, if it exists
 if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
 
 rem Figure out which Python to use.
-if "x%PYSPARK_PYTHON%"=="x" set PYSPARK_PYTHON=python
+if [%PYSPARK_PYTHON%] == [] set PYSPARK_PYTHON=python
 
 set PYTHONPATH=%FWDIR%python;%PYTHONPATH%
 set PYTHONPATH=%FWDIR%python\lib\py4j-0.8.1-src.zip;%PYTHONPATH%
 
 set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
 set PYTHONSTARTUP=%FWDIR%python\pyspark\shell.py
+set PYSPARK_SUBMIT_ARGS=%*
 
 echo Running %PYSPARK_PYTHON% with PYTHONPATH=%PYTHONPATH%
 
-"%PYSPARK_PYTHON%" %*
+rem Check whether the argument is a file
+for /f %%i in ('echo %1^| findstr /R "\.py"') do (
+  set PYTHON_FILE=%%i
+)
+
+if [%PYTHON_FILE%] == [] (
+  %PYSPARK_PYTHON%
+) else (
+  echo.
+  echo WARNING: Running python applications through ./bin/pyspark.cmd is deprecated as of Spark 1.0.
+  echo Use ./bin/spark-submit ^<python file^>
+  echo.
+  "%FWDIR%\bin\spark-submit.cmd" %PYSPARK_SUBMIT_ARGS%
+)
+
 :exit
bin/run-example

Lines changed: 11 additions & 12 deletions
@@ -23,6 +23,16 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 export SPARK_HOME="$FWDIR"
 EXAMPLES_DIR="$FWDIR"/examples
 
+if [ -n "$1" ]; then
+  EXAMPLE_CLASS="$1"
+  shift
+else
+  echo "Usage: ./bin/run-example <example-class> [example-args]"
+  echo "  - set MASTER=XX to use a specific master"
+  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)"
+  exit 1
+fi
+
 if [ -f "$FWDIR/RELEASE" ]; then
   export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
 elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
@@ -37,23 +47,12 @@ fi
 
 EXAMPLE_MASTER=${MASTER:-"local[*]"}
 
-if [ -n "$1" ]; then
-  EXAMPLE_CLASS="$1"
-  shift
-else
-  echo "usage: ./bin/run-example <example-class> [example-args]"
-  echo "  - set MASTER=XX to use a specific master"
-  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.MovieLensALS)"
-  echo
-  exit -1
-fi
-
 if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
   EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
 fi
 
 ./bin/spark-submit \
   --master $EXAMPLE_MASTER \
   --class $EXAMPLE_CLASS \
-  $SPARK_EXAMPLES_JAR \
+  "$SPARK_EXAMPLES_JAR" \
   "$@"
