
Commit 51fb3d6

merge master
2 parents: 3771474 + abea2d4

244 files changed: +11066 / -4354 lines

.gitignore

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 sbt/*.jar
 .settings
 .cache
-.mima-excludes
+.generated-mima-excludes
 /build/
 work/
 out/

.rat-excludes

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@ target
 .project
 .classpath
 .mima-excludes
+.generated-mima-excludes
 .rat-excludes
 .*md
 derby.log

README.md

Lines changed: 4 additions & 3 deletions
@@ -9,13 +9,14 @@ You can find the latest Spark documentation, including a programming
 guide, on the project webpage at <http://spark.apache.org/documentation.html>.
 This README file only contains basic setup instructions.

-
 ## Building Spark

 Spark is built on Scala 2.10. To build Spark and its example programs, run:

     ./sbt/sbt assembly

+(You do not need to do this if you downloaded a pre-built package.)
+
 ## Interactive Scala Shell

 The easiest way to start using Spark is through the Scala shell:
@@ -41,9 +42,9 @@ And run the following command, which should also return 1000:
 Spark also comes with several sample programs in the `examples` directory.
 To run one of them, use `./bin/run-example <class> [params]`. For example:

-    ./bin/run-example org.apache.spark.examples.SparkLR
+    ./bin/run-example SparkPi

-will run the Logistic Regression example locally.
+will run the Pi example locally.

 You can set the MASTER environment variable when running examples to submit
 examples to a cluster. This can be a mesos:// or spark:// URL,
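
For illustration, both invocation styles the updated README describes look like this in practice (the cluster URL and the trailing 100 argument are made-up values, not from this commit):

    # Run the Pi example on the default local[*] master
    ./bin/run-example SparkPi

    # Submit the same example to a standalone cluster (hypothetical URL);
    # MASTER may also be a mesos:// URL, as the README notes
    MASTER=spark://host:7077 ./bin/run-example SparkPi 100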

assembly/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -96,7 +96,7 @@
       <filter>
         <artifact>*:*</artifact>
         <excludes>
-          <exclude>org.datanucleus:*</exclude>
+          <exclude>org/datanucleus/**</exclude>
           <exclude>META-INF/*.SF</exclude>
           <exclude>META-INF/*.DSA</exclude>
           <exclude>META-INF/*.RSA</exclude>

bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala

Lines changed: 0 additions & 2 deletions
@@ -38,8 +38,6 @@ class BagelSuite extends FunSuite with Assertions with BeforeAndAfter with Timeo
       sc.stop()
       sc = null
     }
-    // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown
-    System.clearProperty("spark.driver.port")
   }

   test("halting by voting") {

bin/compute-classpath.cmd

Lines changed: 23 additions & 1 deletion
@@ -20,6 +20,13 @@ rem
 rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
 rem script and the ExecutorRunner in standalone cluster mode.

+rem If we're called from spark-class2.cmd, it already set enabledelayedexpansion and setting
+rem it here would stop us from affecting its copy of the CLASSPATH variable; otherwise we
+rem need to set it here because we use !datanucleus_jars! below.
+if "%DONT_PRINT_CLASSPATH%"=="1" goto skip_delayed_expansion
+setlocal enabledelayedexpansion
+:skip_delayed_expansion
+
 set SCALA_VERSION=2.10

 rem Figure out where the Spark framework is installed
@@ -31,7 +38,7 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
 rem Build up classpath
 set CLASSPATH=%FWDIR%conf
 if exist "%FWDIR%RELEASE" (
-  for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
+  for %%d in ("%FWDIR%lib\spark-assembly*.jar") do (
     set ASSEMBLY_JAR=%%d
   )
 ) else (
@@ -42,6 +49,21 @@ if exist "%FWDIR%RELEASE" (

 set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%

+rem When Hive support is needed, Datanucleus jars must be included on the classpath.
+rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+rem built with Hive, so look for them there.
+if exist "%FWDIR%RELEASE" (
+  set datanucleus_dir=%FWDIR%lib
+) else (
+  set datanucleus_dir=%FWDIR%lib_managed\jars
+)
+set "datanucleus_jars="
+for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
+  set datanucleus_jars=!datanucleus_jars!;%%d
+)
+set CLASSPATH=%CLASSPATH%;%datanucleus_jars%
+
 set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes
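
For comparison with Spark's POSIX scripts, a rough bash analogue of the new Datanucleus block is sketched below; it assumes FWDIR is already set the way the batch script sets it, and is not taken from this commit:

    # Collect datanucleus-*.jar files onto the classpath, mirroring the .cmd logic above
    if [ -f "$FWDIR/RELEASE" ]; then
      datanucleus_dir="$FWDIR/lib"                # pre-built release layout
    else
      datanucleus_dir="$FWDIR/lib_managed/jars"   # populated by sbt/maven when built with Hive
    fi

    datanucleus_jars=""
    for jar in "$datanucleus_dir"/datanucleus-*.jar; do
      [ -e "$jar" ] || continue                   # glob matched nothing; skip
      datanucleus_jars="$datanucleus_jars:$jar"
    done
    CLASSPATH="$CLASSPATH$datanucleus_jars"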

bin/pyspark

Lines changed: 32 additions & 5 deletions
@@ -17,14 +17,20 @@
 # limitations under the License.
 #

-# Figure out where the Scala framework is installed
+# Figure out where Spark is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"

 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"

 SCALA_VERSION=2.10

+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  echo "Usage: ./bin/pyspark [options]"
+  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  exit 0
+fi
+
 # Exit if the user hasn't compiled Spark
 if [ ! -f "$FWDIR/RELEASE" ]; then
   # Exit if the user hasn't compiled Spark
@@ -52,13 +58,34 @@ export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.1-src.zip:$PYTHONPATH
 export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
 export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py

+# If IPython options are specified, assume user wants to run IPython
 if [ -n "$IPYTHON_OPTS" ]; then
   IPYTHON=1
 fi

-# Only use ipython if no command line arguments were provided [SPARK-1134]
-if [[ "$IPYTHON" = "1" && $# = 0 ]] ; then
-  exec ipython $IPYTHON_OPTS
+# Build up arguments list manually to preserve quotes and backslashes.
+# We export Spark submit arguments as an environment variable because shell.py must run as a
+# PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
+
+PYSPARK_SUBMIT_ARGS=""
+whitespace="[[:space:]]"
+for i in "$@"; do
+  if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
+  if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
+  PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
+done
+export PYSPARK_SUBMIT_ARGS
+
+# If a python file is provided, directly run spark-submit.
+if [[ "$1" =~ \.py$ ]]; then
+  echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
+  echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
+  exec $FWDIR/bin/spark-submit "$@"
 else
-  exec "$PYSPARK_PYTHON" "$@"
+  # Only use ipython if no command line arguments were provided [SPARK-1134]
+  if [[ "$IPYTHON" = "1" ]]; then
+    exec ipython $IPYTHON_OPTS
+  else
+    exec "$PYSPARK_PYTHON"
+  fi
 fi
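
A standalone sketch of what that quoting loop produces, run on made-up arguments:

    # Demonstrate the PYSPARK_SUBMIT_ARGS quoting loop on sample arguments
    set -- --name "my app" --conf 'spark.executor.extraJavaOptions=-Dkey="v"'

    PYSPARK_SUBMIT_ARGS=""
    whitespace="[[:space:]]"
    for i in "$@"; do
      # Escape embedded double quotes so they survive re-parsing
      if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
      # Re-quote any argument containing whitespace
      if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
      PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
    done
    echo "$PYSPARK_SUBMIT_ARGS"
    # Prints (with a leading space):
    #  --name "my app" --conf spark.executor.extraJavaOptions=-Dkey=\"v\"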

bin/pyspark2.cmd

Lines changed: 18 additions & 3 deletions
@@ -31,7 +31,7 @@ set FOUND_JAR=0
 for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
   set FOUND_JAR=1
 )
-if "%FOUND_JAR%"=="0" (
+if [%FOUND_JAR%] == [0] (
   echo Failed to find Spark assembly JAR.
   echo You need to build Spark with sbt\sbt assembly before running this program.
   goto exit
@@ -42,15 +42,30 @@ rem Load environment variables from conf\spark-env.cmd, if it exists
 if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"

 rem Figure out which Python to use.
-if "x%PYSPARK_PYTHON%"=="x" set PYSPARK_PYTHON=python
+if [%PYSPARK_PYTHON%] == [] set PYSPARK_PYTHON=python

 set PYTHONPATH=%FWDIR%python;%PYTHONPATH%
 set PYTHONPATH=%FWDIR%python\lib\py4j-0.8.1-src.zip;%PYTHONPATH%

 set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
 set PYTHONSTARTUP=%FWDIR%python\pyspark\shell.py
+set PYSPARK_SUBMIT_ARGS=%*

 echo Running %PYSPARK_PYTHON% with PYTHONPATH=%PYTHONPATH%

-"%PYSPARK_PYTHON%" %*
+rem Check whether the argument is a file
+for /f %%i in ('echo %1^| findstr /R "\.py"') do (
+  set PYTHON_FILE=%%i
+)
+
+if [%PYTHON_FILE%] == [] (
+  %PYSPARK_PYTHON%
+) else (
+  echo.
+  echo WARNING: Running python applications through ./bin/pyspark.cmd is deprecated as of Spark 1.0.
+  echo Use ./bin/spark-submit ^<python file^>
+  echo.
+  "%FWDIR%\bin\spark-submit.cmd" %PYSPARK_SUBMIT_ARGS%
+)
+
 :exit

bin/run-example

Lines changed: 11 additions & 12 deletions
@@ -23,6 +23,16 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 export SPARK_HOME="$FWDIR"
 EXAMPLES_DIR="$FWDIR"/examples

+if [ -n "$1" ]; then
+  EXAMPLE_CLASS="$1"
+  shift
+else
+  echo "Usage: ./bin/run-example <example-class> [example-args]"
+  echo "  - set MASTER=XX to use a specific master"
+  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)"
+  exit 1
+fi
+
 if [ -f "$FWDIR/RELEASE" ]; then
   export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
 elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
@@ -37,23 +47,12 @@ fi

 EXAMPLE_MASTER=${MASTER:-"local[*]"}

-if [ -n "$1" ]; then
-  EXAMPLE_CLASS="$1"
-  shift
-else
-  echo "usage: ./bin/run-example <example-class> [example-args]"
-  echo "  - set MASTER=XX to use a specific master"
-  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.MovieLensALS)"
-  echo
-  exit -1
-fi
-
 if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
   EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
 fi

 ./bin/spark-submit \
   --master $EXAMPLE_MASTER \
   --class $EXAMPLE_CLASS \
-  $SPARK_EXAMPLES_JAR \
+  "$SPARK_EXAMPLES_JAR" \
   "$@"

bin/run-example2.cmd

Lines changed: 39 additions & 12 deletions
@@ -30,32 +30,59 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"

 rem Test that an argument was given
 if not "x%1"=="x" goto arg_given
-  echo Usage: run-example ^<example-class^> [^<args^>]
+  echo Usage: run-example ^<example-class^> [example-args]
+  echo   - set MASTER=XX to use a specific master
+  echo   - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)
   goto exit
 :arg_given

 set EXAMPLES_DIR=%FWDIR%examples

 rem Figure out the JAR file that our examples were packaged into.
 set SPARK_EXAMPLES_JAR=
-for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*assembly*.jar") do (
-  set SPARK_EXAMPLES_JAR=%%d
+if exist "%FWDIR%RELEASE" (
+  for %%d in ("%FWDIR%lib\spark-examples*.jar") do (
+    set SPARK_EXAMPLES_JAR=%%d
+  )
+) else (
+  for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*.jar") do (
+    set SPARK_EXAMPLES_JAR=%%d
+  )
 )
 if "x%SPARK_EXAMPLES_JAR%"=="x" (
   echo Failed to find Spark examples assembly JAR.
   echo You need to build Spark with sbt\sbt assembly before running this program.
   goto exit
 )

-rem Compute Spark classpath using external script
-set DONT_PRINT_CLASSPATH=1
-call "%FWDIR%bin\compute-classpath.cmd"
-set DONT_PRINT_CLASSPATH=0
-set CLASSPATH=%SPARK_EXAMPLES_JAR%;%CLASSPATH%
+rem Set master from MASTER environment variable if given
+if "x%MASTER%"=="x" (
+  set EXAMPLE_MASTER=local[*]
+) else (
+  set EXAMPLE_MASTER=%MASTER%
+)
+
+rem If the EXAMPLE_CLASS does not start with org.apache.spark.examples, add that
+set EXAMPLE_CLASS=%1
+set PREFIX=%EXAMPLE_CLASS:~0,25%
+if not %PREFIX%==org.apache.spark.examples (
+  set EXAMPLE_CLASS=org.apache.spark.examples.%EXAMPLE_CLASS%
+)
+
+rem Get the tail of the argument list, to skip the first one. This is surprisingly
+rem complicated on Windows.
+set "ARGS="
+:top
+shift
+if "%~1" neq "" (
+  set ARGS=%ARGS% "%~1"
+  goto :top
+)
+if defined ARGS set ARGS=%ARGS:~1%

-rem Figure out where java is.
-set RUNNER=java
-if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
+call "%FWDIR%bin\spark-submit.cmd" ^
+  --master %EXAMPLE_MASTER% ^
+  --class %EXAMPLE_CLASS% ^
+  "%SPARK_EXAMPLES_JAR%" %ARGS%

-"%RUNNER%" -cp "%CLASSPATH%" %JAVA_OPTS% %*
 :exit
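
As the batch comment notes, taking the tail of the argument list is awkward on Windows; in POSIX shell the same step is just a shift, as in this sketch (EXAMPLE_MASTER and SPARK_EXAMPLES_JAR stand in for values computed earlier in bin/run-example):

    # POSIX-shell equivalent of the ARGS loop: take the first argument, pass the rest through
    EXAMPLE_CLASS="$1"
    shift
    ./bin/spark-submit --master "$EXAMPLE_MASTER" --class "$EXAMPLE_CLASS" \
      "$SPARK_EXAMPLES_JAR" "$@"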
