Skip to content

Commit 8f90b22

Browse files
committed
Merge branch 'master' of https://github.com/apache/spark into SPARK-1609
2 parents bcf36cb + dc3b640 commit 8f90b22

File tree

61 files changed

+685
-496
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+685
-496
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ conf/java-opts
1818
conf/spark-env.sh
1919
conf/streaming-env.sh
2020
conf/log4j.properties
21+
conf/spark-defaults.conf
2122
docs/_site
2223
docs/api
2324
target/

bin/compute-classpath.cmd

Lines changed: 88 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -1,69 +1,88 @@
1-
@echo off
2-
3-
rem
4-
rem Licensed to the Apache Software Foundation (ASF) under one or more
5-
rem contributor license agreements. See the NOTICE file distributed with
6-
rem this work for additional information regarding copyright ownership.
7-
rem The ASF licenses this file to You under the Apache License, Version 2.0
8-
rem (the "License"); you may not use this file except in compliance with
9-
rem the License. You may obtain a copy of the License at
10-
rem
11-
rem http://www.apache.org/licenses/LICENSE-2.0
12-
rem
13-
rem Unless required by applicable law or agreed to in writing, software
14-
rem distributed under the License is distributed on an "AS IS" BASIS,
15-
rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16-
rem See the License for the specific language governing permissions and
17-
rem limitations under the License.
18-
rem
19-
20-
rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
21-
rem script and the ExecutorRunner in standalone cluster mode.
22-
23-
set SCALA_VERSION=2.10
24-
25-
rem Figure out where the Spark framework is installed
26-
set FWDIR=%~dp0..\
27-
28-
rem Load environment variables from conf\spark-env.cmd, if it exists
29-
if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
30-
31-
rem Build up classpath
32-
set CLASSPATH=%FWDIR%conf
33-
if exist "%FWDIR%RELEASE" (
34-
for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
35-
set ASSEMBLY_JAR=%%d
36-
)
37-
) else (
38-
for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
39-
set ASSEMBLY_JAR=%%d
40-
)
41-
)
42-
set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
43-
44-
if "x%SPARK_TESTING%"=="x1" (
45-
rem Add test classes to path
46-
set CLASSPATH=%CLASSPATH%;%FWDIR%core\target\scala-%SCALA_VERSION%\test-classes
47-
set CLASSPATH=%CLASSPATH%;%FWDIR%repl\target\scala-%SCALA_VERSION%\test-classes
48-
set CLASSPATH=%CLASSPATH%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\test-classes
49-
set CLASSPATH=%CLASSPATH%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\test-classes
50-
set CLASSPATH=%CLASSPATH%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\test-classes
51-
)
52-
53-
rem Add hadoop conf dir - else FileSystem.*, etc fail
54-
rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
55-
rem the configuration files.
56-
if "x%HADOOP_CONF_DIR%"=="x" goto no_hadoop_conf_dir
57-
set CLASSPATH=%CLASSPATH%;%HADOOP_CONF_DIR%
58-
:no_hadoop_conf_dir
59-
60-
if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
61-
set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
62-
:no_yarn_conf_dir
63-
64-
rem A bit of a hack to allow calling this script within run2.cmd without seeing output
65-
if "%DONT_PRINT_CLASSPATH%"=="1" goto exit
66-
67-
echo %CLASSPATH%
68-
69-
:exit
1+
@echo off
2+
3+
rem
4+
rem Licensed to the Apache Software Foundation (ASF) under one or more
5+
rem contributor license agreements. See the NOTICE file distributed with
6+
rem this work for additional information regarding copyright ownership.
7+
rem The ASF licenses this file to You under the Apache License, Version 2.0
8+
rem (the "License"); you may not use this file except in compliance with
9+
rem the License. You may obtain a copy of the License at
10+
rem
11+
rem http://www.apache.org/licenses/LICENSE-2.0
12+
rem
13+
rem Unless required by applicable law or agreed to in writing, software
14+
rem distributed under the License is distributed on an "AS IS" BASIS,
15+
rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
rem See the License for the specific language governing permissions and
17+
rem limitations under the License.
18+
rem
19+
20+
rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
21+
rem script and the ExecutorRunner in standalone cluster mode.
22+
23+
set SCALA_VERSION=2.10
24+
25+
rem Figure out where the Spark framework is installed
26+
set FWDIR=%~dp0..\
27+
28+
rem Load environment variables from conf\spark-env.cmd, if it exists
29+
if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
30+
31+
rem Build up classpath
32+
set CLASSPATH=%FWDIR%conf
33+
if exist "%FWDIR%RELEASE" (
34+
for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
35+
set ASSEMBLY_JAR=%%d
36+
)
37+
) else (
38+
for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
39+
set ASSEMBLY_JAR=%%d
40+
)
41+
)
42+
43+
set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
44+
45+
set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
46+
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
47+
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes
48+
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\classes
49+
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%graphx\target\scala-%SCALA_VERSION%\classes
50+
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\classes
51+
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%tools\target\scala-%SCALA_VERSION%\classes
52+
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\catalyst\target\scala-%SCALA_VERSION%\classes
53+
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\core\target\scala-%SCALA_VERSION%\classes
54+
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%sql\hive\target\scala-%SCALA_VERSION%\classes
55+
56+
set SPARK_TEST_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\test-classes
57+
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\test-classes
58+
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\test-classes
59+
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\test-classes
60+
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%graphx\target\scala-%SCALA_VERSION%\test-classes
61+
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\test-classes
62+
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\catalyst\target\scala-%SCALA_VERSION%\test-classes
63+
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\core\target\scala-%SCALA_VERSION%\test-classes
64+
set SPARK_TEST_CLASSES=%SPARK_TEST_CLASSES%;%FWDIR%sql\hive\target\scala-%SCALA_VERSION%\test-classes
65+
66+
if "x%SPARK_TESTING%"=="x1" (
67+
rem Add test classes to path - note, add SPARK_CLASSES and SPARK_TEST_CLASSES before CLASSPATH
68+
rem so that local compilation takes precedence over assembled jar
69+
set CLASSPATH=%SPARK_CLASSES%;%SPARK_TEST_CLASSES%;%CLASSPATH%
70+
)
71+
72+
rem Add hadoop conf dir - else FileSystem.*, etc fail
73+
rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
74+
rem the configuration files.
75+
if "x%HADOOP_CONF_DIR%"=="x" goto no_hadoop_conf_dir
76+
set CLASSPATH=%CLASSPATH%;%HADOOP_CONF_DIR%
77+
:no_hadoop_conf_dir
78+
79+
if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
80+
set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
81+
:no_yarn_conf_dir
82+
83+
rem A bit of a hack to allow calling this script within run2.cmd without seeing output
84+
if "%DONT_PRINT_CLASSPATH%"=="1" goto exit
85+
86+
echo %CLASSPATH%
87+
88+
:exit

bin/spark-shell

Lines changed: 9 additions & 168 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,8 @@
1919

2020
#
2121
# Shell script for starting the Spark Shell REPL
22-
# Note that it will set MASTER to spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}
23-
# if those two env vars are set in spark-env.sh but MASTER is not.
2422

23+
args="$@"
2524
cygwin=false
2625
case "`uname`" in
2726
CYGWIN*) cygwin=true;;
@@ -30,133 +29,16 @@ esac
3029
# Enter posix mode for bash
3130
set -o posix
3231

32+
if [[ "$@" == *--help* ]]; then
33+
echo "Usage: ./bin/spark-shell [options]"
34+
./bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
35+
exit 0
36+
fi
37+
3338
## Global script variables
3439
FWDIR="$(cd `dirname $0`/..; pwd)"
3540

36-
SPARK_REPL_OPTS="${SPARK_REPL_OPTS:-""}"
37-
DEFAULT_MASTER="local[*]"
38-
MASTER=${MASTER:-""}
39-
40-
info_log=0
41-
42-
#CLI Color Templates
43-
txtund=$(tput sgr 0 1) # Underline
44-
txtbld=$(tput bold) # Bold
45-
bldred=${txtbld}$(tput setaf 1) # red
46-
bldyel=${txtbld}$(tput setaf 3) # yellow
47-
bldblu=${txtbld}$(tput setaf 4) # blue
48-
bldwht=${txtbld}$(tput setaf 7) # white
49-
txtrst=$(tput sgr0) # Reset
50-
info=${bldwht}*${txtrst} # Feedback
51-
pass=${bldblu}*${txtrst}
52-
warn=${bldred}*${txtrst}
53-
ques=${bldblu}?${txtrst}
54-
55-
# Helper function to describe the script usage
56-
function usage() {
57-
cat << EOF
58-
${txtbld}Usage${txtrst}: spark-shell [OPTIONS]
59-
60-
${txtbld}OPTIONS${txtrst}:
61-
-h --help : Print this help information.
62-
-c --cores : The maximum number of cores to be used by the Spark Shell.
63-
-em --executor-memory : The memory used by each executor of the Spark Shell, the number
64-
is followed by m for megabytes or g for gigabytes, e.g. "1g".
65-
-dm --driver-memory : The memory used by the Spark Shell, the number is followed
66-
by m for megabytes or g for gigabytes, e.g. "1g".
67-
-m --master : A full string that describes the Spark Master, defaults to "local[*]"
68-
e.g. "spark://localhost:7077".
69-
--log-conf : Enables logging of the supplied SparkConf as INFO at start of the
70-
Spark Context.
71-
72-
e.g.
73-
spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g
74-
75-
EOF
76-
}
77-
78-
function out_error(){
79-
echo -e "${txtund}${bldred}ERROR${txtrst}: $1"
80-
usage
81-
exit 1
82-
}
83-
84-
function log_info(){
85-
[ $info_log -eq 1 ] && echo -e "${bldyel}INFO${txtrst}: $1"
86-
}
87-
88-
function log_warn(){
89-
echo -e "${txtund}${bldyel}WARN${txtrst}: $1"
90-
}
91-
92-
# PATTERNS used to validate more than one optional arg.
93-
ARG_FLAG_PATTERN="^-"
94-
MEM_PATTERN="^[0-9]+[m|g|M|G]$"
95-
NUM_PATTERN="^[0-9]+$"
96-
PORT_PATTERN="^[0-9]+$"
97-
98-
# Setters for optional args.
99-
function set_cores(){
100-
CORE_PATTERN="^[0-9]+$"
101-
if [[ "$1" =~ $CORE_PATTERN ]]; then
102-
SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
103-
else
104-
out_error "wrong format for $2"
105-
fi
106-
}
107-
108-
function set_em(){
109-
if [[ $1 =~ $MEM_PATTERN ]]; then
110-
SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.executor.memory=$1"
111-
else
112-
out_error "wrong format for $2"
113-
fi
114-
}
115-
116-
function set_dm(){
117-
if [[ $1 =~ $MEM_PATTERN ]]; then
118-
export SPARK_DRIVER_MEMORY=$1
119-
else
120-
out_error "wrong format for $2"
121-
fi
122-
}
123-
124-
function set_spark_log_conf(){
125-
SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.logConf=$1"
126-
}
127-
128-
function set_spark_master(){
129-
if ! [[ "$1" =~ $ARG_FLAG_PATTERN ]]; then
130-
export MASTER="$1"
131-
else
132-
out_error "wrong format for $2"
133-
fi
134-
}
135-
136-
function resolve_spark_master(){
137-
# Set MASTER from spark-env if possible
138-
DEFAULT_SPARK_MASTER_PORT=7077
139-
if [ -z "$MASTER" ]; then
140-
. $FWDIR/bin/load-spark-env.sh
141-
if [ -n "$SPARK_MASTER_IP" ]; then
142-
SPARK_MASTER_PORT="${SPARK_MASTER_PORT:-"$DEFAULT_SPARK_MASTER_PORT"}"
143-
export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
144-
fi
145-
fi
146-
147-
if [ -z "$MASTER" ]; then
148-
export MASTER="$DEFAULT_MASTER"
149-
fi
150-
151-
}
152-
15341
function main(){
154-
log_info "Base Directory set to $FWDIR"
155-
156-
resolve_spark_master
157-
log_info "Spark Master is $MASTER"
158-
159-
log_info "Spark REPL options $SPARK_REPL_OPTS"
16042
if $cygwin; then
16143
# Workaround for issue involving JLine and Cygwin
16244
# (see http://sourceforge.net/p/jline/bugs/40/).
@@ -165,55 +47,14 @@ function main(){
16547
# (see https://github.com/sbt/sbt/issues/562).
16648
stty -icanon min 1 -echo > /dev/null 2>&1
16749
export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
168-
$FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
50+
$FWDIR/bin/spark-submit spark-internal "$args" --class org.apache.spark.repl.Main
16951
stty icanon echo > /dev/null 2>&1
17052
else
17153
export SPARK_REPL_OPTS
172-
$FWDIR/bin/spark-class org.apache.spark.repl.Main "$@"
54+
$FWDIR/bin/spark-submit spark-internal "$args" --class org.apache.spark.repl.Main
17355
fi
17456
}
17557

176-
for option in "$@"
177-
do
178-
case $option in
179-
-h | --help )
180-
usage
181-
exit 1
182-
;;
183-
-c | --cores)
184-
shift
185-
_1=$1
186-
shift
187-
set_cores $_1 "-c/--cores"
188-
;;
189-
-em | --executor-memory)
190-
shift
191-
_1=$1
192-
shift
193-
set_em $_1 "-em/--executor-memory"
194-
;;
195-
-dm | --driver-memory)
196-
shift
197-
_1=$1
198-
shift
199-
set_dm $_1 "-dm/--driver-memory"
200-
;;
201-
-m | --master)
202-
shift
203-
_1=$1
204-
shift
205-
set_spark_master $_1 "-m/--master"
206-
;;
207-
--log-conf)
208-
shift
209-
set_spark_log_conf "true"
210-
info_log=1
211-
;;
212-
?)
213-
;;
214-
esac
215-
done
216-
21758
# Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
21859
# binary distribution of Spark where Scala is not installed
21960
exit_status=127

bin/spark-submit

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -21,15 +21,15 @@ export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
2121
ORIG_ARGS=$@
2222

2323
while (($#)); do
24-
if [ $1 = "--deploy-mode" ]; then
24+
if [ "$1" = "--deploy-mode" ]; then
2525
DEPLOY_MODE=$2
26-
elif [ $1 = "--driver-memory" ]; then
26+
elif [ "$1" = "--driver-memory" ]; then
2727
DRIVER_MEMORY=$2
28-
elif [ $1 = "--driver-library-path" ]; then
28+
elif [ "$1" = "--driver-library-path" ]; then
2929
export _SPARK_LIBRARY_PATH=$2
30-
elif [ $1 = "--driver-class-path" ]; then
30+
elif [ "$1" = "--driver-class-path" ]; then
3131
export SPARK_CLASSPATH="$SPARK_CLASSPATH:$2"
32-
elif [ $1 = "--driver-java-options" ]; then
32+
elif [ "$1" = "--driver-java-options" ]; then
3333
export SPARK_JAVA_OPTS="$SPARK_JAVA_OPTS $2"
3434
fi
3535
shift

0 commit comments

Comments
 (0)