Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
250cb95
Do not ignore spark.driver.extra* for client mode
andrewor14 Aug 4, 2014
a2ab1b0
Parse spark.driver.extra* in bash
andrewor14 Aug 6, 2014
0025474
Revert SparkSubmit handling of --driver-* options for only cluster mode
andrewor14 Aug 6, 2014
63ed2e9
Merge branch 'master' of github.com:apache/spark into submit-driver-e…
andrewor14 Aug 6, 2014
75ee6b4
Remove accidentally added file
andrewor14 Aug 6, 2014
8843562
Fix compilation issues...
andrewor14 Aug 6, 2014
98dd8e3
Add warning if properties file does not exist
andrewor14 Aug 6, 2014
130f295
Handle spark.driver.memory too
andrewor14 Aug 6, 2014
4edcaa8
Redirect stdout to stderr for python
andrewor14 Aug 6, 2014
e5cfb46
Collapse duplicate code + fix potential whitespace issues
andrewor14 Aug 6, 2014
4ec22a1
Merge branch 'master' of github.com:apache/spark into submit-driver-e…
andrewor14 Aug 6, 2014
ef12f74
Minor formatting
andrewor14 Aug 6, 2014
fa2136e
Escape Java options + parse java properties files properly
andrewor14 Aug 7, 2014
dec2343
Only export variables if they exist
andrewor14 Aug 7, 2014
a4df3c4
Move parsing and escaping logic to utils.sh
andrewor14 Aug 7, 2014
de765c9
Print spark-class command properly
andrewor14 Aug 7, 2014
8e552b7
Include an example of spark.*.extraJavaOptions
andrewor14 Aug 7, 2014
c13a2cb
Merge branch 'master' of github.com:apache/spark into submit-driver-e…
andrewor14 Aug 7, 2014
c854859
Add small comment
andrewor14 Aug 7, 2014
1cdc6b1
Fix bug: escape escaped double quotes properly
andrewor14 Aug 7, 2014
45a1eb9
Fix bug: escape escaped backslashes and quotes properly...
andrewor14 Aug 7, 2014
aabfc7e
escape -> split (minor)
andrewor14 Aug 7, 2014
a992ae2
Escape spark.*.extraJavaOptions correctly
andrewor14 Aug 7, 2014
c7b9926
Minor changes to spark-defaults.conf.template
andrewor14 Aug 7, 2014
5d8f8c4
Merge branch 'master' of github.com:apache/spark into submit-driver-e…
andrewor14 Aug 7, 2014
e793e5f
Handle multi-line arguments
andrewor14 Aug 8, 2014
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions bin/spark-class
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
# Export this as SPARK_HOME
export SPARK_HOME="$FWDIR"

# Load utility functions
. "$SPARK_HOME/bin/utils.sh"

. $FWDIR/bin/load-spark-env.sh

if [ -z "$1" ]; then
Expand Down Expand Up @@ -75,8 +78,10 @@ case "$1" in

# Spark submit uses SPARK_SUBMIT_OPTS and SPARK_JAVA_OPTS
'org.apache.spark.deploy.SparkSubmit')
OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS \
-Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS"
if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then
OUR_JAVA_OPTS="$OUR_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
fi
OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
;;

Expand All @@ -101,11 +106,16 @@ fi
# Set JAVA_OPTS to be able to load native libraries and to set heap size
JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS"
JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"

# Load extra JAVA_OPTS from conf/java-opts, if it exists
if [ -e "$FWDIR/conf/java-opts" ] ; then
JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
fi
export JAVA_OPTS

# Split JAVA_OPTS properly to handle whitespace, double quotes and backslashes
# This exports the split java options into SPLIT_JAVA_OPTS
split_java_options "$JAVA_OPTS"

# Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!

TOOLS_DIR="$FWDIR"/tools
Expand Down Expand Up @@ -147,9 +157,12 @@ fi
export CLASSPATH

if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
# Put quotes around system properties in case they contain spaces
# This exports the resulting list of java opts into QUOTED_JAVA_OPTS
quote_java_property "${SPLIT_JAVA_OPTS[@]}"
echo -n "Spark Command: " 1>&2
echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
echo "$RUNNER" -cp "$CLASSPATH" "${QUOTED_JAVA_OPTS[@]}" "$@" 1>&2
echo -e "========================================\n" 1>&2
fi

exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
exec "$RUNNER" -cp "$CLASSPATH" "${SPLIT_JAVA_OPTS[@]}" "$@"
66 changes: 64 additions & 2 deletions bin/spark-submit
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,16 @@
export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
ORIG_ARGS=("$@")

# Load utility functions
. "$SPARK_HOME/bin/utils.sh"

while (($#)); do
if [ "$1" = "--deploy-mode" ]; then
DEPLOY_MODE=$2
elif [ "$1" = "--driver-memory" ]; then
DRIVER_MEMORY=$2
elif [ "$1" = "--properties-file" ]; then
PROPERTIES_FILE=$2
elif [ "$1" = "--driver-library-path" ]; then
export SPARK_SUBMIT_LIBRARY_PATH=$2
elif [ "$1" = "--driver-class-path" ]; then
Expand All @@ -36,9 +41,66 @@ while (($#)); do
done

DEPLOY_MODE=${DEPLOY_MODE:-"client"}
DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf"
PROPERTIES_FILE=${PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"}

unset DRIVER_EXTRA_JAVA_OPTIONS
unset EXECUTOR_EXTRA_JAVA_OPTIONS

# A few Spark configs must be parsed early on before launching the JVM:
#
# [spark.driver.extra*]
# These configs encode java options, class paths, and library paths
# needed to launch the JVM if we are running Spark in client mode
#
# [spark.*.extraJavaOptions]
# The escaped characters in these configs must be preserved for
# splitting the arguments in Java later. For these configs, we
# export the raw values as environment variables.
#
if [[ -f "$PROPERTIES_FILE" ]]; then
echo "Using properties file $PROPERTIES_FILE." 1>&2
# This exports the value of the given key into JAVA_PROPERTY_VALUE
parse_java_property "spark.driver.memory"
DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE"
parse_java_property "spark.driver.extraLibraryPath"
DRIVER_EXTRA_LIBRARY_PATH="$JAVA_PROPERTY_VALUE"
parse_java_property "spark.driver.extraClassPath"
DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE"
parse_java_property "spark.driver.extraJavaOptions"
DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE"
parse_java_property "spark.executor.extraJavaOptions"
EXECUTOR_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE"
# Export the raw extraJavaOptions values only when they are non-empty.
# BUG FIX: the original tested the literal strings "DRIVER_EXTRA_JAVA_OPTS" /
# "EXECUTOR_EXTRA_JAVA_OPTS" (missing $), so -n was always true and the
# variables were exported unconditionally, even when unset/empty.
if [[ -n "$DRIVER_EXTRA_JAVA_OPTS" ]]; then
export DRIVER_EXTRA_JAVA_OPTS
fi
if [[ -n "$EXECUTOR_EXTRA_JAVA_OPTS" ]]; then
export EXECUTOR_EXTRA_JAVA_OPTS
fi
elif [[ "$PROPERTIES_FILE" != "$DEFAULT_PROPERTIES_FILE" ]]; then
echo "Warning: properties file $PROPERTIES_FILE does not exist." 1>&2
fi

if [ -n "$DRIVER_MEMORY" ] && [ $DEPLOY_MODE == "client" ]; then
export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY
# For client mode, the driver will be launched in the JVM that launches
# SparkSubmit, so we need to handle the class paths, java options, and
# memory pre-emptively in bash. Otherwise, it will be too late by the
# time the JVM has started.

if [[ $DEPLOY_MODE == "client" ]]; then
if [[ -n "$DRIVER_EXTRA_JAVA_OPTS" ]]; then
export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS"
fi
if [[ -n "$DRIVER_EXTRA_CLASSPATH" ]]; then
export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH"
fi
if [[ -n "$DRIVER_EXTRA_LIBRARY_PATH" ]]; then
export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH"
fi
# Favor command line memory over config memory
DRIVER_MEMORY=${DRIVER_MEMORY:-"$DRIVER_MEMORY_CONF"}
if [[ -n "$DRIVER_MEMORY" ]]; then
export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY
fi
fi

exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"
Expand Down
108 changes: 108 additions & 0 deletions bin/utils.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# * ---------------------------------------------------- *
# | Utility functions for launching Spark applications |
# * ---------------------------------------------------- *

# Parse the value of a config from a java properties file according to the specifications in
# http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader).
# This accepts the name of the config as an argument, and expects the path of the property
# file to be found in PROPERTIES_FILE. The value is returned through JAVA_PROPERTY_VALUE.
#
# NOTE(review): the key is interpolated into BRE patterns, so '.' in the key acts as a
# wildcard; acceptable for spark.* config names, but not a general-purpose parser.
parse_java_property() {
  JAVA_PROPERTY_VALUE=""  # return value
  config_buffer=""        # buffer for collecting parts of a multi-line config value
  multi_line=0            # whether this config is spanning multiple lines
  while read -r line; do
    # Strip leading and trailing whitespace.
    # BUG FIX: the original pattern was "^[[:space:]]\(...\)" (no '*'), which only
    # matched lines with exactly one leading whitespace character.
    line=$(echo "$line" | sed "s/^[[:space:]]*//;s/[[:space:]]*$//")
    # Match the key only when followed by a separator (whitespace, ':' or '='),
    # so e.g. "spark.driver.extraClassPath" does not match
    # "spark.driver.extraClassPathFoo" (the original used a bare prefix match).
    contains_config=$(echo "$line" | grep -e "^$1[[:space:]:=]")
    if [[ -n "$contains_config" || "$multi_line" == 1 ]]; then
      has_more_lines=$(echo "$line" | grep -e "\\\\$")
      if [[ -n "$has_more_lines" ]]; then
        # Strip trailing backslash and keep buffering the continued value
        line=$(echo "$line" | sed "s/\\\\$//")
        config_buffer="$config_buffer $line"
        multi_line=1
      else
        JAVA_PROPERTY_VALUE="$config_buffer $line"
        break
      fi
    fi
  done < "$PROPERTIES_FILE"

  # Actually extract the value: drop leading whitespace, then the key (anchored at
  # the start so the key text is never removed from inside the value — the original
  # deletion was unanchored), then an optional ':' or '=' separator, then trim.
  JAVA_PROPERTY_VALUE=$( \
    echo "$JAVA_PROPERTY_VALUE" | \
    sed "s/^[[:space:]]*//" | \
    sed "s/^$1//" | \
    sed "s/^[[:space:]]*[:=]\{0,1\}//" | \
    sed "s/^[[:space:]]*//;s/[[:space:]]*$//" \
  )
  export JAVA_PROPERTY_VALUE
}

# Properly split java options, dealing with whitespace, double quotes and backslashes.
# This accepts a string and returns the resulting list through SPLIT_JAVA_OPTS.
#
# The input is first word-split on whitespace (via the unquoted $1 expansion); tokens
# that fall inside an unclosed double-quoted section are re-joined into one option.
# Escaped quotes (\") do not open/close a section. If the string ends with a quote
# still open, an error is printed and the script exits.
# NOTE(review): this function calls `exit 1` on parse error; since utils.sh is
# sourced by spark-class/spark-submit, that terminates the sourcing script — confirm
# this is the intended failure mode.
split_java_options() {
SPLIT_JAVA_OPTS=() # return value
option_buffer="" # buffer for collecting parts of an option
opened_quotes=0 # whether we are expecting a closing double quotes
# Iterate over whitespace-separated tokens of the (deliberately unquoted) input
for word in $1; do
# Delete every escaped quote (\") first, then check whether an unescaped " remains,
# i.e. whether this token opens or closes a quoted section
contains_quote=$(echo "$word" | sed "s/\\\\\"//g" | grep "\"")
if [[ -n "$contains_quote" ]]; then
# Flip the bit
opened_quotes=$(((opened_quotes + 1) % 2))
fi
if [[ $opened_quotes == 0 ]]; then
# Remove all non-escaped quotes around the value
# Pipeline: trim leading space from the joined buffer, drop quotes that are not
# preceded by a backslash, then un-escape the remaining \" and \\ sequences
SPLIT_JAVA_OPTS+=("$(
echo "$option_buffer $word" | \
sed "s/^[[:space:]]*//" | \
sed "s/\([^\\]\)\"/\1/g" | \
sed "s/\\\\\([\\\"]\)/\1/g"
)")
option_buffer=""
else
# We are expecting a closing double quote, so keep buffering
option_buffer="$option_buffer $word"
fi
done
# Something is wrong if we ended with open double quotes
if [[ $opened_quotes == 1 ]]; then
echo -e "Java options parse error! Expecting closing double quotes:" 1>&2
echo -e " ${SPLIT_JAVA_OPTS[@]}" 1>&2
exit 1
fi
export SPLIT_JAVA_OPTS
}

# Wrap each java option that is a system property (-D...) in literal double quotes,
# leaving all other options untouched.
# This accepts a list and returns the quoted list through QUOTED_JAVA_OPTS.
quote_java_property() {
  QUOTED_JAVA_OPTS=()  # return value
  for java_opt in "$@"; do
    # Pattern-match on the prefix directly instead of spawning grep in a subshell
    case "$java_opt" in
      -D*)
        QUOTED_JAVA_OPTS+=("\"$java_opt\"")
        ;;
      *)
        QUOTED_JAVA_OPTS+=("$java_opt")
        ;;
    esac
  done
  export QUOTED_JAVA_OPTS
}

9 changes: 5 additions & 4 deletions conf/spark-defaults.conf.template
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
# This is useful for setting default environmental settings.

# Example:
# spark.master spark://master:7077
# spark.eventLog.enabled true
# spark.eventLog.dir hdfs://namenode:8021/directory
# spark.serializer org.apache.spark.serializer.KryoSerializer
# spark.master spark://master:7077
# spark.eventLog.enabled true
# spark.eventLog.dir hdfs://namenode:8021/directory
# spark.serializer org.apache.spark.serializer.KryoSerializer
# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dnumbers="one \"two\" three"
17 changes: 10 additions & 7 deletions core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -195,18 +195,21 @@ object SparkSubmit {
OptionAssigner(args.jars, YARN, CLUSTER, clOption = "--addJars"),

// Other options
OptionAssigner(args.driverExtraClassPath, STANDALONE | YARN, CLUSTER,
sysProp = "spark.driver.extraClassPath"),
OptionAssigner(args.driverExtraJavaOptions, STANDALONE | YARN, CLUSTER,
sysProp = "spark.driver.extraJavaOptions"),
OptionAssigner(args.driverExtraLibraryPath, STANDALONE | YARN, CLUSTER,
sysProp = "spark.driver.extraLibraryPath"),
OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN, ALL_DEPLOY_MODES,
sysProp = "spark.executor.memory"),
OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES,
sysProp = "spark.cores.max"),
OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, ALL_DEPLOY_MODES,
sysProp = "spark.files")
sysProp = "spark.files"),

// Only process driver specific options for cluster mode here,
// because they have already been processed in bash for client mode
OptionAssigner(args.driverExtraClassPath, STANDALONE | YARN, CLUSTER,
sysProp = "spark.driver.extraClassPath"),
OptionAssigner(args.driverExtraJavaOptions, STANDALONE | YARN, CLUSTER,
sysProp = "spark.driver.extraJavaOptions"),
OptionAssigner(args.driverExtraLibraryPath, STANDALONE | YARN, CLUSTER,
sysProp = "spark.driver.extraLibraryPath")
)

// In client mode, launch the application main class directly
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,15 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
}
}
}
// For spark.*.extraJavaOptions, we cannot rely on the Java properties loader because it
// un-escapes certain characters (" and \) needed to split the string into java options.
// For these configs, use the equivalent environment variables instead.
sys.env.get("DRIVER_EXTRA_JAVA_OPTS").foreach { opts =>
defaultProperties("spark.driver.extraJavaOptions") = opts
}
sys.env.get("EXECUTOR_EXTRA_JAVA_OPTS").foreach { opts =>
defaultProperties("spark.executor.extraJavaOptions") = opts
}
defaultProperties
}

Expand Down