diff --git a/bin/spark-class b/bin/spark-class
index 3f6beca5becf0..ce5bebe5929ea 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -30,6 +30,9 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
 
+# Load utility functions
+. "$SPARK_HOME/bin/utils.sh"
+
 . $FWDIR/bin/load-spark-env.sh
 
 if [ -z "$1" ]; then
@@ -75,8 +78,10 @@ case "$1" in
 
   # Spark submit uses SPARK_SUBMIT_OPTS and SPARK_JAVA_OPTS
   'org.apache.spark.deploy.SparkSubmit')
-    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS \
-      -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
+    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS"
+    if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then
+      OUR_JAVA_OPTS="$OUR_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
+    fi
     OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
     ;;
 
@@ -101,11 +106,16 @@ fi
 # Set JAVA_OPTS to be able to load native libraries and to set heap size
 JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS"
 JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
+
 # Load extra JAVA_OPTS from conf/java-opts, if it exists
 if [ -e "$FWDIR/conf/java-opts" ] ; then
   JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
 fi
-export JAVA_OPTS
+
+# Split JAVA_OPTS properly to handle whitespace, double quotes and backslashes
+# This exports the split java options into SPLIT_JAVA_OPTS
+split_java_options "$JAVA_OPTS"
+
 # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!
 
 TOOLS_DIR="$FWDIR"/tools
@@ -147,9 +157,12 @@ fi
 export CLASSPATH
 
 if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
+  # Put quotes around system properties in case they contain spaces
+  # This exports the resulting list of java opts into QUOTED_JAVA_OPTS
+  quote_java_property "${SPLIT_JAVA_OPTS[@]}"
   echo -n "Spark Command: " 1>&2
-  echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
+  echo "$RUNNER" -cp "$CLASSPATH" "${QUOTED_JAVA_OPTS[@]}" "$@" 1>&2
  echo -e "========================================\n" 1>&2
 fi
 
-exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
+exec "$RUNNER" -cp "$CLASSPATH" "${SPLIT_JAVA_OPTS[@]}" "$@"
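Worth pausing on why spark-class needs the new helpers at all: expanding an unquoted $JAVA_OPTS lets the shell word-split inside double quotes, mangling any -D value that contains spaces. The sketch below is not part of the patch (the option string is invented) and assumes a checkout containing the bin/utils.sh added by this change:

    # Naive word splitting breaks the quoted -D value into several arguments:
    JAVA_OPTS='-Xmx512m -Dnumbers="one \"two\" three"'
    for opt in $JAVA_OPTS; do printf '[%s]\n' "$opt"; done
    # => [-Xmx512m]
    #    [-Dnumbers="one]
    #    [\"two\"]
    #    [three"]

    # split_java_options (defined in bin/utils.sh below) keeps it together:
    . "$SPARK_HOME/bin/utils.sh"
    split_java_options "$JAVA_OPTS"
    for opt in "${SPLIT_JAVA_OPTS[@]}"; do printf '[%s]\n' "$opt"; done
    # => [-Xmx512m]
    #    [-Dnumbers=one "two" three]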
diff --git a/bin/spark-submit b/bin/spark-submit
index 9e7cecedd0325..1a815becae9b0 100755
--- a/bin/spark-submit
+++ b/bin/spark-submit
@@ -20,11 +20,16 @@
 export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
 ORIG_ARGS=("$@")
 
+# Load utility functions
+. "$SPARK_HOME/bin/utils.sh"
+
 while (($#)); do
   if [ "$1" = "--deploy-mode" ]; then
     DEPLOY_MODE=$2
   elif [ "$1" = "--driver-memory" ]; then
     DRIVER_MEMORY=$2
+  elif [ "$1" = "--properties-file" ]; then
+    PROPERTIES_FILE=$2
   elif [ "$1" = "--driver-library-path" ]; then
     export SPARK_SUBMIT_LIBRARY_PATH=$2
   elif [ "$1" = "--driver-class-path" ]; then
@@ -36,9 +41,66 @@ while (($#)); do
 done
 
 DEPLOY_MODE=${DEPLOY_MODE:-"client"}
+DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf"
+PROPERTIES_FILE=${PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"}
+
+unset DRIVER_EXTRA_JAVA_OPTS
+unset EXECUTOR_EXTRA_JAVA_OPTS
+
+# A few Spark configs must be parsed early on before launching the JVM:
+#
+# [spark.driver.extra*]
+#   These configs encode java options, class paths, and library paths
+#   needed to launch the JVM if we are running Spark in client mode
+#
+# [spark.*.extraJavaOptions]
+#   The escaped characters in these configs must be preserved for
+#   splitting the arguments in Java later. For these configs, we
+#   export the raw values as environment variables.
+#
+if [[ -f "$PROPERTIES_FILE" ]]; then
+  echo "Using properties file $PROPERTIES_FILE." 1>&2
+  # This exports the value of the given key into JAVA_PROPERTY_VALUE
+  parse_java_property "spark.driver.memory"
+  DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE"
+  parse_java_property "spark.driver.extraLibraryPath"
+  DRIVER_EXTRA_LIBRARY_PATH="$JAVA_PROPERTY_VALUE"
+  parse_java_property "spark.driver.extraClassPath"
+  DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE"
+  parse_java_property "spark.driver.extraJavaOptions"
+  DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE"
+  parse_java_property "spark.executor.extraJavaOptions"
+  EXECUTOR_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE"
+  if [[ -n "$DRIVER_EXTRA_JAVA_OPTS" ]]; then
+    export DRIVER_EXTRA_JAVA_OPTS
+  fi
+  if [[ -n "$EXECUTOR_EXTRA_JAVA_OPTS" ]]; then
+    export EXECUTOR_EXTRA_JAVA_OPTS
+  fi
+elif [[ "$PROPERTIES_FILE" != "$DEFAULT_PROPERTIES_FILE" ]]; then
+  echo "Warning: properties file $PROPERTIES_FILE does not exist." 1>&2
+fi
 
-if [ -n "$DRIVER_MEMORY" ] && [ $DEPLOY_MODE == "client" ]; then
-  export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY
+# For client mode, the driver will be launched in the JVM that launches
+# SparkSubmit, so we need to handle the class paths, java options, and
+# memory pre-emptively in bash. Otherwise, it will be too late by the
+# time the JVM has started.
+
+if [[ $DEPLOY_MODE == "client" ]]; then
+  if [[ -n "$DRIVER_EXTRA_JAVA_OPTS" ]]; then
+    export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS"
+  fi
+  if [[ -n "$DRIVER_EXTRA_CLASSPATH" ]]; then
+    export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH"
+  fi
+  if [[ -n "$DRIVER_EXTRA_LIBRARY_PATH" ]]; then
+    export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH"
+  fi
+  # Favor command line memory over config memory
+  DRIVER_MEMORY=${DRIVER_MEMORY:-"$DRIVER_MEMORY_CONF"}
+  if [[ -n "$DRIVER_MEMORY" ]]; then
+    export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY
+  fi
 fi
 
 exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"
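To make the early parsing above concrete, here is a minimal sketch of exercising parse_java_property by hand (everything in it is invented for illustration; it assumes the bin/utils.sh introduced below has been sourced, and that PROPERTIES_FILE points at the file to read):

    # Write a throwaway properties file, including a multi-line value:
    PROPERTIES_FILE="$(mktemp)"
    printf '%s\n' \
      'spark.driver.memory           2g' \
      'spark.driver.extraJavaOptions -XX:+UseCompressedOops \' \
      '                              -Dkey=value' > "$PROPERTIES_FILE"

    parse_java_property "spark.driver.memory"
    echo "$JAVA_PROPERTY_VALUE"   # => 2g

    parse_java_property "spark.driver.extraJavaOptions"
    echo "$JAVA_PROPERTY_VALUE"   # => both options, joined onto one line
    rm -f "$PROPERTIES_FILE"

In client mode the parsed values feed SPARK_SUBMIT_OPTS, SPARK_SUBMIT_CLASSPATH, SPARK_SUBMIT_LIBRARY_PATH, and SPARK_DRIVER_MEMORY before the JVM starts; command-line flags still win over the config file.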
diff --git a/bin/utils.sh b/bin/utils.sh
new file mode 100755
index 0000000000000..5280b9c40e929
--- /dev/null
+++ b/bin/utils.sh
@@ -0,0 +1,108 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# * ---------------------------------------------------- *
+# | Utility functions for launching Spark applications  |
+# * ---------------------------------------------------- *
+
+# Parse the value of a config from a java properties file according to the specifications in
+# http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader).
+# This accepts the name of the config as an argument, and expects the path of the property
+# file to be found in PROPERTIES_FILE. The value is returned through JAVA_PROPERTY_VALUE.
+parse_java_property() {
+  JAVA_PROPERTY_VALUE=""  # return value
+  config_buffer=""        # buffer for collecting parts of a config value
+  multi_line=0            # whether this config is spanning multiple lines
+  while read -r line; do
+    # Strip leading and trailing whitespace
+    line=$(echo "$line" | sed "s/^[[:space:]]*//" | sed "s/[[:space:]]*$//")
+    contains_config=$(echo "$line" | grep -e "^$1")
+    if [[ -n "$contains_config" || "$multi_line" == 1 ]]; then
+      has_more_lines=$(echo "$line" | grep -e "\\\\$")
+      if [[ -n "$has_more_lines" ]]; then
+        # Strip trailing backslash
+        line=$(echo "$line" | sed "s/\\\\$//")
+        config_buffer="$config_buffer $line"
+        multi_line=1
+      else
+        JAVA_PROPERTY_VALUE="$config_buffer $line"
+        break
+      fi
+    fi
+  done < "$PROPERTIES_FILE"
+
+  # Actually extract the value of the config
+  JAVA_PROPERTY_VALUE=$( \
+    echo "$JAVA_PROPERTY_VALUE" | \
+    sed "s/$1//" | \
+    sed "s/^[[:space:]]*[:=]\{0,1\}[[:space:]]*//" | \
+    sed "s/[[:space:]]*$//" \
+  )
+  export JAVA_PROPERTY_VALUE
+}
+
+# Properly split java options, dealing with whitespace, double quotes and backslashes.
+# This accepts a string and returns the resulting list through SPLIT_JAVA_OPTS.
+split_java_options() {
+  SPLIT_JAVA_OPTS=()  # return value
+  option_buffer=""    # buffer for collecting parts of an option
+  opened_quotes=0     # whether we are expecting a closing double quote
+  for word in $1; do
+    contains_quote=$(echo "$word" | sed "s/\\\\\"//g" | grep "\"")
+    if [[ -n "$contains_quote" ]]; then
+      # Flip the bit
+      opened_quotes=$(((opened_quotes + 1) % 2))
+    fi
+    if [[ $opened_quotes == 0 ]]; then
+      # Remove all non-escaped quotes around the value
+      SPLIT_JAVA_OPTS+=("$(
+        echo "$option_buffer $word" | \
+        sed "s/^[[:space:]]*//" | \
+        sed "s/\([^\\]\)\"/\1/g" | \
+        sed "s/\\\\\([\\\"]\)/\1/g"
+      )")
+      option_buffer=""
+    else
+      # We are expecting a closing double quote, so keep buffering
+      option_buffer="$option_buffer $word"
+    fi
+  done
+  # Something is wrong if we ended with an open double quote
+  if [[ $opened_quotes == 1 ]]; then
+    echo -e "Java options parse error! Expecting a closing double quote:" 1>&2
+    echo -e "  ${SPLIT_JAVA_OPTS[@]}" 1>&2
+    exit 1
+  fi
+  export SPLIT_JAVA_OPTS
+}
+
+# Put double quotes around each of the given java options that is a system property.
+# This accepts a list and returns the quoted list through QUOTED_JAVA_OPTS.
+quote_java_property() {
+  QUOTED_JAVA_OPTS=()
+  for opt in "$@"; do
+    is_system_property=$(echo "$opt" | grep -e "^-D")
+    if [[ -n "$is_system_property" ]]; then
+      QUOTED_JAVA_OPTS+=(\"$opt\")
+    else
+      QUOTED_JAVA_OPTS+=("$opt")
+    fi
+  done
+  export QUOTED_JAVA_OPTS
+}
+
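A quick sketch of how spark-class is expected to combine the two splitting/quoting helpers (the option string is invented; assumes utils.sh has been sourced):

    split_java_options '-Xmx1g -Dname="spark shell"'
    quote_java_property "${SPLIT_JAVA_OPTS[@]}"
    printf '%s\n' "${QUOTED_JAVA_OPTS[@]}"
    # => -Xmx1g
    #    "-Dname=spark shell"

The quoted form is only for the SPARK_PRINT_LAUNCH_COMMAND display; the exec path passes the unquoted array, so the JVM receives -Dname=spark shell as a single argument.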
diff --git a/conf/spark-defaults.conf.template b/conf/spark-defaults.conf.template
index 2779342769c14..ad7273d830c16 100644
--- a/conf/spark-defaults.conf.template
+++ b/conf/spark-defaults.conf.template
@@ -2,7 +2,8 @@
 # This is useful for setting default environmental settings.
 
 # Example:
-# spark.master            spark://master:7077
-# spark.eventLog.enabled  true
-# spark.eventLog.dir      hdfs://namenode:8021/directory
-# spark.serializer        org.apache.spark.serializer.KryoSerializer
+# spark.master                     spark://master:7077
+# spark.eventLog.enabled           true
+# spark.eventLog.dir               hdfs://namenode:8021/directory
+# spark.serializer                 org.apache.spark.serializer.KryoSerializer
+# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dnumbers="one \"two\" three"
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 318509a67a36f..f8cdbc3c392b5 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -195,18 +195,21 @@ object SparkSubmit {
       OptionAssigner(args.jars, YARN, CLUSTER, clOption = "--addJars"),
 
       // Other options
-      OptionAssigner(args.driverExtraClassPath, STANDALONE | YARN, CLUSTER,
-        sysProp = "spark.driver.extraClassPath"),
-      OptionAssigner(args.driverExtraJavaOptions, STANDALONE | YARN, CLUSTER,
-        sysProp = "spark.driver.extraJavaOptions"),
-      OptionAssigner(args.driverExtraLibraryPath, STANDALONE | YARN, CLUSTER,
-        sysProp = "spark.driver.extraLibraryPath"),
       OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN, ALL_DEPLOY_MODES,
         sysProp = "spark.executor.memory"),
       OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES,
         sysProp = "spark.cores.max"),
       OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, ALL_DEPLOY_MODES,
-        sysProp = "spark.files")
+        sysProp = "spark.files"),
+
+      // Only process driver-specific options for cluster mode here,
+      // because they have already been processed in bash for client mode
+      OptionAssigner(args.driverExtraClassPath, STANDALONE | YARN, CLUSTER,
+        sysProp = "spark.driver.extraClassPath"),
+      OptionAssigner(args.driverExtraJavaOptions, STANDALONE | YARN, CLUSTER,
+        sysProp = "spark.driver.extraJavaOptions"),
+      OptionAssigner(args.driverExtraLibraryPath, STANDALONE | YARN, CLUSTER,
+        sysProp = "spark.driver.extraLibraryPath")
     )
 
     // In client mode, launch the application main class directly
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index 087dd4d633db0..614089272c1e8 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -76,6 +76,15 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
         }
       }
     }
+    // For spark.*.extraJavaOptions, we cannot rely on the Java properties loader because it
+    // un-escapes certain characters (" and \) needed to split the string into java options.
+    // For these configs, use the equivalent environment variables instead.
+    sys.env.get("DRIVER_EXTRA_JAVA_OPTS").foreach { opts =>
+      defaultProperties("spark.driver.extraJavaOptions") = opts
+    }
+    sys.env.get("EXECUTOR_EXTRA_JAVA_OPTS").foreach { opts =>
+      defaultProperties("spark.executor.extraJavaOptions") = opts
+    }
     defaultProperties
   }
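Finally, a sketch of why the raw values travel through environment variables instead of the Java properties loader (the property value is invented):

    # bash preserves the escapes verbatim across the environment:
    export EXECUTOR_EXTRA_JAVA_OPTS='-Dnumbers="one \"two\" three"'
    printf '%s\n' "$EXECUTOR_EXTRA_JAVA_OPTS"
    # => -Dnumbers="one \"two\" three"

java.util.Properties.load would drop the backslash in \" (unrecognized escapes lose their backslash), leaving no reliable way to split the value back into individual java options later; reading the raw string from sys.env side-steps that.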