From 250cb955efe9c9bdf24be6cefcfb1dfa71d39bc4 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Mon, 4 Aug 2014 13:12:15 -0700 Subject: [PATCH 01/22] Do not ignore spark.driver.extra* for client mode --- .../scala/org/apache/spark/deploy/SparkSubmit.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 318509a67a36f..430bf27c1c8de 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -171,6 +171,12 @@ object SparkSubmit { OptionAssigner(args.master, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.master"), OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.app.name"), OptionAssigner(args.jars, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.jars"), + OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, + sysProp = "spark.driver.extraClassPath"), + OptionAssigner(args.driverExtraJavaOptions, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, + sysProp = "spark.driver.extraJavaOptions"), + OptionAssigner(args.driverExtraLibraryPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, + sysProp = "spark.driver.extraLibraryPath"), // Standalone cluster only OptionAssigner(args.driverMemory, STANDALONE, CLUSTER, clOption = "--memory"), @@ -195,12 +201,6 @@ object SparkSubmit { OptionAssigner(args.jars, YARN, CLUSTER, clOption = "--addJars"), // Other options - OptionAssigner(args.driverExtraClassPath, STANDALONE | YARN, CLUSTER, - sysProp = "spark.driver.extraClassPath"), - OptionAssigner(args.driverExtraJavaOptions, STANDALONE | YARN, CLUSTER, - sysProp = "spark.driver.extraJavaOptions"), - OptionAssigner(args.driverExtraLibraryPath, STANDALONE | YARN, CLUSTER, - sysProp = "spark.driver.extraLibraryPath"), OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN, ALL_DEPLOY_MODES, 
sysProp = "spark.executor.memory"), OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES, From a2ab1b0a3a976e361ea86fc20fc7083e7f9885ca Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 5 Aug 2014 21:32:05 -0700 Subject: [PATCH 02/22] Parse spark.driver.extra* in bash --- bin/spark-submit | 32 +++++++++++++++++++-- bin/spark-submit.new | 67 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 2 deletions(-) create mode 100755 bin/spark-submit.new diff --git a/bin/spark-submit b/bin/spark-submit index 9e7cecedd0325..70d4d9c06893a 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -25,6 +25,8 @@ while (($#)); do DEPLOY_MODE=$2 elif [ "$1" = "--driver-memory" ]; then DRIVER_MEMORY=$2 + elif [ "$1" = "--properties-file" ]; then + PROPERTIES_FILE=$2 elif [ "$1" = "--driver-library-path" ]; then export SPARK_SUBMIT_LIBRARY_PATH=$2 elif [ "$1" = "--driver-class-path" ]; then @@ -36,9 +38,35 @@ while (($#)); do done DEPLOY_MODE=${DEPLOY_MODE:-"client"} +PROPERTIES_FILE=${PROPERTIES_FILE:-"$SPARK_HOME/conf/spark-defaults.conf"} -if [ -n "$DRIVER_MEMORY" ] && [ $DEPLOY_MODE == "client" ]; then - export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY +# For client mode, the driver will be launched in the JVM that launches +# SparkSubmit, so we need to handle the class paths, java options, and +# memory pre-emptively in bash. Otherwise, it will be too late by the +# time the JVM has started. + +if [ $DEPLOY_MODE == "client" ]; then + if [ -n "$DRIVER_MEMORY" ]; then + export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY + fi + # We parse the default properties file here, assuming each line is + # a key value pair delimited either by white space or "=" sign. All + # spark.driver.* configs must be processed now before it's too late. 
+ if [ -f "$PROPERTIES_FILE" ]; then + DRIVER_EXTRA_JAVA_OPTS="spark.driver.extraJavaOptions" + DRIVER_EXTRA_CLASSPATH="spark.driver.extraClassPath" + DRIVER_EXTRA_LIBRARY_PATH="spark.driver.extraLibraryPath" + # Remove "=" sign and double quotes around the value, if any + DRIVER_EXTRA_JAVA_OPTS=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_JAVA_OPTS" | \ + sed "s/$DRIVER_EXTRA_JAVA_OPTS//g" | sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g") + DRIVER_EXTRA_CLASSPATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_CLASSPATH" | \ + sed "s/$DRIVER_EXTRA_CLASSPATH//g" | sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g") + DRIVER_EXTRA_LIBRARY_PATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_LIBRARY_PATH" | \ + sed "s/$DRIVER_EXTRA_LIBRARY_PATH//g" | sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g") + export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" + export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" + export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" + fi fi exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}" diff --git a/bin/spark-submit.new b/bin/spark-submit.new new file mode 100755 index 0000000000000..eb68a5c72e952 --- /dev/null +++ b/bin/spark-submit.new @@ -0,0 +1,67 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +SPARK_HOME="$(cd `dirname $0`/..; pwd)" +ORIG_ARGS=("$@") + +while (($#)); do + if [ "$1" = "--deploy-mode" ]; then + DEPLOY_MODE=$2 + elif [ "$1" = "--driver-memory" ]; then + DRIVER_MEMORY=$2 + elif [ "$1" = "--driver-library-path" ]; then + SPARK_SUBMIT_LIBRARY_PATH=$2 + elif [ "$1" = "--driver-class-path" ]; then + SPARK_SUBMIT_CLASSPATH=$2 + elif [ "$1" = "--driver-java-options" ]; then + SPARK_SUBMIT_OPTS=$2 + elif [ "$1" = "--properties-file" ]; then + PROPERTIES_FILE=$2 + fi + shift +done + +DEPLOY_MODE=${DEPLOY_MODE:-"client"} + +if [ -n "$DRIVER_MEMORY" ] && [ $DEPLOY_MODE == "client" ]; then + SPARK_DRIVER_MEMORY=$DRIVER_MEMORY +fi + +PROPERTIES_FILE=${PROPERTIES_FILE:-"$SPARK_HOME/conf/spark-defaults.conf"} +if [ -f $PROPERTIES_FILE ]; then + DRIVER_EXTRA_JAVA_OPTIONS="spark.driver.extraJavaOptions" + DRIVER_EXTRA_CLASSPATH="spark.driver.extraClassPath" + DRIVER_EXTRA_LIBRARY_PATH="spark.driver.extraLibraryPath" + DRIVER_EXTRA_JAVA_OPTIONS=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_JAVA_OPTIONS" | sed "s/$DRIVER_EXTRA_JAVA_OPTIONS//g" | sed "s/^=//g") + DRIVER_EXTRA_CLASSPATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_CLASSPATH" | sed "s/$DRIVER_EXTRA_CLASSPATH//g" | sed "s/^=//g") + DRIVER_EXTRA_LIBRARY_PATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_LIBRARY_PATH" | sed "s/$DRIVER_EXTRA_LIBRARY_PATH//g" | sed "s/^=//g") +fi + +echo "DEPLOY_MODE = $DEPLOY_MODE" +echo "DRIVER_MEMORY = $DRIVER_MEMORY" +echo "SPARK_DRIVER_MEMORY = $SPARK_DRIVER_MEMORY" +echo "SPARK_SUBMIT_LIBRARY_PATH = 
$SPARK_SUBMIT_LIBRARY_PATH" +echo "SPARK_SUBMIT_CLASSPATH = $SPARK_SUBMIT_CLASSPATH" +echo "SPARK_SUBMIT_OPTS = $SPARK_SUBMIT_OPTS" +echo "DRIVER_EXTRA_JAVA_OPTIONS = $DRIVER_EXTRA_JAVA_OPTIONS" +echo "DRIVER_EXTRA_CLASSPATH = $DRIVER_EXTRA_CLASSPATH" +echo "DRIVER_EXTRA_LIBRARY_PATH = $DRIVER_EXTRA_LIBRARY_PATH" + +echo "exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"" + From 0025474d7412607e1ca620d1942393d78a28b7f8 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 5 Aug 2014 21:35:16 -0700 Subject: [PATCH 03/22] Revert SparkSubmit handling of --driver-* options for only cluster mode --- .../org/apache/spark/deploy/SparkSubmit.scala | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 430bf27c1c8de..0fb94ebe8e45b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -171,12 +171,6 @@ object SparkSubmit { OptionAssigner(args.master, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.master"), OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.app.name"), OptionAssigner(args.jars, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.jars"), - OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, - sysProp = "spark.driver.extraClassPath"), - OptionAssigner(args.driverExtraJavaOptions, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, - sysProp = "spark.driver.extraJavaOptions"), - OptionAssigner(args.driverExtraLibraryPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, - sysProp = "spark.driver.extraLibraryPath"), // Standalone cluster only OptionAssigner(args.driverMemory, STANDALONE, CLUSTER, clOption = "--memory"), @@ -207,6 +201,15 @@ object SparkSubmit { sysProp = "spark.cores.max"), OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, 
ALL_DEPLOY_MODES, sysProp = "spark.files") + + // Only process driver specific options for cluster mode here, + // because they have already been processed in bash for client mode + OptionAssigner(args.driverExtraClassPath, STANDALONE | YARN, CLUSTER, + sysProp = "spark.driver.extraClassPath"), + OptionAssigner(args.driverExtraJavaOptions, STANDALONE | YARN, CLUSTER, + sysProp = "spark.driver.extraJavaOptions"), + OptionAssigner(args.driverExtraLibraryPath, STANDALONE | YARN, CLUSTER, + sysProp = "spark.driver.extraLibraryPath"), ) // In client mode, launch the application main class directly From 75ee6b4a1c6df1a911cf62ded81e4eabb737b345 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 5 Aug 2014 21:36:35 -0700 Subject: [PATCH 04/22] Remove accidentally added file --- bin/spark-submit.new | 67 -------------------------------------------- 1 file changed, 67 deletions(-) delete mode 100755 bin/spark-submit.new diff --git a/bin/spark-submit.new b/bin/spark-submit.new deleted file mode 100755 index eb68a5c72e952..0000000000000 --- a/bin/spark-submit.new +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -SPARK_HOME="$(cd `dirname $0`/..; pwd)" -ORIG_ARGS=("$@") - -while (($#)); do - if [ "$1" = "--deploy-mode" ]; then - DEPLOY_MODE=$2 - elif [ "$1" = "--driver-memory" ]; then - DRIVER_MEMORY=$2 - elif [ "$1" = "--driver-library-path" ]; then - SPARK_SUBMIT_LIBRARY_PATH=$2 - elif [ "$1" = "--driver-class-path" ]; then - SPARK_SUBMIT_CLASSPATH=$2 - elif [ "$1" = "--driver-java-options" ]; then - SPARK_SUBMIT_OPTS=$2 - elif [ "$1" = "--properties-file" ]; then - PROPERTIES_FILE=$2 - fi - shift -done - -DEPLOY_MODE=${DEPLOY_MODE:-"client"} - -if [ -n "$DRIVER_MEMORY" ] && [ $DEPLOY_MODE == "client" ]; then - SPARK_DRIVER_MEMORY=$DRIVER_MEMORY -fi - -PROPERTIES_FILE=${PROPERTIES_FILE:-"$SPARK_HOME/conf/spark-defaults.conf"} -if [ -f $PROPERTIES_FILE ]; then - DRIVER_EXTRA_JAVA_OPTIONS="spark.driver.extraJavaOptions" - DRIVER_EXTRA_CLASSPATH="spark.driver.extraClassPath" - DRIVER_EXTRA_LIBRARY_PATH="spark.driver.extraLibraryPath" - DRIVER_EXTRA_JAVA_OPTIONS=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_JAVA_OPTIONS" | sed "s/$DRIVER_EXTRA_JAVA_OPTIONS//g" | sed "s/^=//g") - DRIVER_EXTRA_CLASSPATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_CLASSPATH" | sed "s/$DRIVER_EXTRA_CLASSPATH//g" | sed "s/^=//g") - DRIVER_EXTRA_LIBRARY_PATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_LIBRARY_PATH" | sed "s/$DRIVER_EXTRA_LIBRARY_PATH//g" | sed "s/^=//g") -fi - -echo "DEPLOY_MODE = $DEPLOY_MODE" -echo "DRIVER_MEMORY = $DRIVER_MEMORY" -echo "SPARK_DRIVER_MEMORY = $SPARK_DRIVER_MEMORY" -echo "SPARK_SUBMIT_LIBRARY_PATH = $SPARK_SUBMIT_LIBRARY_PATH" -echo "SPARK_SUBMIT_CLASSPATH = $SPARK_SUBMIT_CLASSPATH" -echo "SPARK_SUBMIT_OPTS = $SPARK_SUBMIT_OPTS" -echo "DRIVER_EXTRA_JAVA_OPTIONS = $DRIVER_EXTRA_JAVA_OPTIONS" -echo "DRIVER_EXTRA_CLASSPATH = $DRIVER_EXTRA_CLASSPATH" -echo "DRIVER_EXTRA_LIBRARY_PATH = $DRIVER_EXTRA_LIBRARY_PATH" - -echo "exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"" - From 
8843562bb6883a092e6f4032f05fe01932db086b Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 5 Aug 2014 21:37:11 -0700 Subject: [PATCH 05/22] Fix compilation issues... --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 0fb94ebe8e45b..f8cdbc3c392b5 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -200,7 +200,7 @@ object SparkSubmit { OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES, sysProp = "spark.cores.max"), OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, ALL_DEPLOY_MODES, - sysProp = "spark.files") + sysProp = "spark.files"), // Only process driver specific options for cluster mode here, // because they have already been processed in bash for client mode @@ -209,7 +209,7 @@ object SparkSubmit { OptionAssigner(args.driverExtraJavaOptions, STANDALONE | YARN, CLUSTER, sysProp = "spark.driver.extraJavaOptions"), OptionAssigner(args.driverExtraLibraryPath, STANDALONE | YARN, CLUSTER, - sysProp = "spark.driver.extraLibraryPath"), + sysProp = "spark.driver.extraLibraryPath") ) // In client mode, launch the application main class directly From 98dd8e327ac5940d8a4a3820027645ee4b88178e Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 5 Aug 2014 21:39:07 -0700 Subject: [PATCH 06/22] Add warning if properties file does not exist --- bin/spark-submit | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/spark-submit b/bin/spark-submit index 70d4d9c06893a..3f4a3840b4d9b 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -53,6 +53,7 @@ if [ $DEPLOY_MODE == "client" ]; then # a key value pair delimited either by white space or "=" sign. All # spark.driver.* configs must be processed now before it's too late. 
if [ -f "$PROPERTIES_FILE" ]; then + echo "Using properties file $PROPERTIES_FILE" DRIVER_EXTRA_JAVA_OPTS="spark.driver.extraJavaOptions" DRIVER_EXTRA_CLASSPATH="spark.driver.extraClassPath" DRIVER_EXTRA_LIBRARY_PATH="spark.driver.extraLibraryPath" @@ -66,6 +67,8 @@ if [ $DEPLOY_MODE == "client" ]; then export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" + else + echo "Warning: properties file $PROPERTIES_FILE does not exist." fi fi From 130f295e085d95e8205d882174a5667d29b3b1f2 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 5 Aug 2014 22:12:28 -0700 Subject: [PATCH 07/22] Handle spark.driver.memory too --- bin/spark-submit | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/bin/spark-submit b/bin/spark-submit index 3f4a3840b4d9b..e86032e006dd6 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -46,30 +46,36 @@ PROPERTIES_FILE=${PROPERTIES_FILE:-"$SPARK_HOME/conf/spark-defaults.conf"} # time the JVM has started. if [ $DEPLOY_MODE == "client" ]; then - if [ -n "$DRIVER_MEMORY" ]; then - export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY - fi # We parse the default properties file here, assuming each line is # a key value pair delimited either by white space or "=" sign. All # spark.driver.* configs must be processed now before it's too late. 
if [ -f "$PROPERTIES_FILE" ]; then echo "Using properties file $PROPERTIES_FILE" + DRIVER_MEMORY_CONF="spark.driver.memory" DRIVER_EXTRA_JAVA_OPTS="spark.driver.extraJavaOptions" DRIVER_EXTRA_CLASSPATH="spark.driver.extraClassPath" DRIVER_EXTRA_LIBRARY_PATH="spark.driver.extraLibraryPath" # Remove "=" sign and double quotes around the value, if any - DRIVER_EXTRA_JAVA_OPTS=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_JAVA_OPTS" | \ - sed "s/$DRIVER_EXTRA_JAVA_OPTS//g" | sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g") - DRIVER_EXTRA_CLASSPATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_CLASSPATH" | \ - sed "s/$DRIVER_EXTRA_CLASSPATH//g" | sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g") - DRIVER_EXTRA_LIBRARY_PATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_LIBRARY_PATH" | \ - sed "s/$DRIVER_EXTRA_LIBRARY_PATH//g" | sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g") + DRIVER_MEMORY_CONF=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_MEMORY_CONF" | sed "s/$DRIVER_MEMORY_CONF//g" | \ + sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") + DRIVER_EXTRA_JAVA_OPTS=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_JAVA_OPTS" | sed "s/$DRIVER_EXTRA_JAVA_OPTS//g" | \ + sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") + DRIVER_EXTRA_CLASSPATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_CLASSPATH" | sed "s/$DRIVER_EXTRA_CLASSPATH//g" | \ + sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") + DRIVER_EXTRA_LIBRARY_PATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_LIBRARY_PATH" | sed "s/$DRIVER_EXTRA_LIBRARY_PATH//g" | \ + sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" export 
SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" else echo "Warning: properties file $PROPERTIES_FILE does not exist." fi + + # Favor command line memory over config memory + DRIVER_MEMORY=${DRIVER_MEMORY:-"$DRIVER_MEMORY_CONF"} + if [ -n "$DRIVER_MEMORY" ]; then + export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY + fi fi exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}" From 4edcaa8027961578246c5cfa8a2d82a92a031265 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 5 Aug 2014 23:17:56 -0700 Subject: [PATCH 08/22] Redirect stdout to stderr for python --- bin/spark-submit | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/spark-submit b/bin/spark-submit index e86032e006dd6..d56696f907831 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -50,7 +50,7 @@ if [ $DEPLOY_MODE == "client" ]; then # a key value pair delimited either by white space or "=" sign. All # spark.driver.* configs must be processed now before it's too late. if [ -f "$PROPERTIES_FILE" ]; then - echo "Using properties file $PROPERTIES_FILE" + echo "Using properties file $PROPERTIES_FILE" 1>&2 DRIVER_MEMORY_CONF="spark.driver.memory" DRIVER_EXTRA_JAVA_OPTS="spark.driver.extraJavaOptions" DRIVER_EXTRA_CLASSPATH="spark.driver.extraClassPath" @@ -68,7 +68,7 @@ if [ $DEPLOY_MODE == "client" ]; then export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" else - echo "Warning: properties file $PROPERTIES_FILE does not exist." + echo "Warning: properties file $PROPERTIES_FILE does not exist." 
1>&2 fi # Favor command line memory over config memory From e5cfb4627df353125f8f2382bad4bb35aa03c7fb Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 13:26:04 -0700 Subject: [PATCH 09/22] Collapse duplicate code + fix potential whitespace issues --- bin/spark-submit | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/bin/spark-submit b/bin/spark-submit index d56696f907831..46689975d1fe2 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -50,25 +50,28 @@ if [ $DEPLOY_MODE == "client" ]; then # a key value pair delimited either by white space or "=" sign. All # spark.driver.* configs must be processed now before it's too late. if [ -f "$PROPERTIES_FILE" ]; then - echo "Using properties file $PROPERTIES_FILE" 1>&2 - DRIVER_MEMORY_CONF="spark.driver.memory" - DRIVER_EXTRA_JAVA_OPTS="spark.driver.extraJavaOptions" - DRIVER_EXTRA_CLASSPATH="spark.driver.extraClassPath" - DRIVER_EXTRA_LIBRARY_PATH="spark.driver.extraLibraryPath" - # Remove "=" sign and double quotes around the value, if any - DRIVER_MEMORY_CONF=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_MEMORY_CONF" | sed "s/$DRIVER_MEMORY_CONF//g" | \ - sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") - DRIVER_EXTRA_JAVA_OPTS=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_JAVA_OPTS" | sed "s/$DRIVER_EXTRA_JAVA_OPTS//g" | \ - sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") - DRIVER_EXTRA_CLASSPATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_CLASSPATH" | sed "s/$DRIVER_EXTRA_CLASSPATH//g" | \ - sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") - DRIVER_EXTRA_LIBRARY_PATH=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$DRIVER_EXTRA_LIBRARY_PATH" | sed "s/$DRIVER_EXTRA_LIBRARY_PATH//g" | \ - sed "s/^=//g" | sed "s/^\"\(.*\)\"$/\1/g" | sed "s/^[[:space:]]*//g" | sed "s/[[:space:]]*$//g") + echo 
"Using properties file $PROPERTIES_FILE." 1>&2 + + # Parse the value of the given config + # This removes the "=" sign, whitespace, and double quotes around the value (if any) + parse_config() { + result=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$1" | \ + sed "s/$1//g" | \ + sed "s/^[[:space:]]*=//g" | \ + sed "s/^[[:space:]]*\"\(.*\)\"[[:space:]]*$/\1/g" | \ + sed "s/^[[:space:]]*\(.*\)[[:space:]]*$/\1/g" \ + ) + } + parse_config "spark.driver.memory"; DRIVER_MEMORY_CONF="$result" + parse_config "spark.driver.extraJavaOptions"; DRIVER_EXTRA_JAVA_OPTS="$result" + parse_config "spark.driver.extraClassPath"; DRIVER_EXTRA_CLASSPATH="$result" + parse_config "spark.driver.extraLibraryPath"; DRIVER_EXTRA_LIBRARY_PATH="$result" + export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" else - echo "Warning: properties file $PROPERTIES_FILE does not exist." 1>&2 + echo "Warning: properties file $PROPERTIES_FILE does not exist!" 
1>&2 fi # Favor command line memory over config memory From ef12f74b9b7e7edcefb6b82cb53de3eccbf0d9ad Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 13:31:32 -0700 Subject: [PATCH 10/22] Minor formatting --- bin/spark-submit | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/spark-submit b/bin/spark-submit index 46689975d1fe2..faf09f3331064 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -55,7 +55,9 @@ if [ $DEPLOY_MODE == "client" ]; then # Parse the value of the given config # This removes the "=" sign, whitespace, and double quotes around the value (if any) parse_config() { - result=$(sed "/^#/ d" "$PROPERTIES_FILE" | grep "$1" | \ + result=$( \ + sed "/^#/ d" "$PROPERTIES_FILE" | \ + grep "$1" | \ sed "s/$1//g" | \ sed "s/^[[:space:]]*=//g" | \ sed "s/^[[:space:]]*\"\(.*\)\"[[:space:]]*$/\1/g" | \ From fa2136ed14145f8fa18f40e6e1a3a776048c01ab Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 22:23:44 -0700 Subject: [PATCH 11/22] Escape Java options + parse java properties files properly --- bin/spark-class | 35 +++++++++++++++++++++++++++++++++-- bin/spark-submit | 11 +++++------ 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index 3f6beca5becf0..829f4cef2b272 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -146,10 +146,41 @@ if $cygwin; then fi export CLASSPATH +# Properly escape java options, dealing with whitespace, double quotes and backslashes +# This accepts a string, and returns the escaped list through ESCAPED_JAVA_OPTS +escape_java_options() { + ESCAPED_JAVA_OPTS=() # return value + option_buffer="" # buffer for collecting parts of an option + opened_quotes=0 # whether we are expecting a closing double quotes + for word in $1; do + contains_quote=$(echo "$word" | grep \" | grep -v \\\\\") + if [ -n "$contains_quote" ]; then + # Flip the bit + opened_quotes=$(((opened_quotes + 1) % 2)) + fi + if [[ $opened_quotes == 0 ]]; then + 
ESCAPED_JAVA_OPTS+=("$(echo "$option_buffer $word" | sed "s/^[[:space:]]*//" | sed "s/\([^\\]\)\"/\1/g")") + option_buffer="" + else + option_buffer="$option_buffer $word" + fi + done + # Something is wrong if we ended with open double quotes + if [[ $opened_quotes == 1 ]]; then + echo "Java options parse error! Expecting closing double quotes." 1>&2 + exit 1 + fi +} + +escape_java_options "$JAVA_OPTS" +for option in "${ESCAPED_JAVA_OPTS[@]}"; do + echo "$option" +done + if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then echo -n "Spark Command: " 1>&2 - echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2 + echo "$RUNNER" -cp "$CLASSPATH" "${ESCAPED_JAVA_OPTS[@]}" "$@" 1>&2 echo -e "========================================\n" 1>&2 fi -exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" +exec "$RUNNER" -cp "$CLASSPATH" "${ESCAPED_JAVA_OPTS[@]}" "$@" diff --git a/bin/spark-submit b/bin/spark-submit index faf09f3331064..d48c48a5244a8 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -52,15 +52,14 @@ if [ $DEPLOY_MODE == "client" ]; then if [ -f "$PROPERTIES_FILE" ]; then echo "Using properties file $PROPERTIES_FILE." 
1>&2 - # Parse the value of the given config - # This removes the "=" sign, whitespace, and double quotes around the value (if any) + # Parse the value of the given config according to the specifications outlined in + # http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader) parse_config() { result=$( \ - sed "/^#/ d" "$PROPERTIES_FILE" | \ + sed "/^[#!]/ d" "conf/spark-defaults.conf" | \ grep "$1" | \ - sed "s/$1//g" | \ - sed "s/^[[:space:]]*=//g" | \ - sed "s/^[[:space:]]*\"\(.*\)\"[[:space:]]*$/\1/g" | \ + sed "s/$1//" | \ + sed "s/^[[:space:]]*[:=]\{0,1\}//" | \ sed "s/^[[:space:]]*\(.*\)[[:space:]]*$/\1/g" \ ) } From dec23439ad82718f786ea022b1f118f202687cc1 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 22:28:38 -0700 Subject: [PATCH 12/22] Only export variables if they exist --- bin/spark-class | 6 ++++-- bin/spark-submit | 12 +++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index 829f4cef2b272..700615d051204 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -75,8 +75,10 @@ case "$1" in # Spark submit uses SPARK_SUBMIT_OPTS and SPARK_JAVA_OPTS 'org.apache.spark.deploy.SparkSubmit') - OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS \ - -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH" + OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS" + if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then + OUR_JAVA_OPTS="$OUT_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH" + fi OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM} ;; diff --git a/bin/spark-submit b/bin/spark-submit index d48c48a5244a8..3889ae79f6dc0 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -68,9 +68,15 @@ if [ $DEPLOY_MODE == "client" ]; then parse_config "spark.driver.extraClassPath"; DRIVER_EXTRA_CLASSPATH="$result" parse_config "spark.driver.extraLibraryPath"; DRIVER_EXTRA_LIBRARY_PATH="$result" - export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" - export 
SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" - export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" + if [ -n "$DRIVER_EXTRA_JAVA_OPTS" ]; then + export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" + fi + if [ -n "$DRIVER_EXTRA_CLASSPATH" ]; then + export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" + fi + if [ -n "$DRIVER_EXTRA_LIBRARY_PATH" ]; then + export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" + fi else echo "Warning: properties file $PROPERTIES_FILE does not exist!" 1>&2 fi From a4df3c4165ce4546742fbd0b9d92ea612973bb2e Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 22:47:57 -0700 Subject: [PATCH 13/22] Move parsing and escaping logic to utils.sh This commit also fixes a deadly typo. --- bin/spark-class | 43 ++++++++--------------------------- bin/spark-submit | 39 +++++++++++--------------------- bin/utils.sh | 59 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 59 deletions(-) create mode 100755 bin/utils.sh diff --git a/bin/spark-class b/bin/spark-class index 700615d051204..a61d6a369cf80 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -30,6 +30,9 @@ FWDIR="$(cd `dirname $0`/..; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" +# Load utility functions +. "$SPARK_HOME/bin/utils.sh" + . 
$FWDIR/bin/load-spark-env.sh if [ -z "$1" ]; then @@ -77,7 +80,7 @@ case "$1" in 'org.apache.spark.deploy.SparkSubmit') OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS" if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then - OUR_JAVA_OPTS="$OUT_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH" + OUR_JAVA_OPTS="$OUR_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH" fi OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM} ;; @@ -103,11 +106,16 @@ fi # Set JAVA_OPTS to be able to load native libraries and to set heap size JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS" JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM" + # Load extra JAVA_OPTS from conf/java-opts, if it exists if [ -e "$FWDIR/conf/java-opts" ] ; then JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`" fi -export JAVA_OPTS + +# Escape JAVA_OPTS properly to handle whitespace, double quotes and backslashes +# This exports the escaped java options into ESCAPED_JAVA_OPTS +escape_java_options "$JAVA_OPTS" + # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala! 
TOOLS_DIR="$FWDIR"/tools @@ -148,37 +156,6 @@ if $cygwin; then fi export CLASSPATH -# Properly escape java options, dealing with whitespace, double quotes and backslashes -# This accepts a string, and returns the escaped list through ESCAPED_JAVA_OPTS -escape_java_options() { - ESCAPED_JAVA_OPTS=() # return value - option_buffer="" # buffer for collecting parts of an option - opened_quotes=0 # whether we are expecting a closing double quotes - for word in $1; do - contains_quote=$(echo "$word" | grep \" | grep -v \\\\\") - if [ -n "$contains_quote" ]; then - # Flip the bit - opened_quotes=$(((opened_quotes + 1) % 2)) - fi - if [[ $opened_quotes == 0 ]]; then - ESCAPED_JAVA_OPTS+=("$(echo "$option_buffer $word" | sed "s/^[[:space:]]*//" | sed "s/\([^\\]\)\"/\1/g")") - option_buffer="" - else - option_buffer="$option_buffer $word" - fi - done - # Something is wrong if we ended with open double quotes - if [[ $opened_quotes == 1 ]]; then - echo "Java options parse error! Expecting closing double quotes." 1>&2 - exit 1 - fi -} - -escape_java_options "$JAVA_OPTS" -for option in "${ESCAPED_JAVA_OPTS[@]}"; do - echo "$option" -done - if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then echo -n "Spark Command: " 1>&2 echo "$RUNNER" -cp "$CLASSPATH" "${ESCAPED_JAVA_OPTS[@]}" "$@" 1>&2 diff --git a/bin/spark-submit b/bin/spark-submit index 3889ae79f6dc0..6d7f139fdc3cc 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -20,6 +20,14 @@ export SPARK_HOME="$(cd `dirname $0`/..; pwd)" ORIG_ARGS=("$@") +# Load utility functions +. "$SPARK_HOME/bin/utils.sh" + +# For client mode, the driver will be launched in the JVM that launches +# SparkSubmit, so we need to handle the class paths, java options, and +# memory pre-emptively in bash. Otherwise, it will be too late by the +# time the JVM has started. 
+ while (($#)); do if [ "$1" = "--deploy-mode" ]; then DEPLOY_MODE=$2 @@ -40,34 +48,14 @@ done DEPLOY_MODE=${DEPLOY_MODE:-"client"} PROPERTIES_FILE=${PROPERTIES_FILE:-"$SPARK_HOME/conf/spark-defaults.conf"} -# For client mode, the driver will be launched in the JVM that launches -# SparkSubmit, so we need to handle the class paths, java options, and -# memory pre-emptively in bash. Otherwise, it will be too late by the -# time the JVM has started. - if [ $DEPLOY_MODE == "client" ]; then - # We parse the default properties file here, assuming each line is - # a key value pair delimited either by white space or "=" sign. All - # spark.driver.* configs must be processed now before it's too late. + # Parse the default properties file here for spark.driver.* configs if [ -f "$PROPERTIES_FILE" ]; then echo "Using properties file $PROPERTIES_FILE." 1>&2 - - # Parse the value of the given config according to the specifications outlined in - # http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader) - parse_config() { - result=$( \ - sed "/^[#!]/ d" "conf/spark-defaults.conf" | \ - grep "$1" | \ - sed "s/$1//" | \ - sed "s/^[[:space:]]*[:=]\{0,1\}//" | \ - sed "s/^[[:space:]]*\(.*\)[[:space:]]*$/\1/g" \ - ) - } - parse_config "spark.driver.memory"; DRIVER_MEMORY_CONF="$result" - parse_config "spark.driver.extraJavaOptions"; DRIVER_EXTRA_JAVA_OPTS="$result" - parse_config "spark.driver.extraClassPath"; DRIVER_EXTRA_CLASSPATH="$result" - parse_config "spark.driver.extraLibraryPath"; DRIVER_EXTRA_LIBRARY_PATH="$result" - + parse_java_property "spark.driver.memory"; DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE" + parse_java_property "spark.driver.extraJavaOptions"; DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE" + parse_java_property "spark.driver.extraClassPath"; DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE" + parse_java_property "spark.driver.extraLibraryPath"; DRIVER_EXTRA_LIBRARY_PATH="$JAVA_PROPERTY_VALUE" if [ -n "$DRIVER_EXTRA_JAVA_OPTS" ]; then 
export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" fi @@ -80,7 +68,6 @@ if [ $DEPLOY_MODE == "client" ]; then else echo "Warning: properties file $PROPERTIES_FILE does not exist!" 1>&2 fi - # Favor command line memory over config memory DRIVER_MEMORY=${DRIVER_MEMORY:-"$DRIVER_MEMORY_CONF"} if [ -n "$DRIVER_MEMORY" ]; then diff --git a/bin/utils.sh b/bin/utils.sh new file mode 100755 index 0000000000000..613b72a49cf4b --- /dev/null +++ b/bin/utils.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Parse the value of a config from a java properties file according to the specifications in +# http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader). +# This accepts the name of the config and returns the value through JAVA_PROPERTY_VALUE. 
+parse_java_property() { + JAVA_PROPERTY_VALUE=$( \ + sed "/^[#!]/ d" "conf/spark-defaults.conf" | \ + grep "$1" | \ + sed "s/$1//" | \ + sed "s/^[[:space:]]*[:=]\{0,1\}//" | \ + sed "s/^[[:space:]]*\(.*\)[[:space:]]*$/\1/g" \ + ) + export JAVA_PROPERTY_VALUE +} + +# Properly escape java options, dealing with whitespace, double quotes and backslashes +# This accepts a string, and returns the escaped list through ESCAPED_JAVA_OPTS. +escape_java_options() { + ESCAPED_JAVA_OPTS=() # return value + option_buffer="" # buffer for collecting parts of an option + opened_quotes=0 # whether we are expecting a closing double quotes + for word in $1; do + contains_quote=$(echo "$word" | grep \" | grep -v \\\\\") + if [ -n "$contains_quote" ]; then + # Flip the bit + opened_quotes=$(((opened_quotes + 1) % 2)) + fi + if [[ $opened_quotes == 0 ]]; then + ESCAPED_JAVA_OPTS+=("$(echo "$option_buffer $word" | sed "s/^[[:space:]]*//" | sed "s/\([^\\]\)\"/\1/g")") + option_buffer="" + else + option_buffer="$option_buffer $word" + fi + done + # Something is wrong if we ended with open double quotes + if [[ $opened_quotes == 1 ]]; then + echo "Java options parse error! Expecting closing double quotes." 
1>&2 + exit 1 + fi + export ESCAPED_JAVA_OPTS +} + From de765c9813275b939741e1b78567b2443fab5f2d Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 23:22:05 -0700 Subject: [PATCH 14/22] Print spark-class command properly --- bin/spark-class | 5 ++++- bin/utils.sh | 28 +++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index a61d6a369cf80..5399ea1e23117 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -157,8 +157,11 @@ fi export CLASSPATH if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then + # Put quotes around system properties in case they contain spaces + # This exports the resulting list of java opts into QUOTED_JAVA_OPTS + quote_java_property "${ESCAPED_JAVA_OPTS[@]}" echo -n "Spark Command: " 1>&2 - echo "$RUNNER" -cp "$CLASSPATH" "${ESCAPED_JAVA_OPTS[@]}" "$@" 1>&2 + echo "$RUNNER" -cp "$CLASSPATH" "${QUOTED_JAVA_OPTS[@]}" "$@" 1>&2 echo -e "========================================\n" 1>&2 fi diff --git a/bin/utils.sh b/bin/utils.sh index 613b72a49cf4b..cf15f69308c8d 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -16,9 +16,14 @@ # limitations under the License. # +# * ---------------------------------------------------- * +# | Utility functions for launching Spark applications | +# * ---------------------------------------------------- * + # Parse the value of a config from a java properties file according to the specifications in # http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader). # This accepts the name of the config and returns the value through JAVA_PROPERTY_VALUE. +# This currently does not support multi-line configs. 
parse_java_property() { JAVA_PROPERTY_VALUE=$( \ sed "/^[#!]/ d" "conf/spark-defaults.conf" | \ @@ -30,22 +35,24 @@ parse_java_property() { export JAVA_PROPERTY_VALUE } -# Properly escape java options, dealing with whitespace, double quotes and backslashes -# This accepts a string, and returns the escaped list through ESCAPED_JAVA_OPTS. +# Properly escape java options, dealing with whitespace, double quotes and backslashes. +# This accepts a string and returns the escaped list through ESCAPED_JAVA_OPTS. escape_java_options() { ESCAPED_JAVA_OPTS=() # return value option_buffer="" # buffer for collecting parts of an option opened_quotes=0 # whether we are expecting a closing double quotes for word in $1; do contains_quote=$(echo "$word" | grep \" | grep -v \\\\\") - if [ -n "$contains_quote" ]; then + if [[ -n "$contains_quote" ]]; then # Flip the bit opened_quotes=$(((opened_quotes + 1) % 2)) fi if [[ $opened_quotes == 0 ]]; then + # Remove all non-escaped quotes around the value ESCAPED_JAVA_OPTS+=("$(echo "$option_buffer $word" | sed "s/^[[:space:]]*//" | sed "s/\([^\\]\)\"/\1/g")") option_buffer="" else + # We are expecting a closing double quote, so keep buffering option_buffer="$option_buffer $word" fi done @@ -57,3 +64,18 @@ escape_java_options() { export ESCAPED_JAVA_OPTS } +# Put double quotes around each of the given java options that is a system property. 
+# This accepts a list and returns the quoted list through QUOTED_JAVA_OPTS +quote_java_property() { + QUOTED_JAVA_OPTS=() + for opt in "$@"; do + is_system_property=$(echo "$opt" | grep -e "^-D") + if [[ -n "$is_system_property" ]]; then + QUOTED_JAVA_OPTS+=(\"$opt\") + else + QUOTED_JAVA_OPTS+=("$opt") + fi + done + export QUOTED_JAVA_OPTS +} + From 8e552b733d52ada89dd7c0e8692fcca87fc00d26 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 23:25:36 -0700 Subject: [PATCH 15/22] Include an example of spark.*.extraJavaOptions Right now it's not super obvious how to specify multiple java options, especially ones with white spaces. --- conf/spark-defaults.conf.template | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/conf/spark-defaults.conf.template b/conf/spark-defaults.conf.template index 2779342769c14..974ece201495c 100644 --- a/conf/spark-defaults.conf.template +++ b/conf/spark-defaults.conf.template @@ -2,7 +2,8 @@ # This is useful for setting default environmental settings. 
# Example: -# spark.master spark://master:7077 -# spark.eventLog.enabled true -# spark.eventLog.dir hdfs://namenode:8021/directory -# spark.serializer org.apache.spark.serializer.KryoSerializer +# spark.master spark://master:7077 +# spark.eventLog.enabled true +# spark.eventLog.dir hdfs://namenode:8021/directory +# spark.serializer org.apache.spark.serializer.KryoSerializer +# spark.executor.extraJavaOptions -XX:+PrintGCDetail -Dmy.property="one two three" From c854859be8a604ac04c74488e7729423c47acd37 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Wed, 6 Aug 2014 23:38:39 -0700 Subject: [PATCH 16/22] Add small comment --- bin/spark-submit | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/spark-submit b/bin/spark-submit index 6d7f139fdc3cc..be8c464599e00 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -52,6 +52,7 @@ if [ $DEPLOY_MODE == "client" ]; then # Parse the default properties file here for spark.driver.* configs if [ -f "$PROPERTIES_FILE" ]; then echo "Using properties file $PROPERTIES_FILE." 1>&2 + # This exports the value of the given key into JAVA_PROPERTY_VALUE parse_java_property "spark.driver.memory"; DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE" parse_java_property "spark.driver.extraJavaOptions"; DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE" parse_java_property "spark.driver.extraClassPath"; DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE" From 1cdc6b15ff375bfb0ce3fe3f6b6c434dc4e30947 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 7 Aug 2014 12:33:55 -0700 Subject: [PATCH 17/22] Fix bug: escape escaped double quotes properly The previous code used to ignore all closing quotes if the same token also has an escaped double quote. For example, in -Dkey="I am the \"man\"" the last token contains both escaped quotes and valid quotes. This used to be interpreted as a token that doesn't have a closing quote when it actually does. This is fixed in this commit. 
--- bin/utils.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/utils.sh b/bin/utils.sh index cf15f69308c8d..6aa730760c759 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -42,7 +42,7 @@ escape_java_options() { option_buffer="" # buffer for collecting parts of an option opened_quotes=0 # whether we are expecting a closing double quotes for word in $1; do - contains_quote=$(echo "$word" | grep \" | grep -v \\\\\") + contains_quote=$(echo "$word" | sed "s/\\\\\"//g" | grep "\"") if [[ -n "$contains_quote" ]]; then # Flip the bit opened_quotes=$(((opened_quotes + 1) % 2)) From 45a1eb996773fa1828d1d489cbc451f2033845e0 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 7 Aug 2014 13:28:56 -0700 Subject: [PATCH 18/22] Fix bug: escape escaped backslashes and quotes properly... This is so that the way this is parsed and the way Java parses its java opts is consistent. --- bin/utils.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bin/utils.sh b/bin/utils.sh index 6aa730760c759..e1624fe07752f 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -49,7 +49,12 @@ escape_java_options() { fi if [[ $opened_quotes == 0 ]]; then # Remove all non-escaped quotes around the value - ESCAPED_JAVA_OPTS+=("$(echo "$option_buffer $word" | sed "s/^[[:space:]]*//" | sed "s/\([^\\]\)\"/\1/g")") + ESCAPED_JAVA_OPTS+=("$( + echo "$option_buffer $word" | \ + sed "s/^[[:space:]]*//" | \ + sed "s/\([^\\]\)\"/\1/g" | \ + sed "s/\\\\\([\\\"]\)/\1/g" + )") option_buffer="" else # We are expecting a closing double quote, so keep buffering From aabfc7e1da8897b266020da6c480cbe7d774bc99 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 7 Aug 2014 14:24:57 -0700 Subject: [PATCH 19/22] escape -> split (minor) --- bin/spark-class | 10 +++++----- bin/utils.sh | 16 ++++++++-------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index 5399ea1e23117..ce5bebe5929ea 100755 --- a/bin/spark-class +++ b/bin/spark-class 
@@ -112,9 +112,9 @@ if [ -e "$FWDIR/conf/java-opts" ] ; then JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`" fi -# Escape JAVA_OPTS properly to handle whitespace, double quotes and backslashes -# This exports the escaped java options into ESCAPED_JAVA_OPTS -escape_java_options "$JAVA_OPTS" +# Split JAVA_OPTS properly to handle whitespace, double quotes and backslashes +# This exports the split java options into SPLIT_JAVA_OPTS +split_java_options "$JAVA_OPTS" # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala! @@ -159,10 +159,10 @@ export CLASSPATH if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then # Put quotes around system properties in case they contain spaces # This exports the resulting list of java opts into QUOTED_JAVA_OPTS - quote_java_property "${ESCAPED_JAVA_OPTS[@]}" + quote_java_property "${SPLIT_JAVA_OPTS[@]}" echo -n "Spark Command: " 1>&2 echo "$RUNNER" -cp "$CLASSPATH" "${QUOTED_JAVA_OPTS[@]}" "$@" 1>&2 echo -e "========================================\n" 1>&2 fi -exec "$RUNNER" -cp "$CLASSPATH" "${ESCAPED_JAVA_OPTS[@]}" "$@" +exec "$RUNNER" -cp "$CLASSPATH" "${SPLIT_JAVA_OPTS[@]}" "$@" diff --git a/bin/utils.sh b/bin/utils.sh index e1624fe07752f..431813072754f 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -35,12 +35,12 @@ parse_java_property() { export JAVA_PROPERTY_VALUE } -# Properly escape java options, dealing with whitespace, double quotes and backslashes. -# This accepts a string and returns the escaped list through ESCAPED_JAVA_OPTS. -escape_java_options() { - ESCAPED_JAVA_OPTS=() # return value - option_buffer="" # buffer for collecting parts of an option - opened_quotes=0 # whether we are expecting a closing double quotes +# Properly split java options, dealing with whitespace, double quotes and backslashes. +# This accepts a string and returns the resulting list through SPLIT_JAVA_OPTS. 
+split_java_options() { + SPLIT_JAVA_OPTS=() # return value + option_buffer="" # buffer for collecting parts of an option + opened_quotes=0 # whether we are expecting a closing double quotes for word in $1; do contains_quote=$(echo "$word" | sed "s/\\\\\"//g" | grep "\"") if [[ -n "$contains_quote" ]]; then @@ -49,7 +49,7 @@ escape_java_options() { fi if [[ $opened_quotes == 0 ]]; then # Remove all non-escaped quotes around the value - ESCAPED_JAVA_OPTS+=("$( + SPLIT_JAVA_OPTS+=("$( echo "$option_buffer $word" | \ sed "s/^[[:space:]]*//" | \ sed "s/\([^\\]\)\"/\1/g" | \ @@ -66,7 +66,7 @@ escape_java_options() { echo "Java options parse error! Expecting closing double quotes." 1>&2 exit 1 fi - export ESCAPED_JAVA_OPTS + export SPLIT_JAVA_OPTS } # Put double quotes around each of the given java options that is a system property. From a992ae2ba7067cf76fba0e3ef192b275eee40b57 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 7 Aug 2014 16:51:16 -0700 Subject: [PATCH 20/22] Escape spark.*.extraJavaOptions correctly We previously never dealt with this correctly, in that we evaluated all backslashes twice, once when passing spark.*.extraJavaOptions into SparkSubmit, and another time when calling Utils.splitCommandString. This means we need to pass the raw values of these configs directly to the JVM without evaluating the backslashes when launching SparkSubmit. The way we do this is through a few custom environment variables. As of this commit, the user should follow the format outlined in spark-defaults.conf.template for spark.*.extraJavaOptions, and the expected java options (with quotes, whitespaces and backslashes and everything) will be propagated to the driver or the executors correctly. 
--- bin/spark-submit | 81 ++++++++++++------- .../spark/deploy/SparkSubmitArguments.scala | 9 +++ 2 files changed, 63 insertions(+), 27 deletions(-) diff --git a/bin/spark-submit b/bin/spark-submit index be8c464599e00..1a815becae9b0 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -23,11 +23,6 @@ ORIG_ARGS=("$@") # Load utility functions . "$SPARK_HOME/bin/utils.sh" -# For client mode, the driver will be launched in the JVM that launches -# SparkSubmit, so we need to handle the class paths, java options, and -# memory pre-emptively in bash. Otherwise, it will be too late by the -# time the JVM has started. - while (($#)); do if [ "$1" = "--deploy-mode" ]; then DEPLOY_MODE=$2 @@ -46,32 +41,64 @@ while (($#)); do done DEPLOY_MODE=${DEPLOY_MODE:-"client"} -PROPERTIES_FILE=${PROPERTIES_FILE:-"$SPARK_HOME/conf/spark-defaults.conf"} +DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf" +PROPERTIES_FILE=${PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"} + +unset DRIVER_EXTRA_JAVA_OPTIONS +unset EXECUTOR_EXTRA_JAVA_OPTIONS + +# A few Spark configs must be parsed early on before launching the JVM: +# +# [spark.driver.extra*] +# These configs encode java options, class paths, and library paths +# needed to launch the JVM if we are running Spark in client mode +# +# [spark.*.extraJavaOptions] +# The escaped characters in these configs must be preserved for +# splitting the arguments in Java later. For these configs, we +# export the raw values as environment variables. +# +if [[ -f "$PROPERTIES_FILE" ]]; then + echo "Using properties file $PROPERTIES_FILE." 
1>&2 + # This exports the value of the given key into JAVA_PROPERTY_VALUE + parse_java_property "spark.driver.memory" + DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE" + parse_java_property "spark.driver.extraLibraryPath" + DRIVER_EXTRA_LIBRARY_PATH="$JAVA_PROPERTY_VALUE" + parse_java_property "spark.driver.extraClassPath" + DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE" + parse_java_property "spark.driver.extraJavaOptions" + DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE" + parse_java_property "spark.executor.extraJavaOptions" + EXECUTOR_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE" + if [[ -n "$DRIVER_EXTRA_JAVA_OPTS" ]]; then + export DRIVER_EXTRA_JAVA_OPTS + fi + if [[ -n "$EXECUTOR_EXTRA_JAVA_OPTS" ]]; then + export EXECUTOR_EXTRA_JAVA_OPTS + fi +elif [[ "$PROPERTIES_FILE" != "$DEFAULT_PROPERTIES_FILE" ]]; then + echo "Warning: properties file $PROPERTIES_FILE does not exist." 1>&2 +fi + +# For client mode, the driver will be launched in the JVM that launches +# SparkSubmit, so we need to handle the class paths, java options, and +# memory pre-emptively in bash. Otherwise, it will be too late by the +# time the JVM has started. -if [ $DEPLOY_MODE == "client" ]; then - # Parse the default properties file here for spark.driver.* configs - if [ -f "$PROPERTIES_FILE" ]; then - echo "Using properties file $PROPERTIES_FILE." 
1>&2 - # This exports the value of the given key into JAVA_PROPERTY_VALUE - parse_java_property "spark.driver.memory"; DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE" - parse_java_property "spark.driver.extraJavaOptions"; DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE" - parse_java_property "spark.driver.extraClassPath"; DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE" - parse_java_property "spark.driver.extraLibraryPath"; DRIVER_EXTRA_LIBRARY_PATH="$JAVA_PROPERTY_VALUE" - if [ -n "$DRIVER_EXTRA_JAVA_OPTS" ]; then - export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" - fi - if [ -n "$DRIVER_EXTRA_CLASSPATH" ]; then - export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" - fi - if [ -n "$DRIVER_EXTRA_LIBRARY_PATH" ]; then - export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" - fi - else - echo "Warning: properties file $PROPERTIES_FILE does not exist!" 1>&2 +if [[ $DEPLOY_MODE == "client" ]]; then + if [[ -n "$DRIVER_EXTRA_JAVA_OPTS" ]]; then + export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS" + fi + if [[ -n "$DRIVER_EXTRA_CLASSPATH" ]]; then + export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH" + fi + if [[ -n "$DRIVER_EXTRA_LIBRARY_PATH" ]]; then + export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH" fi # Favor command line memory over config memory DRIVER_MEMORY=${DRIVER_MEMORY:-"$DRIVER_MEMORY_CONF"} - if [ -n "$DRIVER_MEMORY" ]; then + if [[ -n "$DRIVER_MEMORY" ]]; then export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY fi fi diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 087dd4d633db0..614089272c1e8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -76,6 +76,15 @@ private[spark] class 
SparkSubmitArguments(args: Seq[String]) { } } } + // For spark.*.extraJavaOptions, we cannot rely on the Java properties loader because it + // un-escapes certain characters (" and \) needed to split the string into java options. + // For these configs, use the equivalent environment variables instead. + sys.env.get("DRIVER_EXTRA_JAVA_OPTS").foreach { opts => + defaultProperties("spark.driver.extraJavaOptions") = opts + } + sys.env.get("EXECUTOR_EXTRA_JAVA_OPTS").foreach { opts => + defaultProperties("spark.executor.extraJavaOptions") = opts + } defaultProperties } From c7b99267c577195882c965029884941b79cc8ed0 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 7 Aug 2014 16:51:29 -0700 Subject: [PATCH 21/22] Minor changes to spark-defaults.conf.template ... to highlight our new-found ability to deal with special values. --- conf/spark-defaults.conf.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/spark-defaults.conf.template b/conf/spark-defaults.conf.template index 974ece201495c..ad7273d830c16 100644 --- a/conf/spark-defaults.conf.template +++ b/conf/spark-defaults.conf.template @@ -6,4 +6,4 @@ # spark.eventLog.enabled true # spark.eventLog.dir hdfs://namenode:8021/directory # spark.serializer org.apache.spark.serializer.KryoSerializer -# spark.executor.extraJavaOptions -XX:+PrintGCDetail -Dmy.property="one two three" +# spark.executor.extraJavaOptions -XX:+PrintGCDetail -Dnumbers="one \"two\" three" From e793e5f56c5c62d94fe0f2ac3d8aefc1d0b1573e Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Thu, 7 Aug 2014 18:38:17 -0700 Subject: [PATCH 22/22] Handle multi-line arguments --- bin/utils.sh | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/bin/utils.sh b/bin/utils.sh index 431813072754f..5280b9c40e929 100755 --- a/bin/utils.sh +++ b/bin/utils.sh @@ -22,12 +22,33 @@ # Parse the value of a config from a java properties file according to the specifications in # 
http://docs.oracle.com/javase/7/docs/api/java/util/Properties.html#load(java.io.Reader). -# This accepts the name of the config and returns the value through JAVA_PROPERTY_VALUE. -# This currently does not support multi-line configs. +# This accepts the name of the config as an argument, and expects the path of the property +# file to be found in PROPERTIES_FILE. The value is returned through JAVA_PROPERTY_VALUE. parse_java_property() { + JAVA_PROPERTY_VALUE="" # return value + config_buffer="" # buffer for collecting parts of a config value + multi_line=0 # whether this config is spanning multiple lines + while read -r line; do + # Strip leading and trailing whitespace + line=$(echo "$line" | sed "s/^[[:space:]]*//" | sed "s/[[:space:]]*$//") + contains_config=$(echo "$line" | grep -e "^$1") + if [[ -n "$contains_config" || "$multi_line" == 1 ]]; then + has_more_lines=$(echo "$line" | grep -e "\\\\$") + if [[ -n "$has_more_lines" ]]; then + # Strip trailing backslash + line=$(echo "$line" | sed "s/\\\\$//") + config_buffer="$config_buffer $line" + multi_line=1 + else + JAVA_PROPERTY_VALUE="$config_buffer $line" + break + fi + fi + done < "$PROPERTIES_FILE" + + # Actually extract the value of the config JAVA_PROPERTY_VALUE=$( \ - sed "/^[#!]/ d" "conf/spark-defaults.conf" | \ - grep "$1" | \ + echo "$JAVA_PROPERTY_VALUE" | \ sed "s/$1//" | \ sed "s/^[[:space:]]*[:=]\{0,1\}//" | \ sed "s/^[[:space:]]*\(.*\)[[:space:]]*$/\1/g" \ @@ -63,7 +84,8 @@ split_java_options() { done # Something is wrong if we ended with open double quotes if [[ $opened_quotes == 1 ]]; then - echo "Java options parse error! Expecting closing double quotes." 1>&2 + echo -e "Java options parse error! Expecting closing double quotes:" 1>&2 + echo -e " ${SPLIT_JAVA_OPTS[@]}" 1>&2 exit 1 fi export SPLIT_JAVA_OPTS