Skip to content

Commit a992ae2

Browse files
committed
Escape spark.*.extraJavaOptions correctly
We previously never dealt with this correctly, in that we evaluated all backslashes twice, once when passing spark.*.extraJavaOptions into SparkSubmit, and another time when calling Utils.splitCommandString. This means we need to pass the raw values of these configs directly to the JVM without evaluating the backslashes when launching SparkSubmit. The way we do this is through a few custom environment variables. As of this commit, the user should follow the format outlined in spark-defaults.conf.template for spark.*.extraJavaOptions, and the expected java options (with quotes, whitespaces and backslashes and everything) will be propagated to the driver or the executors correctly.
1 parent aabfc7e commit a992ae2

File tree

2 files changed

+63
-27
lines changed

2 files changed

+63
-27
lines changed

bin/spark-submit

Lines changed: 54 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,6 @@ ORIG_ARGS=("$@")
2323
# Load utility functions
2424
. "$SPARK_HOME/bin/utils.sh"
2525

26-
# For client mode, the driver will be launched in the JVM that launches
27-
# SparkSubmit, so we need to handle the class paths, java options, and
28-
# memory pre-emptively in bash. Otherwise, it will be too late by the
29-
# time the JVM has started.
30-
3126
while (($#)); do
3227
if [ "$1" = "--deploy-mode" ]; then
3328
DEPLOY_MODE=$2
@@ -46,32 +41,64 @@ while (($#)); do
4641
done
4742

4843
DEPLOY_MODE=${DEPLOY_MODE:-"client"}
49-
PROPERTIES_FILE=${PROPERTIES_FILE:-"$SPARK_HOME/conf/spark-defaults.conf"}
44+
DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf"
45+
PROPERTIES_FILE=${PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"}
46+
47+
unset DRIVER_EXTRA_JAVA_OPTS
48+
unset EXECUTOR_EXTRA_JAVA_OPTS
49+
50+
# A few Spark configs must be parsed early on before launching the JVM:
51+
#
52+
# [spark.driver.extra*]
53+
# These configs encode java options, class paths, and library paths
54+
# needed to launch the JVM if we are running Spark in client mode
55+
#
56+
# [spark.*.extraJavaOptions]
57+
# The escaped characters in these configs must be preserved for
58+
# splitting the arguments in Java later. For these configs, we
59+
# export the raw values as environment variables.
60+
#
61+
if [[ -f "$PROPERTIES_FILE" ]]; then
62+
echo "Using properties file $PROPERTIES_FILE." 1>&2
63+
# This exports the value of the given key into JAVA_PROPERTY_VALUE
64+
parse_java_property "spark.driver.memory"
65+
DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE"
66+
parse_java_property "spark.driver.extraLibraryPath"
67+
DRIVER_EXTRA_LIBRARY_PATH="$JAVA_PROPERTY_VALUE"
68+
parse_java_property "spark.driver.extraClassPath"
69+
DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE"
70+
parse_java_property "spark.driver.extraJavaOptions"
71+
DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE"
72+
parse_java_property "spark.executor.extraJavaOptions"
73+
EXECUTOR_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE"
74+
if [[ -n "$DRIVER_EXTRA_JAVA_OPTS" ]]; then
75+
export DRIVER_EXTRA_JAVA_OPTS
76+
fi
77+
if [[ -n "$EXECUTOR_EXTRA_JAVA_OPTS" ]]; then
78+
export EXECUTOR_EXTRA_JAVA_OPTS
79+
fi
80+
elif [[ "$PROPERTIES_FILE" != "$DEFAULT_PROPERTIES_FILE" ]]; then
81+
echo "Warning: properties file $PROPERTIES_FILE does not exist." 1>&2
82+
fi
83+
84+
# For client mode, the driver will be launched in the JVM that launches
85+
# SparkSubmit, so we need to handle the class paths, java options, and
86+
# memory pre-emptively in bash. Otherwise, it will be too late by the
87+
# time the JVM has started.
5088

51-
if [ $DEPLOY_MODE == "client" ]; then
52-
# Parse the default properties file here for spark.driver.* configs
53-
if [ -f "$PROPERTIES_FILE" ]; then
54-
echo "Using properties file $PROPERTIES_FILE." 1>&2
55-
# This exports the value of the given key into JAVA_PROPERTY_VALUE
56-
parse_java_property "spark.driver.memory"; DRIVER_MEMORY_CONF="$JAVA_PROPERTY_VALUE"
57-
parse_java_property "spark.driver.extraJavaOptions"; DRIVER_EXTRA_JAVA_OPTS="$JAVA_PROPERTY_VALUE"
58-
parse_java_property "spark.driver.extraClassPath"; DRIVER_EXTRA_CLASSPATH="$JAVA_PROPERTY_VALUE"
59-
parse_java_property "spark.driver.extraLibraryPath"; DRIVER_EXTRA_LIBRARY_PATH="$JAVA_PROPERTY_VALUE"
60-
if [ -n "$DRIVER_EXTRA_JAVA_OPTS" ]; then
61-
export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS"
62-
fi
63-
if [ -n "$DRIVER_EXTRA_CLASSPATH" ]; then
64-
export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH"
65-
fi
66-
if [ -n "$DRIVER_EXTRA_LIBRARY_PATH" ]; then
67-
export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH"
68-
fi
69-
else
70-
echo "Warning: properties file $PROPERTIES_FILE does not exist!" 1>&2
89+
if [[ $DEPLOY_MODE == "client" ]]; then
90+
if [[ -n "$DRIVER_EXTRA_JAVA_OPTS" ]]; then
91+
export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS $DRIVER_EXTRA_JAVA_OPTS"
92+
fi
93+
if [[ -n "$DRIVER_EXTRA_CLASSPATH" ]]; then
94+
export SPARK_SUBMIT_CLASSPATH="$SPARK_SUBMIT_CLASSPATH:$DRIVER_EXTRA_CLASSPATH"
95+
fi
96+
if [[ -n "$DRIVER_EXTRA_LIBRARY_PATH" ]]; then
97+
export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DRIVER_EXTRA_LIBRARY_PATH"
7198
fi
7299
# Favor command line memory over config memory
73100
DRIVER_MEMORY=${DRIVER_MEMORY:-"$DRIVER_MEMORY_CONF"}
74-
if [ -n "$DRIVER_MEMORY" ]; then
101+
if [[ -n "$DRIVER_MEMORY" ]]; then
75102
export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY
76103
fi
77104
fi

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,15 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
7676
}
7777
}
7878
}
79+
// For spark.*.extraJavaOptions, we cannot rely on the Java properties loader because it
80+
// un-escapes certain characters (" and \) needed to split the string into java options.
81+
// For these configs, use the equivalent environment variables instead.
82+
sys.env.get("DRIVER_EXTRA_JAVA_OPTS").foreach { opts =>
83+
defaultProperties("spark.driver.extraJavaOptions") = opts
84+
}
85+
sys.env.get("EXECUTOR_EXTRA_JAVA_OPTS").foreach { opts =>
86+
defaultProperties("spark.executor.extraJavaOptions") = opts
87+
}
7988
defaultProperties
8089
}
8190

0 commit comments

Comments
 (0)