
Commit e34922a

sryza authored and pwendell committed
SPARK-2310. Support arbitrary Spark properties on the command line with spark-submit

The PR allows invocations like:

    spark-submit --class org.MyClass --conf spark.shuffle.spill=false myjar.jar

Author: Sandy Ryza <[email protected]>

Closes apache#1253 from sryza/sandy-spark-2310 and squashes the following commits:

1dc9855 [Sandy Ryza] More doc and cleanup
00edfb9 [Sandy Ryza] Review comments
91b244a [Sandy Ryza] Change format to --conf PROP=VALUE
8fabe77 [Sandy Ryza] SPARK-2310. Support arbitrary Spark properties on the command line with spark-submit
1 parent 78d18fd commit e34922a

File tree

5 files changed (+32 −4 lines)


core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 3 additions & 0 deletions
@@ -269,6 +269,9 @@ object SparkSubmit {
       sysProps.getOrElseUpdate(k, v)
     }

+    // Spark properties included on command line take precedence
+    sysProps ++= args.sparkProperties
+
     (childArgs, childClasspath, sysProps, childMainClass)
   }
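The `++=` on the added line is what gives `--conf` values precedence: merging into a mutable `HashMap` overwrites keys that are already present, whereas the `getOrElseUpdate` just above it only fills in keys that are still missing. A minimal standalone sketch of that distinction (not part of the patch; the property values are made up):

    import scala.collection.mutable.HashMap

    // Pretend this entry was loaded from conf/spark-defaults.conf
    val sysProps = HashMap("spark.shuffle.spill" -> "true")

    // Defaults only fill gaps: the existing key is left alone
    sysProps.getOrElseUpdate("spark.shuffle.spill", "true")

    // Command-line properties win: ++= overwrites the existing entry
    sysProps ++= Map("spark.shuffle.spill" -> "false")

    assert(sysProps("spark.shuffle.spill") == "false")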

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

Lines changed: 11 additions & 0 deletions
@@ -55,6 +55,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
   var verbose: Boolean = false
   var isPython: Boolean = false
   var pyFiles: String = null
+  val sparkProperties: HashMap[String, String] = new HashMap[String, String]()

   parseOpts(args.toList)
   loadDefaults()

@@ -177,6 +178,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
    |  executorCores           $executorCores
    |  totalExecutorCores      $totalExecutorCores
    |  propertiesFile          $propertiesFile
+   |  extraSparkProperties    $sparkProperties
    |  driverMemory            $driverMemory
    |  driverCores             $driverCores
    |  driverExtraClassPath    $driverExtraClassPath

@@ -290,6 +292,13 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
         jars = Utils.resolveURIs(value)
         parse(tail)

+      case ("--conf" | "-c") :: value :: tail =>
+        value.split("=", 2).toSeq match {
+          case Seq(k, v) => sparkProperties(k) = v
+          case _ => SparkSubmit.printErrorAndExit(s"Spark config without '=': $value")
+        }
+        parse(tail)
+
       case ("--help" | "-h") :: tail =>
         printUsageAndExit(0)

@@ -349,6 +358,8 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
    |                              on the PYTHONPATH for Python apps.
    |  --files FILES               Comma-separated list of files to be placed in the working
    |                              directory of each executor.
+   |
+   |  --conf PROP=VALUE           Arbitrary Spark configuration property.
    |  --properties-file FILE      Path to a file from which to load extra properties. If not
    |                              specified, this will look for conf/spark-defaults.conf.
    |
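Note the `split("=", 2)` in the new `--conf` case: the limit of 2 means only the first `=` separates key from value, so values that themselves contain `=` (common in `spark.executor.extraJavaOptions`) survive intact. A small illustration of the idiom with a made-up value:

    val value = "spark.executor.extraJavaOptions=-Da=b -Dc=d"

    value.split("=", 2).toSeq match {
      case Seq(k, v) => println(s"key=$k value=$v")   // inner '=' signs stay in v
      case _         => println(s"Spark config without '=': $value")
    }
    // prints: key=spark.executor.extraJavaOptions value=-Da=b -Dc=d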

core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala

Lines changed: 11 additions & 1 deletion
@@ -120,6 +120,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--archives", "archive1.txt,archive2.txt",
       "--num-executors", "6",
       "--name", "beauty",
+      "--conf", "spark.shuffle.spill=false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)

@@ -139,6 +140,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     mainClass should be ("org.apache.spark.deploy.yarn.Client")
     classpath should have length (0)
     sysProps("spark.app.name") should be ("beauty")
+    sysProps("spark.shuffle.spill") should be ("false")
     sysProps("SPARK_SUBMIT") should be ("true")
   }

@@ -156,6 +158,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--archives", "archive1.txt,archive2.txt",
       "--num-executors", "6",
       "--name", "trill",
+      "--conf", "spark.shuffle.spill=false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)

@@ -176,6 +179,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     sysProps("spark.yarn.dist.archives") should include regex (".*archive1.txt,.*archive2.txt")
     sysProps("spark.jars") should include regex (".*one.jar,.*two.jar,.*three.jar,.*thejar.jar")
     sysProps("SPARK_SUBMIT") should be ("true")
+    sysProps("spark.shuffle.spill") should be ("false")
   }

   test("handles standalone cluster mode") {

@@ -186,6 +190,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--supervise",
       "--driver-memory", "4g",
       "--driver-cores", "5",
+      "--conf", "spark.shuffle.spill=false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)

@@ -195,9 +200,10 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     childArgsStr should include regex ("launch spark://h:p .*thejar.jar org.SomeClass arg1 arg2")
     mainClass should be ("org.apache.spark.deploy.Client")
     classpath should have size (0)
-    sysProps should have size (2)
+    sysProps should have size (3)
     sysProps.keys should contain ("spark.jars")
     sysProps.keys should contain ("SPARK_SUBMIT")
+    sysProps("spark.shuffle.spill") should be ("false")
   }

   test("handles standalone client mode") {

@@ -208,6 +214,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--total-executor-cores", "5",
       "--class", "org.SomeClass",
       "--driver-memory", "4g",
+      "--conf", "spark.shuffle.spill=false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)

@@ -218,6 +225,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     classpath(0) should endWith ("thejar.jar")
     sysProps("spark.executor.memory") should be ("5g")
     sysProps("spark.cores.max") should be ("5")
+    sysProps("spark.shuffle.spill") should be ("false")
   }

   test("handles mesos client mode") {

@@ -228,6 +236,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--total-executor-cores", "5",
       "--class", "org.SomeClass",
       "--driver-memory", "4g",
+      "--conf", "spark.shuffle.spill=false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)

@@ -238,6 +247,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     classpath(0) should endWith ("thejar.jar")
     sysProps("spark.executor.memory") should be ("5g")
     sysProps("spark.cores.max") should be ("5")
+    sysProps("spark.shuffle.spill") should be ("false")
   }

   test("launch simple application with spark-submit") {

docs/configuration.md

Lines changed: 5 additions & 3 deletions
@@ -42,13 +42,15 @@ val sc = new SparkContext(new SparkConf())

 Then, you can supply configuration values at runtime:
 {% highlight bash %}
-./bin/spark-submit --name "My fancy app" --master local[4] myApp.jar
+./bin/spark-submit --name "My app" --master local[4] --conf spark.shuffle.spill=false
+  --conf "spark.executor.extraJavaOptions=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps" myApp.jar
 {% endhighlight %}

 The Spark shell and [`spark-submit`](cluster-overview.html#launching-applications-with-spark-submit)
 tool support two ways to load configurations dynamically. The first are command line options,
-such as `--master`, as shown above. Running `./bin/spark-submit --help` will show the entire list
-of options.
+such as `--master`, as shown above. `spark-submit` can accept any Spark property using the `--conf`
+flag, but uses special flags for properties that play a part in launching the Spark application.
+Running `./bin/spark-submit --help` will show the entire list of these options.

 `bin/spark-submit` will also read configuration options from `conf/spark-defaults.conf`, in which
 each line consists of a key and a value separated by whitespace. For example:

docs/submitting-applications.md

Lines changed: 2 additions & 0 deletions
@@ -33,6 +33,7 @@ dependencies, and can support different cluster managers and deploy modes that S
   --class <main-class>
   --master <master-url> \
   --deploy-mode <deploy-mode> \
+  --conf <key>=<value> \
   ... # other options
   <application-jar> \
   [application-arguments]

@@ -43,6 +44,7 @@ Some of the commonly used options are:
 * `--class`: The entry point for your application (e.g. `org.apache.spark.examples.SparkPi`)
 * `--master`: The [master URL](#master-urls) for the cluster (e.g. `spark://23.195.26.187:7077`)
 * `--deploy-mode`: Whether to deploy your driver on the worker nodes (`cluster`) or locally as an external client (`client`) (default: `client`)*
+* `--conf`: Arbitrary Spark configuration property in key=value format. For values that contain spaces wrap "key=value" in quotes (as shown).
 * `application-jar`: Path to a bundled jar including your application and all dependencies. The URL must be globally visible inside of your cluster, for instance, an `hdfs://` path or a `file://` path that is present on all nodes.
 * `application-arguments`: Arguments passed to the main method of your main class, if any
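On the application side, a property passed with `--conf` reaches the driver like any other configuration, so a plain `SparkConf` picks it up. A hedged sketch (assumes a Spark 1.x dependency on the classpath; `getBoolean` returns the supplied default when the key is unset):

    import org.apache.spark.SparkConf

    // spark-submit injects --conf properties before the app starts
    val conf = new SparkConf()

    // After `--conf spark.shuffle.spill=false`, this reads false
    val spill = conf.getBoolean("spark.shuffle.spill", defaultValue = true)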
