3 changes: 3 additions & 0 deletions core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -269,6 +269,9 @@ object SparkSubmit {
sysProps.getOrElseUpdate(k, v)
}

// Spark properties included on command line take precedence
sysProps ++= args.sparkProperties

(childArgs, childClasspath, sysProps, childMainClass)
}

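The ordering above is what gives `--conf` its precedence: the defaults merged just before this with `getOrElseUpdate` never overwrite an existing key, while the command-line properties are merged last with `++=`, which always does. A minimal standalone sketch of that merge order, using hypothetical property names and values (not from the patch):

    import scala.collection.mutable

    object PrecedenceSketch {
      def main(args: Array[String]): Unit = {
        val sysProps = new mutable.HashMap[String, String]()
        sysProps("spark.app.name") = "beauty" // set earlier, e.g. from the --name flag

        // Defaults file: getOrElseUpdate only fills in keys that are still missing.
        val defaults = Map("spark.app.name" -> "from-defaults", "spark.ui.port" -> "4040")
        for ((k, v) <- defaults) sysProps.getOrElseUpdate(k, v)

        // --conf properties: ++= overwrites anything already present.
        sysProps ++= Map("spark.ui.port" -> "4050")

        // spark.app.name stays "beauty"; spark.ui.port becomes "4050".
        sysProps.foreach(println)
      }
    }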
core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -55,6 +55,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
var verbose: Boolean = false
var isPython: Boolean = false
var pyFiles: String = null
val sparkProperties: HashMap[String, String] = new HashMap[String, String]()

parseOpts(args.toList)
loadDefaults()
@@ -177,6 +178,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
| executorCores $executorCores
| totalExecutorCores $totalExecutorCores
| propertiesFile $propertiesFile
| extraSparkProperties $sparkProperties
| driverMemory $driverMemory
| driverCores $driverCores
| driverExtraClassPath $driverExtraClassPath
@@ -290,6 +292,13 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
jars = Utils.resolveURIs(value)
parse(tail)

case ("--conf" | "-c") :: value :: tail =>
value.split("=", 2).toSeq match {
case Seq(k, v) => sparkProperties(k) = v
case _ => SparkSubmit.printErrorAndExit(s"Spark config without '=': $value")
}
parse(tail)

case ("--help" | "-h") :: tail =>
printUsageAndExit(0)

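The limit argument in `split("=", 2)` in the `--conf` case above means only the first `=` separates key from value, so values that themselves contain `=` (Java options, for example) pass through intact. A self-contained sketch of the same parsing rule, with made-up inputs:

    object ConfSplitSketch {
      def parseConf(value: String): Either[String, (String, String)] =
        value.split("=", 2).toSeq match {
          case Seq(k, v) => Right(k -> v)
          case _         => Left(s"Spark config without '=': $value")
        }

      def main(args: Array[String]): Unit = {
        println(parseConf("spark.shuffle.spill=false")) // Right((spark.shuffle.spill,false))
        println(parseConf("spark.a.b=-Dfoo=bar"))       // Right((spark.a.b,-Dfoo=bar)) -- value keeps its own '='
        println(parseConf("spark.shuffle.spill"))       // Left(Spark config without '=': spark.shuffle.spill)
      }
    }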
@@ -349,6 +358,8 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
| on the PYTHONPATH for Python apps.
| --files FILES Comma-separated list of files to be placed in the working
| directory of each executor.
|
| --conf PROP=VALUE Arbitrary Spark configuration property.
| --properties-file FILE Path to a file from which to load extra properties. If not
| specified, this will look for conf/spark-defaults.conf.
|
core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -120,6 +120,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
"--archives", "archive1.txt,archive2.txt",
"--num-executors", "6",
"--name", "beauty",
"--conf", "spark.shuffle.spill=false",
"thejar.jar",
"arg1", "arg2")
val appArgs = new SparkSubmitArguments(clArgs)
@@ -139,6 +140,7 @@
mainClass should be ("org.apache.spark.deploy.yarn.Client")
classpath should have length (0)
sysProps("spark.app.name") should be ("beauty")
sysProps("spark.shuffle.spill") should be ("false")
sysProps("SPARK_SUBMIT") should be ("true")
}

@@ -156,6 +158,7 @@
"--archives", "archive1.txt,archive2.txt",
"--num-executors", "6",
"--name", "trill",
"--conf", "spark.shuffle.spill=false",
"thejar.jar",
"arg1", "arg2")
val appArgs = new SparkSubmitArguments(clArgs)
@@ -176,6 +179,7 @@
sysProps("spark.yarn.dist.archives") should include regex (".*archive1.txt,.*archive2.txt")
sysProps("spark.jars") should include regex (".*one.jar,.*two.jar,.*three.jar,.*thejar.jar")
sysProps("SPARK_SUBMIT") should be ("true")
sysProps("spark.shuffle.spill") should be ("false")
}

test("handles standalone cluster mode") {
@@ -186,6 +190,7 @@
"--supervise",
"--driver-memory", "4g",
"--driver-cores", "5",
"--conf", "spark.shuffle.spill=false",
"thejar.jar",
"arg1", "arg2")
val appArgs = new SparkSubmitArguments(clArgs)
@@ -195,9 +200,10 @@
childArgsStr should include regex ("launch spark://h:p .*thejar.jar org.SomeClass arg1 arg2")
mainClass should be ("org.apache.spark.deploy.Client")
classpath should have size (0)
- sysProps should have size (2)
+ sysProps should have size (3)
sysProps.keys should contain ("spark.jars")
sysProps.keys should contain ("SPARK_SUBMIT")
sysProps("spark.shuffle.spill") should be ("false")
}

test("handles standalone client mode") {
@@ -208,6 +214,7 @@
"--total-executor-cores", "5",
"--class", "org.SomeClass",
"--driver-memory", "4g",
"--conf", "spark.shuffle.spill=false",
"thejar.jar",
"arg1", "arg2")
val appArgs = new SparkSubmitArguments(clArgs)
@@ -218,6 +225,7 @@
classpath(0) should endWith ("thejar.jar")
sysProps("spark.executor.memory") should be ("5g")
sysProps("spark.cores.max") should be ("5")
sysProps("spark.shuffle.spill") should be ("false")
}

test("handles mesos client mode") {
@@ -228,6 +236,7 @@
"--total-executor-cores", "5",
"--class", "org.SomeClass",
"--driver-memory", "4g",
"--conf", "spark.shuffle.spill=false",
"thejar.jar",
"arg1", "arg2")
val appArgs = new SparkSubmitArguments(clArgs)
@@ -238,6 +247,7 @@
classpath(0) should endWith ("thejar.jar")
sysProps("spark.executor.memory") should be ("5g")
sysProps("spark.cores.max") should be ("5")
sysProps("spark.shuffle.spill") should be ("false")
}

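These tests all follow the same shape: a `--conf key=value` pair lands in `sparkProperties` and, from there, in the resolved system properties. A minimal hypothetical check along the same lines — it would have to live under `org.apache.spark.deploy`, since `SparkSubmitArguments` is `private[spark]`, and it assumes the same required arguments the tests pass:

    package org.apache.spark.deploy

    object SparkPropertiesCheck {
      def main(args: Array[String]): Unit = {
        val clArgs = Seq(
          "--master", "local[2]",
          "--class", "org.SomeClass",
          "--conf", "spark.shuffle.spill=false",
          "--conf", "spark.ui.port=4050", // repeated --conf flags accumulate
          "thejar.jar")
        val appArgs = new SparkSubmitArguments(clArgs)
        assert(appArgs.sparkProperties("spark.shuffle.spill") == "false")
        assert(appArgs.sparkProperties("spark.ui.port") == "4050")
      }
    }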
test("launch simple application with spark-submit") {
8 changes: 5 additions & 3 deletions docs/configuration.md
@@ -42,13 +42,15 @@ val sc = new SparkContext(new SparkConf())

Then, you can supply configuration values at runtime:
{% highlight bash %}
./bin/spark-submit --name "My fancy app" --master local[4] myApp.jar
./bin/spark-submit --name "My app" --master local[4] --conf spark.shuffle.spill=false
--conf "spark.executor.extraJavaOptions=-XX:+PrintGCDetails -Xmn5g" myApp.jar
{% endhighlight %}

The Spark shell and [`spark-submit`](cluster-overview.html#launching-applications-with-spark-submit)
tool support two ways to load configurations dynamically. The first are command line options,
- such as `--master`, as shown above. Running `./bin/spark-submit --help` will show the entire list
- of options.
+ such as `--master`, as shown above. `spark-submit` can accept any Spark property using the `--conf`
+ flag, but uses special flags for properties that play a part in launching the Spark application.
+ Running `./bin/spark-submit --help` will show the entire list of these options.
Review comment (Contributor): can we also make spark-submit without any options print the help? (maybe you are already doing that)

Reply (Contributor): That is already the case
`bin/spark-submit` will also read configuration options from `conf/spark-defaults.conf`, in which
each line consists of a key and a value separated by whitespace. For example:
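A hypothetical file of this shape (illustrative values only, not from the patch):

    spark.master            spark://5.6.7.8:7077
    spark.executor.memory   512m
    spark.eventLog.enabled  true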
2 changes: 2 additions & 0 deletions docs/submitting-applications.md
@@ -33,6 +33,7 @@ dependencies, and can support different cluster managers and deploy modes that S
--class <main-class> \
--master <master-url> \
--deploy-mode <deploy-mode> \
--conf <key>=<value> \
... # other options
<application-jar> \
[application-arguments]
@@ -43,6 +44,7 @@ Some of the commonly used options are:
* `--class`: The entry point for your application (e.g. `org.apache.spark.examples.SparkPi`)
* `--master`: The [master URL](#master-urls) for the cluster (e.g. `spark://23.195.26.187:7077`)
* `--deploy-mode`: Whether to deploy your driver on the worker nodes (`cluster`) or locally as an external client (`client`) (default: `client`)*
* `--conf`: Arbitrary Spark configuration property in key=value format.
* `application-jar`: Path to a bundled jar including your application and all dependencies. The URL must be globally visible inside of your cluster, for instance, an `hdfs://` path or a `file://` path that is present on all nodes.
* `application-arguments`: Arguments passed to the main method of your main class, if any
