From 8fabe77ad685742728edc331782d309fb56220ee Mon Sep 17 00:00:00 2001
From: Sandy Ryza
Date: Fri, 27 Jun 2014 15:41:53 -0700
Subject: [PATCH 1/4] SPARK-2310. Support arbitrary Spark properties on the
 command line with spark-submit

---
 .../scala/org/apache/spark/deploy/SparkSubmit.scala |  3 +++
 .../apache/spark/deploy/SparkSubmitArguments.scala  |  9 +++++++++
 .../org/apache/spark/deploy/SparkSubmitSuite.scala  | 12 +++++++++++-
 docs/configuration.md                               |  7 ++++---
 4 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 3d8373d8175ee..3b5642b6caa36 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -269,6 +269,9 @@ object SparkSubmit {
       sysProps.getOrElseUpdate(k, v)
     }
 
+    // Spark properties included on command line take precedence
+    sysProps ++= args.sparkProperties
+
     (childArgs, childClasspath, sysProps, childMainClass)
   }
 
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index 57655aa4c32b1..dd99511351935 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -55,6 +55,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
   var verbose: Boolean = false
   var isPython: Boolean = false
   var pyFiles: String = null
+  var sparkProperties: HashMap[String, String] = new HashMap[String, String]()
 
   parseOpts(args.toList)
   loadDefaults()
@@ -304,6 +305,14 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
       case v if v.startsWith("--") && v.contains("=") && v.split("=").size == 2 =>
         val parts = v.split("=")
         parse(Seq(parts(0), parts(1)) ++ tail)
+      // spark config property
+      case v if v.startsWith("--spark.") =>
+        if (tail.isEmpty) {
+          val errMessage = s"Spark config without value: $v"
+          SparkSubmit.printErrorAndExit(errMessage)
+        }
+        sparkProperties(v.substring(2)) = tail.head
+        parse(tail.tail)
       case v if v.startsWith("-") =>
         val errMessage = s"Unrecognized option '$value'."
         SparkSubmit.printErrorAndExit(errMessage)
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 565c53e9529ff..4408722da75c8 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -120,6 +120,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--archives", "archive1.txt,archive2.txt",
       "--num-executors", "6",
       "--name", "beauty",
+      "--spark.shuffle.spill", "false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
@@ -139,6 +140,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     mainClass should be ("org.apache.spark.deploy.yarn.Client")
     classpath should have length (0)
     sysProps("spark.app.name") should be ("beauty")
+    sysProps("spark.shuffle.spill") should be ("false")
     sysProps("SPARK_SUBMIT") should be ("true")
   }
 
@@ -156,6 +158,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--archives", "archive1.txt,archive2.txt",
       "--num-executors", "6",
       "--name", "trill",
+      "--spark.shuffle.spill", "false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
@@ -176,6 +179,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     sysProps("spark.yarn.dist.archives") should include regex (".*archive1.txt,.*archive2.txt")
     sysProps("spark.jars") should include regex (".*one.jar,.*two.jar,.*three.jar,.*thejar.jar")
     sysProps("SPARK_SUBMIT") should be ("true")
+    sysProps("spark.shuffle.spill") should be ("false")
   }
 
   test("handles standalone cluster mode") {
@@ -186,6 +190,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--supervise",
       "--driver-memory", "4g",
       "--driver-cores", "5",
+      "--spark.shuffle.spill", "false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
@@ -195,9 +200,10 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     childArgsStr should include regex ("launch spark://h:p .*thejar.jar org.SomeClass arg1 arg2")
     mainClass should be ("org.apache.spark.deploy.Client")
     classpath should have size (0)
-    sysProps should have size (2)
+    sysProps should have size (3)
     sysProps.keys should contain ("spark.jars")
     sysProps.keys should contain ("SPARK_SUBMIT")
+    sysProps("spark.shuffle.spill") should be ("false")
   }
 
   test("handles standalone client mode") {
@@ -208,6 +214,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--total-executor-cores", "5",
       "--class", "org.SomeClass",
       "--driver-memory", "4g",
+      "--spark.shuffle.spill", "false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
@@ -218,6 +225,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     classpath(0) should endWith ("thejar.jar")
     sysProps("spark.executor.memory") should be ("5g")
     sysProps("spark.cores.max") should be ("5")
+    sysProps("spark.shuffle.spill") should be ("false")
   }
 
   test("handles mesos client mode") {
@@ -228,6 +236,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--total-executor-cores", "5",
       "--class", "org.SomeClass",
       "--driver-memory", "4g",
+      "--spark.shuffle.spill", "false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
@@ -238,6 +247,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     classpath(0) should endWith ("thejar.jar")
     sysProps("spark.executor.memory") should be ("5g")
     sysProps("spark.cores.max") should be ("5")
+    sysProps("spark.shuffle.spill") should be ("false")
   }
 
   test("launch simple application with spark-submit") {
diff --git a/docs/configuration.md b/docs/configuration.md
index a70007c165442..852d963c446bf 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -42,13 +42,14 @@ val sc = new SparkContext(new SparkConf())
 Then, you can supply configuration values at runtime:
 
 {% highlight bash %}
-./bin/spark-submit --name "My fancy app" --master local[4] myApp.jar
+./bin/spark-submit --name "My fancy app" --master local[4] myApp.jar --spark.shuffle.spill false
 {% endhighlight %}
 
 The Spark shell and [`spark-submit`](cluster-overview.html#launching-applications-with-spark-submit)
 tool support two ways to load configurations dynamically. The first are command line options,
-such as `--master`, as shown above. Running `./bin/spark-submit --help` will show the entire list
-of options.
+such as `--master`, as shown above. `spark-submit` can accept any Spark property, but requires
+special names for certain properties that play a part in launching the Spark application. Running
+`./bin/spark-submit --help` will show the entire list of these options.
 
 `bin/spark-submit` will also read configuration options from `conf/spark-defaults.conf`, in which
 each line consists of a key and a value separated by whitespace. For example:
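The merge order added to SparkSubmit.scala above is what gives command-line properties their precedence: defaults from `spark-defaults.conf` are applied with `getOrElseUpdate`, which only fills in missing keys, while `++=` overwrites unconditionally. A minimal standalone sketch of that rule (the object name and property values are invented for illustration and are not part of the patch):

{% highlight scala %}
import scala.collection.mutable.HashMap

// Standalone toy, not part of the patch: demonstrates the merge order
// used in the SparkSubmit.scala hunk above.
object PropertyPrecedenceSketch {
  def main(args: Array[String]): Unit = {
    val sysProps = new HashMap[String, String]()
    sysProps("spark.app.name") = "beauty" // set earlier from a flag like --name

    // Defaults from spark-defaults.conf only fill in keys that are missing.
    val defaults = Map("spark.app.name" -> "default", "spark.shuffle.spill" -> "true")
    for ((k, v) <- defaults) sysProps.getOrElseUpdate(k, v)

    // Spark properties included on the command line take precedence.
    sysProps ++= Map("spark.shuffle.spill" -> "false")

    println(sysProps("spark.app.name"))      // beauty: the flag wins over the default
    println(sysProps("spark.shuffle.spill")) // false: the command line wins over the default
  }
}
{% endhighlight %}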
From 91b244a95daadfab5ccc2d2d63a17a6afc9439a2 Mon Sep 17 00:00:00 2001
From: Sandy Ryza
Date: Mon, 21 Jul 2014 00:43:42 -0700
Subject: [PATCH 2/4] Change format to --conf PROP=VALUE

---
 .../spark/deploy/SparkSubmitArguments.scala    | 17 +++++++++--------
 .../apache/spark/deploy/SparkSubmitSuite.scala | 10 +++++-----
 docs/configuration.md                          |  2 +-
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index dd99511351935..fb2846f1c86b6 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -291,6 +291,14 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
         jars = Utils.resolveURIs(value)
         parse(tail)
 
+      case ("--conf") :: value :: tail =>
+        val equalsIndex = value.indexOf('=')
+        if (equalsIndex == -1) {
+          SparkSubmit.printErrorAndExit(s"Spark config without '=': $value")
+        }
+        sparkProperties(value.substring(0, equalsIndex)) = value.substring(equalsIndex+1)
+        parse(tail)
+
       case ("--help" | "-h") :: tail =>
         printUsageAndExit(0)
 
@@ -305,14 +313,6 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
       case v if v.startsWith("--") && v.contains("=") && v.split("=").size == 2 =>
         val parts = v.split("=")
         parse(Seq(parts(0), parts(1)) ++ tail)
-      // spark config property
-      case v if v.startsWith("--spark.") =>
-        if (tail.isEmpty) {
-          val errMessage = s"Spark config without value: $v"
-          SparkSubmit.printErrorAndExit(errMessage)
-        }
-        sparkProperties(v.substring(2)) = tail.head
-        parse(tail.tail)
       case v if v.startsWith("-") =>
         val errMessage = s"Unrecognized option '$value'."
         SparkSubmit.printErrorAndExit(errMessage)
@@ -372,6 +372,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
        |
        |  --help, -h                  Show this help message and exit
        |  --verbose, -v               Print additional debug output
+       |  --conf PROP=VALUE           Arbitrary Spark configuration property.
        |
        | Spark standalone with cluster deploy mode only:
        |  --driver-cores NUM          Cores for driver (Default: 1).
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 4408722da75c8..f497a5e0a14f0 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -120,7 +120,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--archives", "archive1.txt,archive2.txt",
       "--num-executors", "6",
       "--name", "beauty",
-      "--spark.shuffle.spill", "false",
+      "--conf", "spark.shuffle.spill=false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
@@ -158,7 +158,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--archives", "archive1.txt,archive2.txt",
       "--num-executors", "6",
       "--name", "trill",
-      "--spark.shuffle.spill", "false",
+      "--conf", "spark.shuffle.spill=false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
@@ -190,7 +190,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--supervise",
       "--driver-memory", "4g",
       "--driver-cores", "5",
-      "--spark.shuffle.spill", "false",
+      "--conf", "spark.shuffle.spill=false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
@@ -214,7 +214,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--total-executor-cores", "5",
       "--class", "org.SomeClass",
       "--driver-memory", "4g",
-      "--spark.shuffle.spill", "false",
+      "--conf", "spark.shuffle.spill=false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
@@ -236,7 +236,7 @@ class SparkSubmitSuite extends FunSuite with Matchers {
       "--total-executor-cores", "5",
       "--class", "org.SomeClass",
       "--driver-memory", "4g",
-      "--spark.shuffle.spill", "false",
+      "--conf", "spark.shuffle.spill=false",
       "thejar.jar",
       "arg1", "arg2")
     val appArgs = new SparkSubmitArguments(clArgs)
diff --git a/docs/configuration.md b/docs/configuration.md
index 852d963c446bf..d9a81a658de2c 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -42,7 +42,7 @@ val sc = new SparkContext(new SparkConf())
 Then, you can supply configuration values at runtime:
 
 {% highlight bash %}
-./bin/spark-submit --name "My fancy app" --master local[4] myApp.jar --spark.shuffle.spill false
+./bin/spark-submit --name "My app" --master local[4] myApp.jar --conf spark.shuffle.spill=false
 {% endhighlight %}
 
 The Spark shell and [`spark-submit`](cluster-overview.html#launching-applications-with-spark-submit)
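The `indexOf`-based parsing introduced for `--conf` above splits only on the first `=`, so a property value may itself contain `=` characters. A standalone sketch of that behavior (the helper below is hypothetical, not part of the patch):

{% highlight scala %}
// Hypothetical standalone illustration of the first-'=' split used by --conf above.
object ConfParseSketch {
  def parseConf(value: String): (String, String) = {
    val equalsIndex = value.indexOf('=')
    require(equalsIndex != -1, s"Spark config without '=': $value")
    // Everything after the first '=' belongs to the value, including later '=' signs.
    (value.substring(0, equalsIndex), value.substring(equalsIndex + 1))
  }

  def main(args: Array[String]): Unit = {
    println(parseConf("spark.shuffle.spill=false"))             // (spark.shuffle.spill,false)
    println(parseConf("spark.executor.extraJavaOptions=-Da=b")) // value keeps the second '='
  }
}
{% endhighlight %}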
From 00edfb94433f6475df41422a114e2d2fcdc82261 Mon Sep 17 00:00:00 2001
From: Sandy Ryza
Date: Mon, 21 Jul 2014 15:56:04 -0700
Subject: [PATCH 3/4] Review comments

---
 .../org/apache/spark/deploy/SparkSubmitArguments.scala | 10 +++++-----
 docs/configuration.md                                  |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index fb2846f1c86b6..b09b8b9c093c8 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -292,11 +292,10 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
         parse(tail)
 
       case ("--conf") :: value :: tail =>
-        val equalsIndex = value.indexOf('=')
-        if (equalsIndex == -1) {
-          SparkSubmit.printErrorAndExit(s"Spark config without '=': $value")
+        value.split("=", 2).toSeq match {
+          case Seq(k, v) => sparkProperties(k) = v
+          case _ => SparkSubmit.printErrorAndExit(s"Spark config without '=': $value")
         }
-        sparkProperties(value.substring(0, equalsIndex)) = value.substring(equalsIndex+1)
         parse(tail)
 
       case ("--help" | "-h") :: tail =>
         printUsageAndExit(0)
@@ -358,6 +357,8 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
        |                              on the PYTHONPATH for Python apps.
        |  --files FILES               Comma-separated list of files to be placed in the working
        |                              directory of each executor.
+       |
+       |  --conf PROP=VALUE           Arbitrary Spark configuration property.
        |  --properties-file FILE      Path to a file from which to load extra properties. If not
        |                              specified, this will look for conf/spark-defaults.conf.
        |
@@ -372,7 +373,6 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
        |
        |  --help, -h                  Show this help message and exit
        |  --verbose, -v               Print additional debug output
-       |  --conf PROP=VALUE           Arbitrary Spark configuration property.
        |
        | Spark standalone with cluster deploy mode only:
        |  --driver-cores NUM          Cores for driver (Default: 1).
diff --git a/docs/configuration.md b/docs/configuration.md
index d9a81a658de2c..4bd4edc821d84 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -47,9 +47,9 @@ Then, you can supply configuration values at runtime:
 
 The Spark shell and [`spark-submit`](cluster-overview.html#launching-applications-with-spark-submit)
 tool support two ways to load configurations dynamically. The first are command line options,
-such as `--master`, as shown above. `spark-submit` can accept any Spark property, but requires
-special names for certain properties that play a part in launching the Spark application. Running
-`./bin/spark-submit --help` will show the entire list of these options.
+such as `--master`, as shown above. `spark-submit` can accept any Spark property using the `--conf`
+flag, but uses special flags for properties that play a part in launching the Spark application.
+Running `./bin/spark-submit --help` will show the entire list of these options.
 
 `bin/spark-submit` will also read configuration options from `conf/spark-defaults.conf`, in which
 each line consists of a key and a value separated by whitespace. For example:
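The review change above swaps the manual index arithmetic for `value.split("=", 2)`; because the limit argument is 2, splitting stops after the first `=` and the two forms stay equivalent. A standalone check of the edge cases (toy code, not part of the patch):

{% highlight scala %}
// Toy check, not part of the patch: split("=", 2) matches the indexOf parsing it replaces.
object SplitLimitSketch {
  def main(args: Array[String]): Unit = {
    val inputs = Seq(
      "spark.shuffle.spill=false",             // plain key=value
      "spark.executor.extraJavaOptions=-Da=b", // value containing '='
      "spark.broken.entry")                    // no '=' at all
    for (value <- inputs) {
      value.split("=", 2).toSeq match {
        case Seq(k, v) => println(s"$k -> $v") // limit 2: v keeps any later '='
        case _ => println(s"Spark config without '=': $value")
      }
    }
  }
}
{% endhighlight %}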
From 1dc98559e3e42c6b42e856bdf4b3397810d58efa Mon Sep 17 00:00:00 2001
From: Sandy Ryza
Date: Tue, 22 Jul 2014 09:43:59 -0700
Subject: [PATCH 4/4] More doc and cleanup

---
 .../scala/org/apache/spark/deploy/SparkSubmitArguments.scala | 5 +++--
 docs/configuration.md                                        | 3 ++-
 docs/submitting-applications.md                              | 2 ++
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index b09b8b9c093c8..3ab67a43a3b55 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -55,7 +55,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
   var verbose: Boolean = false
   var isPython: Boolean = false
   var pyFiles: String = null
-  var sparkProperties: HashMap[String, String] = new HashMap[String, String]()
+  val sparkProperties: HashMap[String, String] = new HashMap[String, String]()
 
   parseOpts(args.toList)
   loadDefaults()
@@ -178,6 +178,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
    |  executorCores           $executorCores
    |  totalExecutorCores      $totalExecutorCores
    |  propertiesFile          $propertiesFile
+   |  extraSparkProperties    $sparkProperties
    |  driverMemory            $driverMemory
    |  driverCores             $driverCores
    |  driverExtraClassPath    $driverExtraClassPath
@@ -291,7 +292,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
         jars = Utils.resolveURIs(value)
         parse(tail)
 
-      case ("--conf") :: value :: tail =>
+      case ("--conf" | "-c") :: value :: tail =>
         value.split("=", 2).toSeq match {
           case Seq(k, v) => sparkProperties(k) = v
           case _ => SparkSubmit.printErrorAndExit(s"Spark config without '=': $value")
diff --git a/docs/configuration.md b/docs/configuration.md
index 4bd4edc821d84..152ff7f8bbc6b 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -42,7 +42,8 @@ val sc = new SparkContext(new SparkConf())
 Then, you can supply configuration values at runtime:
 
 {% highlight bash %}
-./bin/spark-submit --name "My app" --master local[4] myApp.jar --conf spark.shuffle.spill=false
+./bin/spark-submit --name "My app" --master local[4] --conf spark.shuffle.spill=false
+  --conf "spark.executor.extraJavaOptions=-XX:+PrintGCDetails -Xmn5g" myApp.jar
 {% endhighlight %}
 
 The Spark shell and [`spark-submit`](cluster-overview.html#launching-applications-with-spark-submit)
diff --git a/docs/submitting-applications.md b/docs/submitting-applications.md
index e05883072bfa8..2483cd7c2e253 100644
--- a/docs/submitting-applications.md
+++ b/docs/submitting-applications.md
@@ -33,6 +33,7 @@ dependencies, and can support different cluster managers and deploy modes that S
   --class <main-class>
   --master <master-url> \
   --deploy-mode <deploy-mode> \
+  --conf <key>=<value> \
   ... # other options
   <application-jar> \
   [application-arguments]
@@ -43,6 +44,7 @@ Some of the commonly used options are:
 * `--class`: The entry point for your application (e.g. `org.apache.spark.examples.SparkPi`)
 * `--master`: The [master URL](#master-urls) for the cluster (e.g. `spark://23.195.26.187:7077`)
 * `--deploy-mode`: Whether to deploy your driver on the worker nodes (`cluster`) or locally as an external client (`client`) (default: `client`)*
+* `--conf`: Arbitrary Spark configuration property in key=value format.
 * `application-jar`: Path to a bundled jar including your application and all dependencies. The URL
   must be globally visible inside of your cluster, for instance, an `hdfs://` path or a `file://`
   path that is present on all nodes.
 * `application-arguments`: Arguments passed to the main method of your main class, if any
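To see how the final `--conf | -c` alternative slots into the recursive list-pattern parser, here is a condensed toy reimplementation (only the `--conf` path of `parseOpts` is modeled; everything else in SparkSubmitArguments is elided, and `sys.error` stands in for `SparkSubmit.printErrorAndExit`):

{% highlight scala %}
import scala.collection.mutable.HashMap

// Condensed toy version of the parseOpts recursion, reduced to the --conf/-c case.
object ParseOptsSketch {
  val sparkProperties = new HashMap[String, String]()

  def parse(opts: List[String]): Unit = opts match {
    case ("--conf" | "-c") :: value :: tail =>
      value.split("=", 2).toSeq match {
        case Seq(k, v) => sparkProperties(k) = v
        case _ => sys.error(s"Spark config without '=': $value")
      }
      parse(tail)
    case Nil => // done
    case unknown :: _ => sys.error(s"Unrecognized option '$unknown'.")
  }

  def main(args: Array[String]): Unit = {
    parse(List(
      "--conf", "spark.shuffle.spill=false",
      "-c", "spark.executor.extraJavaOptions=-XX:+PrintGCDetails -Xmn5g"))
    sparkProperties.foreach { case (k, v) => println(s"$k = $v") }
  }
}
{% endhighlight %}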