29 changes: 7 additions & 22 deletions bin/beeline
@@ -17,29 +17,14 @@
# limitations under the License.
#

# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"
#
Contributor

These changes seem unrelated. Is there a bug you can mention? Otherwise, could you call them out explicitly in the PR description?

Contributor Author

My bad, you're right. I should update the PR description.

While working on #1620, the beeline script was at one point reimplemented on top of spark-submit to avoid duplicating the Java check and classpath computation, but that was reverted because of the issue this PR is trying to fix (beeline --help shows the spark-submit usage message).

And while working on this PR, I realized that beeline is only a JDBC client, unrelated to Spark itself, so I can just start it with spark-class. That's why this change appears here.

# Shell script for starting BeeLine

# Find the java binary
if [ -n "${JAVA_HOME}" ]; then
RUNNER="${JAVA_HOME}/bin/java"
else
if [ `command -v java` ]; then
RUNNER="java"
else
echo "JAVA_HOME is not set" >&2
exit 1
fi
fi
# Enter posix mode for bash
set -o posix

# Compute classpath using external script
classpath_output=$($FWDIR/bin/compute-classpath.sh)
if [[ "$?" != "0" ]]; then
echo "$classpath_output"
exit 1
else
CLASSPATH=$classpath_output
fi
# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"

CLASS="org.apache.hive.beeline.BeeLine"
exec "$RUNNER" -cp "$CLASSPATH" $CLASS "$@"
exec "$FWDIR/bin/spark-class" $CLASS "$@"
2 changes: 1 addition & 1 deletion bin/pyspark
@@ -27,7 +27,7 @@ SCALA_VERSION=2.10

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
echo "Usage: ./bin/pyspark [options]" 1>&2
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
exec "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v "^Usage\|\( spark-submit\)" 1>&2
exit 0
fi

2 changes: 1 addition & 1 deletion bin/spark-shell
@@ -33,7 +33,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)"

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
echo "Usage: ./bin/spark-shell [options]"
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
exec "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v "^Usage\|\( spark-submit\)" 1>&2
exit 0
fi

17 changes: 12 additions & 5 deletions bin/spark-sql
@@ -26,11 +26,18 @@ set -o posix
# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
echo "Usage: ./sbin/spark-sql [options]"
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"

if [[ "$@" = --help ]] || [[ "$@" = -h ]]; then
echo "Usage: ./sbin/spark-sql [options] [--] [CLI options]"
exec "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v "^Usage\|\( spark-submit\)" 1>&2
echo
echo "CLI options:"
exec "$FWDIR"/bin/spark-submit\
--primary spark-internal --class $CLASS\
-- --help 2>&1 | grep -v -- "-H,--help" | tail -n +3 1>&2
echo
exit 0
fi

CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
exec "$FWDIR"/bin/spark-submit --class $CLASS spark-internal $@
exec "$FWDIR"/bin/spark-submit --class $CLASS --primary spark-internal $@
@@ -311,6 +311,27 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
verbose = true
parse(tail)

case ("--primary" | "-p") :: value :: tail =>
primaryResource = if (!SparkSubmit.isShell(value) && !SparkSubmit.isInternal(value)) {
Utils.resolveURI(value).toString
} else {
value
}
isPython = SparkSubmit.isPython(value)
parse(tail)

case "--" :: tail =>
if (inSparkOpts) {
// Primary resource was specified with "--primary", so "--" is treated as the separator
// between spark-submit options and user application options.
childArgs ++= tail
} else {
// Primary resource was specified as a positional argument, so "--" is passed to the
// application as a normal argument.
childArgs += "--"
parse(tail)
}

case value :: tail =>
if (inSparkOpts) {
value match {
@@ -322,6 +343,14 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
val errMessage = s"Unrecognized option '$value'."
SparkSubmit.printErrorAndExit(errMessage)
Contributor

If the user attempts to pass a --* argument to their application, they will reach this case. We should add something here that directs the user to use --primary and -- so that $value can be specified as an application option. Otherwise the only way for them to find out about these two options is to read through the full help message.

Contributor Author

Do you mean cases like spark-submit --class Foo app.jar --arg? Actually, --arg is passed to app.jar correctly, since inSparkOpts has already been set to false by the time --arg is processed.

Contributor Author

Confirmed this locally against current master: spark-submit passes --*-style arguments to the application correctly as long as the user puts them after the primary resource.

Contributor

Ah, I see. The whole point of this PR is not to make arguments like --hiveconf work, but to make arguments that collide with spark-submit's own flags work, e.g. an application-specific --master. If the user ran spark-submit --class Foo app.jar --master, that would fail.

Contributor

And they only get here if they run spark-submit --class Foo --hiveconf /some/path app.jar. In that case it's fine to leave this as is.
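
To summarize the thread above, a few hypothetical invocations and how the parser treats them (Foo and app.jar are placeholders):

# Works today: --arg is not a spark-submit flag and appears after the positional
# primary resource, so it is forwarded to the application unchanged.
./bin/spark-submit --class Foo app.jar --arg

# Reaches this error path ("Unrecognized option '--hiveconf'"): the unknown flag
# appears while spark-submit options are still being parsed, before any primary resource.
./bin/spark-submit --class Foo --hiveconf /some/path app.jar

# Fails (as noted above): --master is a spark-submit flag, so spark-submit consumes it
# and its value instead of passing them on to the application.
./bin/spark-submit --class Foo app.jar --master yarn-client

# With this PR: name the primary resource via --primary and put application options,
# even ones that collide with spark-submit flags, after "--".
./bin/spark-submit --class Foo --primary app.jar -- --master yarn-client --hiveconf /some/path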

case v =>
if (primaryResource != null) {
// Primary resource has already been specified by --primary, so the user most likely
// forgot to use "--" to separate application options from spark-submit options.
SparkSubmit.printErrorAndExit(
s"Unrecognized option '$value', " +
"note that application options must appear after \"--\".")
}

primaryResource =
if (!SparkSubmit.isShell(v) && !SparkSubmit.isInternal(v)) {
Utils.resolveURI(v).toString
@@ -349,7 +378,10 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
outStream.println("Unknown/unsupported param " + unknownParam)
}
outStream.println(
"""Usage: spark-submit [options] <app jar | python file> [app options]
"""Usage:
| spark-submit [options] <app jar | python file> [app options]
| spark-submit [options] --primary <app jar | python file> -- [app options]
|
Contributor

This changes the length of the help output message. Have you verified that all the other scripts (spark-class, spark-submit, pyspark etc.) report the entirety of the help message?

Contributor Author

Thanks! spark-shell and pyspark do need to be updated.

|Options:
| --master MASTER_URL spark://host:port, mesos://host:port, yarn, or local.
| --deploy-mode DEPLOY_MODE Whether to launch the driver program locally ("client") or
@@ -377,8 +409,15 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
|
| --executor-memory MEM Memory per executor (e.g. 1000M, 2G) (Default: 1G).
|
| --help, -h Show this help message and exit
| --verbose, -v Print additional debug output
| --help, -h Show this help message and exit.
| --verbose, -v Print additional debug output.
|
| --primary The primary jar file or Python file of the application. Used
| in conjunction with "--" to pass arbitrary arguments to the
| application if any.
| -- A "--" signals the end of spark-submit options, everything
| after "--" are passed as command line arguments to the
| application. Only used in conjunction with "--primary".
|
| Spark standalone with cluster deploy mode only:
| --driver-cores NUM Cores for driver (Default: 1).
48 changes: 46 additions & 2 deletions core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -70,11 +70,11 @@ class SparkSubmitSuite extends FunSuite with Matchers {
}

test("prints usage on empty input") {
testPrematureExit(Array[String](), "Usage: spark-submit")
testPrematureExit(Array[String](), "Usage:\n spark-submit")
}

test("prints usage with only --help") {
testPrematureExit(Array("--help"), "Usage: spark-submit")
testPrematureExit(Array("--help"), "Usage:\n spark-submit")
}

test("prints error with unrecognized options") {
@@ -106,6 +106,50 @@ class SparkSubmitSuite extends FunSuite with Matchers {
appArgs.childArgs should be (Seq("some", "--weird", "args"))
}

test("handles arguments to user program with --primary and --") {
val clArgs =
"""--name myApp
|--class Foo
|--primary userjar.jar
|--master local
|--
|some
|--weird args
""".stripMargin.split("\\s+").toSeq
val appArgs = new SparkSubmitArguments(clArgs)
appArgs.master should be ("local")
appArgs.mainClass should be ("Foo")
appArgs.childArgs should be (Seq("some", "--weird", "args"))
}

test("handles arguments to user program with --primary but no --") {
val clArgs =
"""--name myApp
|--class Foo
|--primary userjar.jar
|--master local
|some
|--weird args
""".stripMargin.split("\\s+")
testPrematureExit(clArgs, "Unrecognized option 'some'")
}

test("handles arguments to user program with empty string") {
val clArgs =
"""--name myApp
|--class Foo
|--primary userjar.jar
|--master local
|--
""".stripMargin.split("\\s+").toSeq :+ ""
val appArgs = new SparkSubmitArguments(clArgs)
appArgs.master should be ("local")
appArgs.mainClass should be ("Foo")
appArgs.childArgs should be (Seq(""))
}

test("handles YARN cluster mode") {
val clArgs = Seq(
"--deploy-mode", "cluster",
23 changes: 17 additions & 6 deletions docs/submitting-applications.md
@@ -35,17 +35,24 @@ dependencies, and can support different cluster managers and deploy modes that S
--deploy-mode <deploy-mode> \
--conf <key>=<value> \
... # other options
<application-jar> \
--primary <application-jar> \
-- \
[application-arguments]
{% endhighlight %}

(**NOTE:** As of Spark 1.1, `--` is used as a separator for user application options: anything that
follows it is passed to the application as command line arguments, including options that would
otherwise be swallowed by `spark-submit`, e.g. `--help` and `--conf`. It must be used in conjunction
with `--primary` to specify the main application jar. The old way of setting the application jar and
passing arguments to it is still supported.)

Some of the commonly used options are:

* `--class`: The entry point for your application (e.g. `org.apache.spark.examples.SparkPi`)
* `--master`: The [master URL](#master-urls) for the cluster (e.g. `spark://23.195.26.187:7077`)
* `--deploy-mode`: Whether to deploy your driver on the worker nodes (`cluster`) or locally as an external client (`client`) (default: `client`)*
* `--conf`: Arbitrary Spark configuration property in key=value format. For values that contain spaces wrap "key=value" in quotes (as shown).
* `application-jar`: Path to a bundled jar including your application and all dependencies. The URL must be globally visible inside of your cluster, for instance, an `hdfs://` path or a `file://` path that is present on all nodes.
* `--primary application-jar`: Path to a bundled jar including your application and all dependencies. The URL must be globally visible inside of your cluster, for instance, an `hdfs://` path or a `file://` path that is present on all nodes.
* `application-arguments`: Arguments passed to the main method of your main class, if any

*A common deployment strategy is to submit your application from a gateway machine that is
@@ -71,7 +78,8 @@ examples of common options:
./bin/spark-submit \
--class org.apache.spark.examples.SparkPi \
--master local[8] \
/path/to/examples.jar \
--primary /path/to/examples.jar \
-- \
100

# Run on a Spark standalone cluster
@@ -80,7 +88,8 @@ examples of common options:
--master spark://207.184.161.138:7077 \
--executor-memory 20G \
--total-executor-cores 100 \
/path/to/examples.jar \
--primary /path/to/examples.jar \
-- \
1000

# Run on a YARN cluster
@@ -90,13 +99,15 @@ export HADOOP_CONF_DIR=XXX
--master yarn-cluster \ # can also be `yarn-client` for client mode
--executor-memory 20G \
--num-executors 50 \
/path/to/examples.jar \
--primary /path/to/examples.jar \
-- \
1000

# Run a Python application on a cluster
./bin/spark-submit \
--master spark://207.184.161.138:7077 \
examples/src/main/python/pi.py \
--primary examples/src/main/python/pi.py \
-- \
1000
{% endhighlight %}
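
One more hypothetical example of what the note above enables: application arguments whose names collide with spark-submit's own flags (here --help and --conf, the two mentioned in the note) are no longer swallowed because they appear after "--"; the class and file names are placeholders:

# --help and --conf after "--" reach MyApp's main method verbatim
./bin/spark-submit \
  --class com.example.MyApp \
  --master local[4] \
  --primary /path/to/myapp.jar \
  -- \
  --help --conf /etc/myapp.conf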

17 changes: 12 additions & 5 deletions sbin/start-thriftserver.sh
@@ -26,11 +26,18 @@ set -o posix
# Figure out where Spark is installed
FWDIR="$(cd `dirname $0`/..; pwd)"

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
echo "Usage: ./sbin/start-thriftserver [options]"
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
CLASS="org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"

if [[ "$@" = --help ]] || [[ "$@" = -h ]]; then
echo "Usage: ./sbin/start-thriftserver.sh [options] [--] [thrift server options]"
exec "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v "^Usage\|\( spark-submit\)" 1>&2
echo
echo "Thrift server options:"
exec "$FWDIR"/bin/spark-submit\
--primary spark-internal --class $CLASS\
-- --help 2>&1 | grep -v usage | tail -n +3 1>&2
echo
exit 0
fi

CLASS="org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"
exec "$FWDIR"/bin/spark-submit --class $CLASS spark-internal $@
exec "$FWDIR"/bin/spark-submit --class $CLASS --primary spark-internal $@
@@ -40,7 +40,6 @@ private[hive] object HiveThriftServer2 extends Logging {
val optionsProcessor = new ServerOptionsProcessor("HiveThriftServer2")

if (!optionsProcessor.process(args)) {
logger.warn("Error starting HiveThriftServer2 with given arguments")
System.exit(-1)
}
