diff --git a/bin/beeline b/bin/beeline
index 09fe366c609f..1bda4dba5060 100755
--- a/bin/beeline
+++ b/bin/beeline
@@ -17,29 +17,14 @@
 # limitations under the License.
 #
 
-# Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+#
+# Shell script for starting BeeLine
 
-# Find the java binary
-if [ -n "${JAVA_HOME}" ]; then
-  RUNNER="${JAVA_HOME}/bin/java"
-else
-  if [ `command -v java` ]; then
-    RUNNER="java"
-  else
-    echo "JAVA_HOME is not set" >&2
-    exit 1
-  fi
-fi
+# Enter posix mode for bash
+set -o posix
 
-# Compute classpath using external script
-classpath_output=$($FWDIR/bin/compute-classpath.sh)
-if [[ "$?" != "0" ]]; then
-  echo "$classpath_output"
-  exit 1
-else
-  CLASSPATH=$classpath_output
-fi
+# Figure out where Spark is installed
+FWDIR="$(cd `dirname $0`/..; pwd)"
 
 CLASS="org.apache.hive.beeline.BeeLine"
-exec "$RUNNER" -cp "$CLASSPATH" $CLASS "$@"
+exec "$FWDIR/bin/spark-class" $CLASS "$@"
diff --git a/bin/pyspark b/bin/pyspark
index 39a20e2a24a3..02a709ab71e1 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -27,7 +27,7 @@ SCALA_VERSION=2.10
 
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
   echo "Usage: ./bin/pyspark [options]" 1>&2
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  exec "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v "^Usage\|\( spark-submit\)" 1>&2
   exit 0
 fi
 
diff --git a/bin/spark-shell b/bin/spark-shell
index 756c8179d12b..300ab87aff13 100755
--- a/bin/spark-shell
+++ b/bin/spark-shell
@@ -33,7 +33,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
   echo "Usage: ./bin/spark-shell [options]"
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  exec "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v "^Usage\|\( spark-submit\)" 1>&2
   exit 0
 fi
 
diff --git a/bin/spark-sql b/bin/spark-sql
index bba7f897b19b..f74eb455ea49 100755
--- a/bin/spark-sql
+++ b/bin/spark-sql
@@ -26,11 +26,18 @@ set -o posix
 # Figure out where Spark is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./sbin/spark-sql [options]"
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
+
+if [[ "$@" = --help ]] || [[ "$@" = -h ]]; then
+  echo "Usage: ./sbin/spark-sql [options] [--] [CLI options]"
+  exec "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v "^Usage\|\( spark-submit\)" 1>&2
+  echo
+  echo "CLI options:"
+  exec "$FWDIR"/bin/spark-submit\
+    --primary spark-internal --class $CLASS\
+    -- --help 2>&1 | grep -v -- "-H,--help" | tail -n +3 1>&2
+  echo
   exit 0
 fi
 
-CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
-exec "$FWDIR"/bin/spark-submit --class $CLASS spark-internal $@
+exec "$FWDIR"/bin/spark-submit --class $CLASS --primary spark-internal $@
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index dd044e629876..486a5378d847 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -311,6 +311,27 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
         verbose = true
         parse(tail)
 
+      case ("--primary" | "-p") :: value :: tail =>
+        primaryResource = if (!SparkSubmit.isShell(value) && !SparkSubmit.isInternal(value)) {
+          Utils.resolveURI(value).toString
+        } else {
+          value
+        }
+        isPython = SparkSubmit.isPython(value)
+        parse(tail)
+
+      case "--" :: tail =>
+        if (inSparkOpts) {
+          // Primary resource was specified with "--primary"; "--" is treated as the separator
+          // between spark-submit options and user application options.
+          childArgs ++= tail
+        } else {
+          // Primary resource was specified as a positional argument; "--" is passed to the
+          // application as a normal argument.
+          childArgs += "--"
+          parse(tail)
+        }
+
       case value :: tail =>
         if (inSparkOpts) {
           value match {
@@ -322,6 +343,14 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
             val errMessage = s"Unrecognized option '$value'."
             SparkSubmit.printErrorAndExit(errMessage)
           case v =>
+            if (primaryResource != null) {
+              // Primary resource has already been specified by --primary. It's likely that the
+              // user forgot to use "--" to separate application options from spark-submit options.
+              SparkSubmit.printErrorAndExit(
+                s"Unrecognized option '$value', " +
+                  "note that application options must appear after \"--\".")
+            }
+
             primaryResource =
               if (!SparkSubmit.isShell(v) && !SparkSubmit.isInternal(v)) {
                 Utils.resolveURI(v).toString
@@ -349,7 +378,10 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
       outStream.println("Unknown/unsupported param " + unknownParam)
     }
     outStream.println(
-      """Usage: spark-submit [options] <app jar | python file> [app options]
+      """Usage:
+        |  spark-submit [options] <app jar | python file> [app options]
+        |  spark-submit [options] --primary <app jar | python file> -- [app options]
+        |
         |Options:
         |  --master MASTER_URL         spark://host:port, mesos://host:port, yarn, or local.
         |  --deploy-mode DEPLOY_MODE   Whether to launch the driver program locally ("client") or
@@ -377,8 +409,15 @@
         |
         |  --executor-memory MEM       Memory per executor (e.g. 1000M, 2G) (Default: 1G).
         |
-        |  --help, -h                  Show this help message and exit
-        |  --verbose, -v               Print additional debug output
+        |  --help, -h                  Show this help message and exit.
+        |  --verbose, -v               Print additional debug output.
+        |
+        |  --primary                   The primary jar file or Python file of the application. Used
+        |                              in conjunction with "--" to pass arbitrary arguments to the
+        |                              application, if any.
+        |  --                          A "--" signals the end of spark-submit options; everything
+        |                              after "--" is passed to the application as command line
+        |                              arguments. Only used in conjunction with "--primary".
         |
         | Spark standalone with cluster deploy mode only:
         |  --driver-cores NUM          Cores for driver (Default: 1).
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 9190b05e2dba..2813ab20d761 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -70,11 +70,11 @@ class SparkSubmitSuite extends FunSuite with Matchers {
   }
 
   test("prints usage on empty input") {
-    testPrematureExit(Array[String](), "Usage: spark-submit")
+    testPrematureExit(Array[String](), "Usage:\n  spark-submit")
   }
 
   test("prints usage with only --help") {
-    testPrematureExit(Array("--help"), "Usage: spark-submit")
+    testPrematureExit(Array("--help"), "Usage:\n  spark-submit")
   }
 
   test("prints error with unrecognized options") {
@@ -106,6 +106,48 @@ class SparkSubmitSuite extends FunSuite with Matchers {
     appArgs.childArgs should be (Seq("some", "--weird", "args"))
   }
 
+  test("handles arguments to user program with --primary and --") {
+    val clArgs =
+      """--name myApp
+        |--class Foo
+        |--primary userjar.jar
+        |--master local
+        |--
+        |some
+        |--weird args
+      """.stripMargin.split("\\s+").toSeq
+    val appArgs = new SparkSubmitArguments(clArgs)
+    appArgs.master should be ("local")
+    appArgs.mainClass should be ("Foo")
+    appArgs.childArgs should be (Seq("some", "--weird", "args"))
+  }
+
+  test("handles arguments to user program with --primary but no --") {
+    val clArgs =
+      """--name myApp
+        |--class Foo
+        |--primary userjar.jar
+        |--master local
+        |some
+        |--weird args
+      """.stripMargin.split("\\s+")
+    testPrematureExit(clArgs, "Unrecognized option 'some'")
+  }
+
+  test("handles arguments to user program with empty string") {
+    val clArgs =
+      """--name myApp
+        |--class Foo
+        |--primary userjar.jar
+        |--master local
+        |--
+      """.stripMargin.split("\\s+").toSeq :+ ""
+    val appArgs = new SparkSubmitArguments(clArgs)
+    appArgs.master should be ("local")
+    appArgs.mainClass should be ("Foo")
+    appArgs.childArgs should be (Seq(""))
+  }
+
   test("handles YARN cluster mode") {
     val clArgs = Seq(
       "--deploy-mode", "cluster",
diff --git a/docs/submitting-applications.md b/docs/submitting-applications.md
index 45b70b1a5457..d5efd907ef36 100644
--- a/docs/submitting-applications.md
+++ b/docs/submitting-applications.md
@@ -35,17 +35,24 @@ dependencies, and can support different cluster managers and deploy modes that S
   --deploy-mode <deploy-mode> \
   --conf <key>=<value> \
   ... # other options
-  <application-jar> \
+  --primary <application-jar> \
+  -- \
   [application-arguments]
 {% endhighlight %}
 
+(**NOTE** As of Spark 1.1, `--` is used as a separator for user application options, such that
+anything that follows will be passed to the application as command line arguments. This includes
+those that were once swallowed by `spark-submit`, e.g. `--help` and `--conf`. Note that this must be
+used in conjunction with `--primary` to specify the main application jar. The old way of setting
+application jars and passing arguments to applications is still supported.)
+
 Some of the commonly used options are:
 
 * `--class`: The entry point for your application (e.g. `org.apache.spark.examples.SparkPi`)
 * `--master`: The [master URL](#master-urls) for the cluster (e.g. `spark://23.195.26.187:7077`)
 * `--deploy-mode`: Whether to deploy your driver on the worker nodes (`cluster`) or locally as an external client (`client`) (default: `client`)*
 * `--conf`: Arbitrary Spark configuration property in key=value format. For values that contain spaces wrap "key=value" in quotes (as shown).
-* `application-jar`: Path to a bundled jar including your application and all dependencies. The URL must be globally visible inside of your cluster, for instance, an `hdfs://` path or a `file://` path that is present on all nodes.
+* `--primary application-jar`: Path to a bundled jar including your application and all dependencies. The URL must be globally visible inside of your cluster, for instance, an `hdfs://` path or a `file://` path that is present on all nodes.
 * `application-arguments`: Arguments passed to the main method of your main class, if any
 
 *A common deployment strategy is to submit your application from a gateway machine that is
@@ -71,7 +78,8 @@ examples of common options:
 ./bin/spark-submit \
   --class org.apache.spark.examples.SparkPi \
   --master local[8] \
-  /path/to/examples.jar \
+  --primary /path/to/examples.jar \
+  -- \
   100
 
 # Run on a Spark standalone cluster
@@ -80,7 +88,8 @@ examples of common options:
   --master spark://207.184.161.138:7077 \
   --executor-memory 20G \
   --total-executor-cores 100 \
-  /path/to/examples.jar \
+  --primary /path/to/examples.jar \
+  -- \
   1000
 
 # Run on a YARN cluster
@@ -90,13 +99,15 @@ export HADOOP_CONF_DIR=XXX
   --master yarn-cluster \  # can also be `yarn-client` for client mode
   --executor-memory 20G \
   --num-executors 50 \
-  /path/to/examples.jar \
+  --primary /path/to/examples.jar \
+  -- \
   1000
 
 # Run a Python application on a cluster
 ./bin/spark-submit \
   --master spark://207.184.161.138:7077 \
-  examples/src/main/python/pi.py \
+  --primary examples/src/main/python/pi.py \
+  -- \
   1000
 {% endhighlight %}
 
diff --git a/sbin/start-thriftserver.sh b/sbin/start-thriftserver.sh
index 8398e6f19b51..7a2d224dc294 100755
--- a/sbin/start-thriftserver.sh
+++ b/sbin/start-thriftserver.sh
@@ -26,11 +26,18 @@ set -o posix
 # Figure out where Spark is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./sbin/start-thriftserver [options]"
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+CLASS="org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"
+
+if [[ "$@" = --help ]] || [[ "$@" = -h ]]; then
+  echo "Usage: ./sbin/start-thriftserver.sh [options] [--] [thrift server options]"
+  exec "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v "^Usage\|\( spark-submit\)" 1>&2
+  echo
+  echo "Thrift server options:"
+  exec "$FWDIR"/bin/spark-submit\
+    --primary spark-internal --class $CLASS\
+    -- --help 2>&1 | grep -v usage | tail -n +3 1>&2
+  echo
   exit 0
 fi
 
-CLASS="org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"
-exec "$FWDIR"/bin/spark-submit --class $CLASS spark-internal $@
+exec "$FWDIR"/bin/spark-submit --class $CLASS --primary spark-internal $@
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
index ddbc2a79fb51..2361ec51ed4a 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
@@ -40,7 +40,6 @@ private[hive] object HiveThriftServer2 extends Logging {
     val optionsProcessor = new ServerOptionsProcessor("HiveThriftServer2")
 
     if (!optionsProcessor.process(args)) {
-      logger.warn("Error starting HiveThriftServer2 with given arguments")
      System.exit(-1)
    }
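
As a usage sketch of the behavior described in docs/submitting-applications.md above: with `--primary` and `--`, options that spark-submit itself understands can be forwarded to the user application instead of being swallowed. The jar path, main class, and application flags below are hypothetical and only illustrate the intended invocation:

    # Hypothetical example: everything after "--" reaches MyApp's main() verbatim,
    # including flags such as --conf and --help that spark-submit would otherwise consume.
    ./bin/spark-submit \
      --class com.example.MyApp \
      --master local[4] \
      --primary /path/to/myapp.jar \
      -- \
      --conf /etc/myapp/app.conf --help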