From 196df1c9fa0c423a30f3b118bf1dd58480cb2fee Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Tue, 27 May 2014 16:07:27 -0700 Subject: [PATCH 1/3] Allow users to programmatically set the spark jar. --- .../main/scala/org/apache/spark/deploy/yarn/ClientBase.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index aeb3f0062df3b..38baa02a02b01 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -386,7 +386,8 @@ object ClientBase { val LOCAL_SCHEME = "local" val CONF_SPARK_YARN_SECONDARY_JARS = "spark.yarn.secondary.jars" - def getSparkJar = sys.env.get("SPARK_JAR").getOrElse(SparkContext.jarOfClass(this.getClass).head) + def getSparkJar = sys.props.get("SPARK_JAR").orElse(sys.env.get("SPARK_JAR")) + .getOrElse(SparkContext.jarOfClass(this.getClass).head) // Based on code from org.apache.hadoop.mapreduce.v2.util.MRApps def populateHadoopClasspath(conf: Configuration, env: HashMap[String, String]) { From bdff88ac46bff5aea63e23c24d5d5f00a4e83023 Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Thu, 5 Jun 2014 15:43:09 -0700 Subject: [PATCH 2/3] Doc update --- docs/running-on-yarn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 66c330fdee734..22e9385911086 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -30,7 +30,7 @@ System Properties: * `spark.yarn.max.executor.failures`, the maximum number of executor failures before failing the application. Default is the number of executors requested times 2 with minimum of 3. * `spark.yarn.historyServer.address`, the address of the Spark history server (i.e. host.com:18080). The address should not contain a scheme (http://). 
Defaults to not being set since the history server is an optional service. This address is given to the Yarn ResourceManager when the Spark application finishes to link the application from the ResourceManager UI to the Spark history server UI. -By default, Spark on YARN will use a Spark jar installed locally, but the Spark jar can also be in a world-readable location on HDFS. This allows YARN to cache it on nodes so that it doesn't need to be distributed each time an application runs. To point to a jar on HDFS, export SPARK_JAR=hdfs:///some/path. +By default, Spark on YARN will use a Spark jar installed locally, but the location of Spark jar can also be explicitly specified by users. For example, if the local Spark jar is not correctly in the classpath, users may want to point the location of the jar. Users can also have the Spark jar available in a world-readable location on HDFS, and this allows YARN to cache it on nodes so that it doesn't need to be distributed each time an application runs. To point to a jar, export SPARK_JAR=URLtoJar or programmatically set it through Java Properties with key `SPARK_JAR` and value `URLtoJar`. `URLtoJar` can be in local filesystem like `file:///some/path` or in HDFS like hdfs:///some/path`. # Launching Spark on YARN From a3f0815e84ea4a72bc973f3d32e1b5c9f59c39fb Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Thu, 5 Jun 2014 15:49:40 -0700 Subject: [PATCH 3/3] Update doc --- docs/running-on-yarn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 22e9385911086..dd9643f2e6d31 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -30,7 +30,7 @@ System Properties: * `spark.yarn.max.executor.failures`, the maximum number of executor failures before failing the application. Default is the number of executors requested times 2 with minimum of 3. * `spark.yarn.historyServer.address`, the address of the Spark history server (i.e. host.com:18080). 
The address should not contain a scheme (http://). Defaults to not being set since the history server is an optional service. This address is given to the Yarn ResourceManager when the Spark application finishes to link the application from the ResourceManager UI to the Spark history server UI. -By default, Spark on YARN will use a Spark jar installed locally, but the location of Spark jar can also be explicitly specified by users. For example, if the local Spark jar is not correctly in the classpath, users may want to point the location of the jar. Users can also have the Spark jar available in a world-readable location on HDFS, and this allows YARN to cache it on nodes so that it doesn't need to be distributed each time an application runs. To point to a jar, export SPARK_JAR=URLtoJar or programmatically set it through Java Properties with key `SPARK_JAR` and value `URLtoJar`. `URLtoJar` can be in local filesystem like `file:///some/path` or in HDFS like hdfs:///some/path`. +By default, Spark on YARN will use a Spark jar installed locally, but the location of the Spark jar can also be explicitly specified by users. For example, if the local Spark jar is not correctly in the classpath, users may want to point to the location of the jar. Users can also have the Spark jar available in a world-readable location on HDFS, and this allows YARN to cache it on nodes so that it doesn't need to be distributed each time an application runs. To point to a jar, export SPARK_JAR=URLtoJar or programmatically set it through JVM system properties with key `SPARK_JAR` and value `URLtoJar`. `URLtoJar` can be in the local filesystem like `file:///some/path` or in HDFS like `hdfs:///some/path`. # Launching Spark on YARN