diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 4243ef480ba39..fecd8f2cc2d48 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -68,15 +68,29 @@ Most of the configs are the same for Spark on YARN as for other deployment modes
- spark.yarn.executor.memoryOverhead |
- 384 |
+ spark.yarn.dist.archives |
+ (none) |
+
+ Comma separated list of archives to be extracted into the working directory of each executor.
+ |
+
+
+ spark.yarn.dist.files |
+ (none) |
+
+ Comma-separated list of files to be placed in the working directory of each executor.
+ |
+ |
+
+ spark.yarn.executor.memoryOverhead |
+ 384 |
The amount of off heap memory (in megabytes) to be allocated per executor. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc.
|
spark.yarn.driver.memoryOverhead |
- 384 |
+ 384 |
The amount of off heap memory (in megabytes) to be allocated per driver. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc.
|
diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
index fd3ef9e1fa2de..62f9b3cf5ab88 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
@@ -21,8 +21,7 @@ import scala.collection.mutable.{ArrayBuffer, HashMap}
import org.apache.spark.SparkConf
import org.apache.spark.scheduler.InputFormatInfo
-import org.apache.spark.util.IntParam
-import org.apache.spark.util.MemoryParam
+import org.apache.spark.util.{Utils, IntParam, MemoryParam}
// TODO: Add code and support for ensuring that yarn resource 'tasks' are location aware !
@@ -45,6 +44,18 @@ class ClientArguments(val args: Array[String], val sparkConf: SparkConf) {
parseArgs(args.toList)
+ // env variable SPARK_YARN_DIST_ARCHIVES/SPARK_YARN_DIST_FILES set in yarn-client then
+ // it should default to hdfs://
+ files = Option(files).getOrElse(sys.env.get("SPARK_YARN_DIST_FILES").orNull)
+ archives = Option(archives).getOrElse(sys.env.get("SPARK_YARN_DIST_ARCHIVES").orNull)
+
+ // spark.yarn.dist.archives/spark.yarn.dist.files defaults to use file:// if not specified,
+ // for both yarn-client and yarn-cluster
+ files = Option(files).getOrElse(sparkConf.getOption("spark.yarn.dist.files").
+ map(p => Utils.resolveURIs(p)).orNull)
+ archives = Option(archives).getOrElse(sparkConf.getOption("spark.yarn.dist.archives").
+ map(p => Utils.resolveURIs(p)).orNull)
+
private def parseArgs(inputArgs: List[String]): Unit = {
val userArgsBuffer: ArrayBuffer[String] = new ArrayBuffer[String]()
val inputFormatMap: HashMap[String, InputFormatInfo] = new HashMap[String, InputFormatInfo]()
diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index 858bcaa95b409..8f2267599914c 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -162,7 +162,7 @@ trait ClientBase extends Logging {
val fs = FileSystem.get(conf)
val remoteFs = originalPath.getFileSystem(conf)
var newPath = originalPath
- if (! compareFs(remoteFs, fs)) {
+ if (!compareFs(remoteFs, fs)) {
newPath = new Path(dstDir, originalPath.getName())
logInfo("Uploading " + originalPath + " to " + newPath)
FileUtil.copy(remoteFs, originalPath, fs, newPath, false, conf)
@@ -250,6 +250,7 @@ trait ClientBase extends Logging {
}
}
}
+ logInfo("Prepared Local resources " + localResources)
sparkConf.set(ClientBase.CONF_SPARK_YARN_SECONDARY_JARS, cachedSecondaryJarLinks.mkString(","))
UserGroupInformation.getCurrentUser().addCredentials(credentials)
diff --git a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
index 039cf4f276119..412dfe38d55eb 100644
--- a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
@@ -70,9 +70,7 @@ private[spark] class YarnClientSchedulerBackend(
("--executor-cores", "SPARK_WORKER_CORES", "spark.executor.cores"),
("--executor-cores", "SPARK_EXECUTOR_CORES", "spark.executor.cores"),
("--queue", "SPARK_YARN_QUEUE", "spark.yarn.queue"),
- ("--name", "SPARK_YARN_APP_NAME", "spark.app.name"),
- ("--files", "SPARK_YARN_DIST_FILES", "spark.yarn.dist.files"),
- ("--archives", "SPARK_YARN_DIST_ARCHIVES", "spark.yarn.dist.archives"))
+ ("--name", "SPARK_YARN_APP_NAME", "spark.app.name"))
.foreach { case (optName, envVar, sysProp) => addArg(optName, envVar, sysProp, argsArrayBuf) }
logDebug("ClientArguments called with: " + argsArrayBuf)