
Commit bce0897

witgo authored and tgravescs committed
[SPARK-2051] In yarn.ClientBase spark.yarn.dist.* do not work
Author: witgo <[email protected]>

Closes apache#969 from witgo/yarn_ClientBase and squashes the following commits:

8117765 [witgo] review commit
3bdbc52 [witgo] Merge branch 'master' of https://github.com/apache/spark into yarn_ClientBase
5261b6c [witgo] fix sys.props.get("SPARK_YARN_DIST_FILES")
e3c1107 [witgo] update docs
b6a9aa1 [witgo] merge master
c8b4554 [witgo] review commit
2f48789 [witgo] Merge branch 'master' of https://github.com/apache/spark into yarn_ClientBase
8d7b82f [witgo] Merge branch 'master' of https://github.com/apache/spark into yarn_ClientBase
1048549 [witgo] remove Utils.resolveURIs
871f1db [witgo] add spark.yarn.dist.* documentation
41bce59 [witgo] review commit
35d6fa0 [witgo] move to ClientArguments
55d72fc [witgo] Merge branch 'master' of https://github.com/apache/spark into yarn_ClientBase
9cdff16 [witgo] review commit
8bc2f4b [witgo] review commit
20e667c [witgo] Merge branch 'master' into yarn_ClientBase
0961151 [witgo] merge master
ce609fc [witgo] Merge branch 'master' into yarn_ClientBase
8362489 [witgo] yarn.ClientBase spark.yarn.dist.* do not work
1 parent 67fca18 commit bce0897

File tree: 4 files changed, +33 -9 lines changed


docs/running-on-yarn.md

Lines changed: 17 additions & 3 deletions
@@ -68,15 +68,29 @@ Most of the configs are the same for Spark on YARN as for other deployment modes
   </td>
 </tr>
 <tr>
-  <td><code>spark.yarn.executor.memoryOverhead</code></td>
-  <td>384</code></td>
+  <td><code>spark.yarn.dist.archives</code></td>
+  <td>(none)</td>
+  <td>
+    Comma separated list of archives to be extracted into the working directory of each executor.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.yarn.dist.files</code></td>
+  <td>(none)</td>
+  <td>
+    Comma-separated list of files to be placed in the working directory of each executor.
+  <td>
+</tr>
+<tr>
+  <td><code>spark.yarn.executor.memoryOverhead</code></td>
+  <td>384</td>
   <td>
     The amount of off heap memory (in megabytes) to be allocated per executor. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc.
   </td>
 </tr>
 <tr>
   <td><code>spark.yarn.driver.memoryOverhead</code></td>
-  <td>384</code></td>
+  <td>384</td>
   <td>
     The amount of off heap memory (in megabytes) to be allocated per driver. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc.
   </td>
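
The two properties documented above can be supplied like any other Spark configuration entry. A minimal usage sketch, assuming hypothetical file and archive paths (only the spark.yarn.dist.* keys come from this commit):

    import org.apache.spark.SparkConf

    // Hypothetical paths; substitute the files and archives your job actually needs.
    val conf = new SparkConf()
      .setAppName("dist-files-example")
      // Comma-separated files copied into each executor's working directory.
      .set("spark.yarn.dist.files", "file:///opt/data/lookup.txt,hdfs:///shared/geo.dat")
      // Comma-separated archives extracted into each executor's working directory.
      .set("spark.yarn.dist.archives", "hdfs:///shared/models.zip")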

yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala

Lines changed: 13 additions & 2 deletions
@@ -21,8 +21,7 @@ import scala.collection.mutable.{ArrayBuffer, HashMap}
 
 import org.apache.spark.SparkConf
 import org.apache.spark.scheduler.InputFormatInfo
-import org.apache.spark.util.IntParam
-import org.apache.spark.util.MemoryParam
+import org.apache.spark.util.{Utils, IntParam, MemoryParam}
 
 
 // TODO: Add code and support for ensuring that yarn resource 'tasks' are location aware !
@@ -45,6 +44,18 @@ class ClientArguments(val args: Array[String], val sparkConf: SparkConf) {
 
   parseArgs(args.toList)
 
+  // env variable SPARK_YARN_DIST_ARCHIVES/SPARK_YARN_DIST_FILES set in yarn-client then
+  // it should default to hdfs://
+  files = Option(files).getOrElse(sys.env.get("SPARK_YARN_DIST_FILES").orNull)
+  archives = Option(archives).getOrElse(sys.env.get("SPARK_YARN_DIST_ARCHIVES").orNull)
+
+  // spark.yarn.dist.archives/spark.yarn.dist.files defaults to use file:// if not specified,
+  // for both yarn-client and yarn-cluster
+  files = Option(files).getOrElse(sparkConf.getOption("spark.yarn.dist.files").
+    map(p => Utils.resolveURIs(p)).orNull)
+  archives = Option(archives).getOrElse(sparkConf.getOption("spark.yarn.dist.archives").
+    map(p => Utils.resolveURIs(p)).orNull)
+
   private def parseArgs(inputArgs: List[String]): Unit = {
     val userArgsBuffer: ArrayBuffer[String] = new ArrayBuffer[String]()
     val inputFormatMap: HashMap[String, InputFormatInfo] = new HashMap[String, InputFormatInfo]()
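
The block added above gives the distributed files and archives a three-step precedence: an explicit --files/--archives argument wins, then the SPARK_YARN_DIST_FILES / SPARK_YARN_DIST_ARCHIVES environment variables, and finally the spark.yarn.dist.* configuration entries, which are passed through Utils.resolveURIs so that schemeless paths default to file://. A self-contained sketch of that precedence, using a hypothetical resolveToUris helper in place of Spark's Utils.resolveURIs:

    import java.io.File
    import java.net.URI

    // Hypothetical stand-in for Utils.resolveURIs: entries without a scheme are treated
    // as local paths and rewritten as absolute file: URIs.
    def resolveToUris(paths: String): String =
      paths.split(",").map(_.trim).filter(_.nonEmpty).map { p =>
        if (new URI(p).getScheme != null) p
        else new File(p).getAbsoluteFile.toURI.toString
      }.mkString(",")

    // Precedence: command-line value, then environment variable, then Spark configuration.
    def resolveDistSetting(cliValue: Option[String],
                           envValue: Option[String],
                           confValue: Option[String]): Option[String] =
      cliValue.orElse(envValue).orElse(confValue.map(resolveToUris))

    // Usage sketch: no --files given and no env var set, so the conf value is resolved.
    val files = resolveDistSetting(
      cliValue = None,
      envValue = sys.env.get("SPARK_YARN_DIST_FILES"),
      confValue = Some("/opt/data/lookup.txt,hdfs:///shared/geo.dat"))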

yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala

Lines changed: 2 additions & 1 deletion
@@ -162,7 +162,7 @@ trait ClientBase extends Logging {
       val fs = FileSystem.get(conf)
       val remoteFs = originalPath.getFileSystem(conf)
       var newPath = originalPath
-      if (! compareFs(remoteFs, fs)) {
+      if (!compareFs(remoteFs, fs)) {
         newPath = new Path(dstDir, originalPath.getName())
         logInfo("Uploading " + originalPath + " to " + newPath)
         FileUtil.copy(remoteFs, originalPath, fs, newPath, false, conf)
@@ -250,6 +250,7 @@ trait ClientBase extends Logging {
         }
       }
     }
+    logInfo("Prepared Local resources " + localResources)
     sparkConf.set(ClientBase.CONF_SPARK_YARN_SECONDARY_JARS, cachedSecondaryJarLinks.mkString(","))
 
     UserGroupInformation.getCurrentUser().addCredentials(credentials)
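
The first hunk above sits in ClientBase's resource-upload path: a file is copied into the application's staging directory only when its source filesystem differs from the cluster's default filesystem, and the second hunk adds a log line listing the prepared local resources. A rough sketch of that copy-if-remote check with plain Hadoop APIs (compareFs is private to ClientBase, so sameFileSystem below is a hypothetical approximation that compares URI scheme and authority):

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.{FileSystem, FileUtil, Path}

    // Hypothetical approximation of ClientBase.compareFs: treat two filesystems as the
    // same when their URIs share a scheme and authority.
    def sameFileSystem(a: FileSystem, b: FileSystem): Boolean = {
      val (ua, ub) = (a.getUri, b.getUri)
      ua.getScheme == ub.getScheme && ua.getAuthority == ub.getAuthority
    }

    // Copy originalPath into dstDir on the default filesystem unless it is already there.
    def ensureOnClusterFs(originalPath: Path, dstDir: Path, conf: Configuration): Path = {
      val fs = FileSystem.get(conf)                    // default (destination) filesystem
      val remoteFs = originalPath.getFileSystem(conf)  // filesystem the source lives on
      if (sameFileSystem(remoteFs, fs)) {
        originalPath
      } else {
        val newPath = new Path(dstDir, originalPath.getName())
        FileUtil.copy(remoteFs, originalPath, fs, newPath, false, conf)
        newPath
      }
    }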

yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala

Lines changed: 1 addition & 3 deletions
@@ -70,9 +70,7 @@ private[spark] class YarnClientSchedulerBackend(
       ("--executor-cores", "SPARK_WORKER_CORES", "spark.executor.cores"),
       ("--executor-cores", "SPARK_EXECUTOR_CORES", "spark.executor.cores"),
       ("--queue", "SPARK_YARN_QUEUE", "spark.yarn.queue"),
-      ("--name", "SPARK_YARN_APP_NAME", "spark.app.name"),
-      ("--files", "SPARK_YARN_DIST_FILES", "spark.yarn.dist.files"),
-      ("--archives", "SPARK_YARN_DIST_ARCHIVES", "spark.yarn.dist.archives"))
+      ("--name", "SPARK_YARN_APP_NAME", "spark.app.name"))
       .foreach { case (optName, envVar, sysProp) => addArg(optName, envVar, sysProp, argsArrayBuf) }
 
     logDebug("ClientArguments called with: " + argsArrayBuf)
