diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala
index d78b9f1b2968..601d24191eec 100644
--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
+++ b/core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -699,8 +699,10 @@ private[spark] object SparkConf extends Logging {
"spark.rpc.message.maxSize" -> Seq(
AlternateConfig("spark.akka.frameSize", "1.6")),
"spark.yarn.jars" -> Seq(
- AlternateConfig("spark.yarn.jar", "2.0"))
- )
+ AlternateConfig("spark.yarn.jar", "2.0")),
+ "spark.yarn.access.hadoopFileSystems" -> Seq(
+ AlternateConfig("spark.yarn.access.namenodes", "2.2"))
+ )
/**
* A view of `configsWithAlternatives` that makes it more efficient to look up deprecated
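
With the alternate-config entry above, reads of the new key transparently resolve values set under the deprecated one, and setting the old key logs a deprecation warning. A minimal sketch of the expected behavior (hostname is a placeholder; the assertion is illustrative, not part of the change):

```scala
import org.apache.spark.SparkConf

// A user still setting the deprecated key...
val conf = new SparkConf()
  .set("spark.yarn.access.namenodes", "hdfs://nn1.example.com:8020")

// ...should see the value resolved through the renamed key.
assert(conf.get("spark.yarn.access.hadoopFileSystems") ==
  "hdfs://nn1.example.com:8020")
```
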
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index f7513454c785..051f64e1beab 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -276,15 +276,16 @@ To use a custom metrics.properties for the application master and executors, upd
- <td><code>spark.yarn.access.namenodes</code></td>
+ <td><code>spark.yarn.access.hadoopFileSystems</code></td>
<td>(none)</td>
<td>
- A comma-separated list of secure HDFS namenodes your Spark application is going to access. For
- example, <code>spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032,
- webhdfs://nn3.com:50070</code>. The Spark application must have access to the namenodes listed
+ A comma-separated list of secure Hadoop filesystems your Spark application is going to access. For
+ example, <code>spark.yarn.access.hadoopFileSystems=hdfs://nn1.com:8032,hdfs://nn2.com:8032,
+ webhdfs://nn3.com:50070</code>. The Spark application must have access to the filesystems listed
and Kerberos must be properly configured to be able to access them (either in the same realm
- or in a trusted realm). Spark acquires security tokens for each of the namenodes so that
- the Spark application can access those remote HDFS clusters.
+ or in a trusted realm). Spark acquires security tokens for each of the filesystems so that
+ the Spark application can access those remote Hadoop filesystems.
+ <code>spark.yarn.access.namenodes</code> is deprecated; please use this instead.
</td>
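
For applications that set this in code rather than in a properties file, a short sketch (hostnames are placeholders):

```scala
import org.apache.spark.SparkConf

// Request delegation tokens for two extra secure filesystems; the
// filesystem hosting fs.defaultFS does not need to be listed.
val conf = new SparkConf()
  .set("spark.yarn.access.hadoopFileSystems",
    "hdfs://nn1.example.com:8020,webhdfs://nn3.example.com:50070")
```
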
@@ -496,10 +497,10 @@ includes a URI of the metadata store in `"hive.metastore.uris`, and
If an application needs to interact with other secure Hadoop filesystems, then
the tokens needed to access these clusters must be explicitly requested at
-launch time. This is done by listing them in the `spark.yarn.access.namenodes` property.
+launch time. This is done by listing them in the `spark.yarn.access.hadoopFileSystems` property.
```
-spark.yarn.access.namenodes hdfs://ireland.example.org:8020/,webhdfs://frankfurt.example.org:50070/
+spark.yarn.access.hadoopFileSystems hdfs://ireland.example.org:8020/,webhdfs://frankfurt.example.org:50070/
```
Spark supports integrating with other security-aware services through Java Services mechanism (see
@@ -574,7 +575,7 @@ spark.yarn.security.credentials.hive.enabled false
spark.yarn.security.credentials.hbase.enabled false
```
-The configuration option `spark.yarn.access.namenodes` must be unset.
+The configuration option `spark.yarn.access.hadoopFileSystems` must be unset.
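
A small illustration of the "must be unset" requirement (the check itself is not Spark API; `conf` is assumed to be the application's `SparkConf`):

```scala
// With the Hive and HBase credential providers disabled, fail fast if the
// extra-filesystems list is still set. Thanks to the alternate-config
// mapping, contains() on the new key also reports a value set under the
// deprecated spark.yarn.access.namenodes.
require(!conf.contains("spark.yarn.access.hadoopFileSystems"),
  "spark.yarn.access.hadoopFileSystems must be unset in this mode")
```
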
## Troubleshooting Kerberos
diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala
index 666cb456a9be..f19a5b22a757 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala
@@ -248,6 +248,11 @@ package object config {
.toSequence
.createWithDefault(Nil)
+ private[spark] val FILESYSTEMS_TO_ACCESS = ConfigBuilder("spark.yarn.access.hadoopFileSystems")
+ .doc("Extra Hadoop filesystem URLs for which to request delegation tokens. The filesystem " +
+ "that hosts fs.defaultFS does not need to be listed here.")
+ .fallbackConf(NAMENODES_TO_ACCESS)
+
/* Rolled log aggregation configuration. */
private[spark] val ROLLED_LOG_INCLUDE_PATTERN =
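
`fallbackConf` wires the new entry to the old one at the typed-config level: reading `FILESYSTEMS_TO_ACCESS` returns its own value when set and otherwise the value of `NAMENODES_TO_ACCESS`. A sketch of the expected resolution (usable only inside Spark's own packages, since the entries are `private[spark]`):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.deploy.yarn.config._

val sparkConf = new SparkConf()
  .set("spark.yarn.access.namenodes", "hdfs://old-nn.example.com:8020")

// The typed read of the new entry falls back to the deprecated key's value.
sparkConf.get(FILESYSTEMS_TO_ACCESS)  // Seq("hdfs://old-nn.example.com:8020")
```
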
diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/HadoopFSCredentialProvider.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/HadoopFSCredentialProvider.scala
index b4fb4a790adc..f65c886db944 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/HadoopFSCredentialProvider.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/HadoopFSCredentialProvider.scala
@@ -47,7 +47,7 @@ private[security] class HadoopFSCredentialProvider
- // NameNode to access, used to get tokens from different FileSystems
+ // Filesystems to access, used to get tokens from each of them
val tmpCreds = new Credentials()
val tokenRenewer = getTokenRenewer(hadoopConf)
- nnsToAccess(hadoopConf, sparkConf).foreach { dst =>
+ hadoopFSsToAccess(hadoopConf, sparkConf).foreach { dst =>
val dstFs = dst.getFileSystem(hadoopConf)
logInfo("getting token for: " + dst)
dstFs.addDelegationTokens(tokenRenewer, tmpCreds)
@@ -80,7 +80,7 @@ private[security] class HadoopFSCredentialProvider
// user as renewer.
sparkConf.get(PRINCIPAL).flatMap { renewer =>
val creds = new Credentials()
- nnsToAccess(hadoopConf, sparkConf).foreach { dst =>
+ hadoopFSsToAccess(hadoopConf, sparkConf).foreach { dst =>
val dstFs = dst.getFileSystem(hadoopConf)
dstFs.addDelegationTokens(renewer, creds)
}
@@ -112,8 +112,8 @@ private[security] class HadoopFSCredentialProvider
delegTokenRenewer
}
- private def nnsToAccess(hadoopConf: Configuration, sparkConf: SparkConf): Set[Path] = {
- sparkConf.get(NAMENODES_TO_ACCESS).map(new Path(_)).toSet +
+ private def hadoopFSsToAccess(hadoopConf: Configuration, sparkConf: SparkConf): Set[Path] = {
+ sparkConf.get(FILESYSTEMS_TO_ACCESS).map(new Path(_)).toSet +
sparkConf.get(STAGING_DIR).map(new Path(_))
.getOrElse(FileSystem.get(hadoopConf).getHomeDirectory)
}
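
Putting the pieces together, the renamed helper collects every filesystem listed under `spark.yarn.access.hadoopFileSystems` plus the staging directory, defaulting to the home directory on the default filesystem. A self-contained sketch of the same logic using plain string keys (illustrative; the patched code uses the typed entries above, which also supply the fallback to the deprecated key that this sketch omits):

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkConf

def hadoopFSsToAccess(hadoopConf: Configuration, sparkConf: SparkConf): Set[Path] = {
  // Extra filesystems for which delegation tokens are requested.
  val extra = sparkConf.get("spark.yarn.access.hadoopFileSystems", "")
    .split(",").map(_.trim).filter(_.nonEmpty).map(new Path(_)).toSet
  // The staging directory (spark.yarn.stagingDir) or, failing that, the
  // home directory on the default filesystem.
  val staging = sparkConf.getOption("spark.yarn.stagingDir")
    .map(new Path(_))
    .getOrElse(FileSystem.get(hadoopConf).getHomeDirectory)
  extra + staging
}
```
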