Commit 09fe224
Use token.renew to get token's renewal interval rather than using hdfs-site.xml
Parent: 6963bbc · Commit: 09fe224

8 files changed, +42 −31 lines changed
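At its core, the change measures the renewal interval empirically: renew a freshly obtained delegation token once and take the distance from its issue date to the new expiration, instead of reading `dfs.namenode.delegation.token.renew-interval` out of a possibly stale `hdfs-site.xml`. A minimal sketch of that measurement, mirroring the `getTokenRenewalInterval` helper this commit adds to `Client.scala` below (the populated `Credentials` object and Hadoop configuration are assumed to exist):

import java.io.{ByteArrayInputStream, DataInputStream}

import scala.collection.JavaConversions._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
import org.apache.hadoop.security.Credentials

// Sketch: renew an HDFS delegation token (whose renewer is the logged-in user)
// and derive the interval the NameNode actually grants.
def measuredRenewalInterval(creds: Credentials, hadoopConf: Configuration): Long = {
  val token = creds.getAllTokens
    .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
    .head
  val newExpiration = token.renew(hadoopConf) // renew() returns the new expiration time (ms)
  val identifier = new DelegationTokenIdentifier()
  identifier.readFields(new DataInputStream(new ByteArrayInputStream(token.getIdentifier)))
  newExpiration - identifier.getIssueDate     // interval actually granted by the NameNode
}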

core/src/main/scala/org/apache/spark/deploy/ExecutorDelegationTokenUpdater.scala

Lines changed: 1 addition & 1 deletion

@@ -70,7 +70,7 @@ private[spark] class ExecutorDelegationTokenUpdater(
       }
       val timeFromNowToRenewal =
         SparkHadoopUtil.get.getTimeFromNowToRenewal(
-          0.8, UserGroupInformation.getCurrentUser.getCredentials)
+          sparkConf, 0.8, UserGroupInformation.getCurrentUser.getCredentials)
       if (timeFromNowToRenewal <= 0) {
         executorUpdaterRunnable.run()
       } else {

core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala

Lines changed: 6 additions & 7 deletions

@@ -31,7 +31,7 @@ import org.apache.hadoop.mapred.JobConf
 import org.apache.hadoop.mapreduce.JobContext
 import org.apache.hadoop.security.{Credentials, UserGroupInformation}
 
-import org.apache.spark._
+import org.apache.spark.{Logging, SparkConf, SparkException}
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.util.Utils
 
@@ -47,8 +47,6 @@ class SparkHadoopUtil extends Logging {
   private val sparkConf = new SparkConf()
   val conf: Configuration = newConfiguration(sparkConf)
   UserGroupInformation.setConfiguration(conf)
-  private lazy val renewalInterval =
-    conf.getLong("dfs.namenode.delegation.token.renew-interval", (24 hours).toMillis)
 
   /**
    * Runs the given function with a Hadoop UserGroupInformation as a thread local variable
@@ -213,9 +211,6 @@ class SparkHadoopUtil extends Logging {
    * Lists all the files in a directory with the specified prefix, and does not end with the
    * given suffix. The returned {{FileStatus}} instances are sorted by the modification times of
    * the respective files.
-   * @param remoteFs
-   * @param prefix
-   * @return
    */
   def listFilesSorted(
       remoteFs: FileSystem,
@@ -242,8 +237,12 @@ class SparkHadoopUtil extends Logging {
    * is valid the latest)?
    * This will return -ve (or 0) value if the fraction of validity has already expired.
    */
-  def getTimeFromNowToRenewal(fraction: Double, credentials: Credentials): Long = {
+  def getTimeFromNowToRenewal(
+      sparkConf: SparkConf,
+      fraction: Double,
+      credentials: Credentials): Long = {
     val now = System.currentTimeMillis()
+    val renewalInterval = sparkConf.getLong("spark.yarn.renewal.interval", (24 hours).toMillis)
     credentials.getAllTokens.filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
       .map { t =>
         val identifier = new DelegationTokenIdentifier()
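The hunk above cuts off just as the per-token mapping begins. A hedged sketch of how the method presumably completes, consistent with the visible lines (the decode, arithmetic, and fold here are a reconstruction for illustration, not text from this commit):

// Hypothetical tail of getTimeFromNowToRenewal: for each HDFS token, compute
// the instant at which `fraction` of the renewal interval has elapsed, measured
// from the token's issue date, and return the latest such instant relative to `now`.
credentials.getAllTokens
  .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
  .map { t =>
    val identifier = new DelegationTokenIdentifier()
    identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
    (identifier.getIssueDate + fraction * renewalInterval).toLong - now
  }.foldLeft(0L)(math.max)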

core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala

Lines changed: 1 addition & 5 deletions

@@ -17,15 +17,12 @@
 
 package org.apache.spark.executor
 
-import java.io.{ByteArrayInputStream, DataInputStream}
 import java.net.URL
 import java.nio.ByteBuffer
 
 import scala.collection.mutable
 import scala.util.{Failure, Success}
 
-import org.apache.hadoop.security.{Credentials, UserGroupInformation}
-
 import org.apache.spark.rpc._
 import org.apache.spark._
 import org.apache.spark.TaskState.TaskState
@@ -77,7 +74,7 @@ private[spark] class CoarseGrainedExecutorBackend(
   }
 
   override def receive: PartialFunction[Any, Unit] = {
-    case RegisteredExecutor=>
+    case RegisteredExecutor =>
       logInfo("Successfully registered with driver")
       val (hostname, _) = Utils.parseHostPort(hostPort)
       executor = new Executor(executorId, hostname, env, userClassPath, isLocal = false)
@@ -195,7 +192,6 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging {
      workerUrl.foreach { url =>
        env.rpcEnv.setupEndpoint("WorkerWatcher", new WorkerWatcher(env.rpcEnv, url))
      }
-     env.actorSystem.awaitTermination()
      tokenUpdaterOption.foreach(_.stop())
      env.rpcEnv.awaitTermination()
    }

docs/security.md

Lines changed: 1 addition & 1 deletion

@@ -32,7 +32,7 @@ SSL must be configured on each node and configured for each component involved i
 ### YARN mode
 The key-store can be prepared on the client side and then distributed and used by the executors as the part of the application. It is possible because the user is able to deploy files before the application is started in YARN by using `spark.yarn.dist.files` or `spark.yarn.dist.archives` configuration settings. The responsibility for encryption of transferring these files is on YARN side and has nothing to do with Spark.
 
-For long-running apps like Spark Streaming apps to be able to write to HDFS, it is possible to pass a principal and keytab to `spark-submit` via the `--principal` and `--keytab` parameters respectively. The keytab passed in will be copied over to the machine running the Application Master via the Hadoop Distributed Cache (securely - if YARN is configured with SSL and HDFS encryption is enabled). The Kerberos login will be periodically renewed using this principal and keytab and the delegation tokens required for HDFS will be generated periodically so the application can continue writing to HDFS. Please note that the HDFS client configuration file, `hdfs-site.xml` on each executor node must have the value of `dfs.namenode.delegation.token.renew-interval` be the same as it is on the HDFS Namenode for this functionality.
+For long-running apps like Spark Streaming apps to be able to write to HDFS, it is possible to pass a principal and keytab to `spark-submit` via the `--principal` and `--keytab` parameters respectively. The keytab passed in will be copied over to the machine running the Application Master via the Hadoop Distributed Cache (securely - if YARN is configured with SSL and HDFS encryption is enabled). The Kerberos login will be periodically renewed using this principal and keytab and the delegation tokens required for HDFS will be generated periodically so the application can continue writing to HDFS.
 
 ### Standalone mode
 The user needs to provide key-stores and configuration options for master and workers. They have to be set by attaching appropriate Java system properties in `SPARK_MASTER_OPTS` and in `SPARK_WORKER_OPTS` environment variables, or just in `SPARK_DAEMON_JAVA_OPTS`. In this mode, the user may allow the executors to use the SSL settings inherited from the worker which spawned that executor. It can be accomplished by setting `spark.ssl.useNodeLocalConf` to `true`. If that parameter is set, the settings provided by user on the client side, are not used by the executors.
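For context, the YARN-mode workflow changed above is driven entirely by the two `spark-submit` flags named in that paragraph. A hedged example invocation (the principal, keytab path, class, and jar are placeholders, not taken from this commit):

./bin/spark-submit \
  --master yarn-cluster \
  --principal user@EXAMPLE.COM \
  --keytab /etc/security/keytabs/user.keytab \
  --class com.example.StreamingApp \
  streaming-app.jar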

yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala

Lines changed: 6 additions & 7 deletions

@@ -32,12 +32,12 @@ import org.apache.spark.util.ThreadUtils
  * Streaming apps can run without interruption while writing to secure HDFS. The
  * scheduleLoginFromKeytab method is called on the driver when the
  * CoarseGrainedScheduledBackend starts up. This method wakes up a thread that logs into the KDC
- * once 75% of the expiry time of the original delegation tokens used for the container
+ * once 75% of the renewal interval of the original delegation tokens used for the container
  * has elapsed. It then creates new delegation tokens and writes them to HDFS in a
  * pre-specified location - the prefix of which is specified in the sparkConf by
  * spark.yarn.credentials.file (so the file(s) would be named c-1, c-2 etc. - each update goes
  * to a new file, with a monotonically increasing suffix). After this, the credentials are
- * updated once 75% of the new tokens validity has elapsed.
+ * updated once 75% of the new tokens renewal interval has elapsed.
  *
  * On the executor side, the updateCredentialsIfRequired method is called once 80% of the
  * validity of the original tokens has elapsed. At that time the executor finds the
@@ -72,13 +72,12 @@ private[yarn] class AMDelegationTokenRenewer(
   val keytab = sparkConf.get("spark.yarn.keytab")
 
   /**
-   * Schedule the renewal of the tokens. If tokens have already expired, this method will
-   * synchronously renew them.
-   * @param runnable
+   * Schedule re-login and creation of new tokens. If tokens have already expired, this method
+   * will synchronously create new ones.
    */
   def scheduleRenewal(runnable: Runnable): Unit = {
     val credentials = UserGroupInformation.getCurrentUser.getCredentials
-    val renewalInterval = hadoopUtil.getTimeFromNowToRenewal(0.75, credentials)
+    val renewalInterval = hadoopUtil.getTimeFromNowToRenewal(sparkConf, 0.75, credentials)
     // Run now!
     if (renewalInterval <= 0) {
       logInfo("HDFS tokens have expired, creating new tokens now.")
@@ -164,7 +163,7 @@ private[yarn] class AMDelegationTokenRenewer(
       // Get a copy of the credentials
       override def run(): Void = {
         val nns = YarnSparkHadoopUtil.get.getNameNodesToAccess(sparkConf) + dst
-        hadoopUtil.obtainTokensForNamenodes(nns, hadoopConf, tempCreds, replaceExisting = true)
+        hadoopUtil.obtainTokensForNamenodes(nns, hadoopConf, tempCreds)
         null
       }
     })
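The 75%/80% split the class comment describes leaves a window in which executors (refreshing at 80%) always find tokens the AM already wrote (at 75%). A rough sketch of the scheduling pattern visible in scheduleRenewal, where `scheduler` and `renewalRunnable` are illustrative names rather than the class's actual private members:

import java.util.concurrent.TimeUnit

// Schedule the next AM re-login once 75% of the renewal interval has elapsed,
// as computed by getTimeFromNowToRenewal above.
val delayMillis = hadoopUtil.getTimeFromNowToRenewal(sparkConf, 0.75, credentials)
if (delayMillis <= 0) {
  renewalRunnable.run()  // tokens already past the threshold: create new ones now
} else {
  scheduler.schedule(renewalRunnable, delayMillis, TimeUnit.MILLISECONDS)
}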

yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala

Lines changed: 0 additions & 3 deletions

@@ -283,9 +283,6 @@ private[spark] class ApplicationMaster(
     rpcEnv = RpcEnv.create("sparkYarnAM", Utils.localHostName, 0, sparkConf, securityMgr)
     waitForSparkDriver()
     addAmIpFilter()
-    // If a principal and keytab have been set, use that to create new credentials for executors
-    // periodically
-    delegationTokenRenewerOption.foreach(_.scheduleLoginFromKeytab())
     registerAM(sparkConf.get("spark.driver.appUIAddress", ""), securityMgr)
 
     // In client mode the actor will stop the reporter thread.

yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala

Lines changed: 25 additions & 2 deletions

@@ -17,9 +17,10 @@
 
 package org.apache.spark.deploy.yarn
 
-import java.io.{File, FileOutputStream}
+import java.io.{ByteArrayInputStream, DataInputStream, File, FileOutputStream}
 import java.net.{InetAddress, UnknownHostException, URI, URISyntaxException}
 import java.nio.ByteBuffer
+import java.security.PrivilegedExceptionAction
 import java.util.UUID
 import java.util.zip.{ZipEntry, ZipOutputStream}
 
@@ -402,6 +403,27 @@ private[spark] class Client(
     }
   }
 
+  /**
+   * Get the renewal interval for tokens.
+   */
+  private def getTokenRenewalInterval(stagingDirPath: Path): Long = {
+    // We cannot use the tokens generated above since those have renewer yarn. Trying to renew
+    // those will fail with an access control issue. So create new tokens with the logged in
+    // user as renewer.
+    val creds = new Credentials()
+    YarnSparkHadoopUtil.get.obtainTokensForNamenodes(Set(stagingDirPath), hadoopConf, creds,
+      Some(sparkConf.get("spark.yarn.principal")))
+    val t = creds.getAllTokens
+      .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
+      .head
+    val newExpiration = t.renew(hadoopConf)
+    val identifier = new DelegationTokenIdentifier()
+    identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
+    val interval = newExpiration - identifier.getIssueDate
+    logInfo(s"Renewal Interval set to $interval")
+    interval
+  }
+
   /**
    * Set up the environment for launching our ApplicationMaster container.
   */
@@ -420,8 +442,9 @@
       sparkConf.set(
         "spark.yarn.credentials.file", new Path(stagingDirPath, credentialsFile).toString)
       logInfo(s"Credentials file set to: $credentialsFile")
+      val renewalInterval = getTokenRenewalInterval(stagingDirPath)
+      sparkConf.set("spark.yarn.renewal.interval", renewalInterval.toString)
     }
-
     // Set the environment variables to be passed on to the executors.
     distCacheMgr.setDistFilesEnv(env)
     distCacheMgr.setDistArchivesEnv(env)

yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala

Lines changed: 2 additions & 5 deletions

@@ -21,9 +21,6 @@ import java.io._
 import java.util.regex.Matcher
 import java.util.regex.Pattern
 
-import org.apache.hadoop.security.token.TokenIdentifier
-
-import scala.collection.JavaConversions._
 import scala.collection.mutable.HashMap
 import scala.util.Try
 
@@ -116,10 +113,10 @@ class YarnSparkHadoopUtil extends SparkHadoopUtil {
       paths: Set[Path],
       conf: Configuration,
       creds: Credentials,
-      replaceExisting: Boolean = false
+      renewer: Option[String] = None
     ): Unit = {
     if (UserGroupInformation.isSecurityEnabled()) {
-      val delegTokenRenewer = getTokenRenewer(conf)
+      val delegTokenRenewer = renewer.getOrElse(getTokenRenewer(conf))
       paths.foreach { dst =>
         val dstFs = dst.getFileSystem(conf)
         logInfo("getting token for namenode: " + dst)
