Commit 42eead4

Remove RPC part. Refactor and move methods around, use renewal interval rather than max lifetime to create new tokens.
1 parent ebb36f5 commit 42eead4
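
The core of the change: instead of deriving the next refresh time from a token's maximum lifetime (getMaxDate), the updater now schedules refreshes at a fraction (0.8) of the configured dfs.namenode.delegation.token.renew-interval, measured from each token's issue date. Below is a minimal sketch of that arithmetic with hypothetical example values, not the actual Hadoop token classes:

    object RenewalTimeSketch {
      // Time (in millis) from `now` until `fraction` of the renewal interval has
      // elapsed since the token was issued; <= 0 means "refresh immediately".
      def timeFromNowToRenewal(
          issueDate: Long,
          renewalIntervalMs: Long,
          fraction: Double,
          now: Long): Long =
        (issueDate + fraction * renewalIntervalMs).toLong - now

      def main(args: Array[String]): Unit = {
        val now = System.currentTimeMillis()
        val issueDate = now - 10L * 60 * 60 * 1000      // example: issued 10 hours ago
        val renewalIntervalMs = 24L * 60 * 60 * 1000    // 24h, the default used by this patch
        // 0.8 * 24h = 19.2h after issue, so roughly 9.2h from now in this example.
        println(timeFromNowToRenewal(issueDate, renewalIntervalMs, 0.8, now))
      }
    }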

9 files changed: 128 additions & 126 deletions


core/src/main/scala/org/apache/spark/deploy/ExecutorDelegationTokenUpdater.scala

Lines changed: 20 additions & 20 deletions
@@ -25,6 +25,8 @@ import org.apache.hadoop.security.{Credentials, UserGroupInformation}
 import org.apache.spark.{Logging, SparkConf}
 import org.apache.spark.util.Utils
 
+import scala.util.control.NonFatal
+
 private[spark] class ExecutorDelegationTokenUpdater(
     sparkConf: SparkConf,
     hadoopConf: Configuration) extends Logging {
@@ -45,39 +47,41 @@ private[spark] class ExecutorDelegationTokenUpdater(
 
   def updateCredentialsIfRequired(): Unit = {
     try {
-      val credentials = UserGroupInformation.getCurrentUser.getCredentials
       val credentialsFilePath = new Path(credentialsFile)
       val remoteFs = FileSystem.get(hadoopConf)
       SparkHadoopUtil.get.listFilesSorted(
-        remoteFs, credentialsFilePath.getParent, credentialsFilePath.getName, ".tmp")
-        .lastOption
-        .foreach { credentialsStatus =>
-          val suffix = getSuffixForCredentialsPath(credentialsStatus)
+        remoteFs, credentialsFilePath.getParent,
+        credentialsFilePath.getName, SparkHadoopUtil.SPARK_YARN_CREDS_TEMP_EXTENSION)
+        .lastOption.foreach { credentialsStatus =>
+          val suffix = SparkHadoopUtil.get.getSuffixForCredentialsPath(credentialsStatus.getPath)
           if (suffix > lastCredentialsFileSuffix) {
             logInfo("Reading new delegation tokens from " + credentialsStatus.getPath)
             val newCredentials = getCredentialsFromHDFSFile(remoteFs, credentialsStatus.getPath)
             lastCredentialsFileSuffix = suffix
             UserGroupInformation.getCurrentUser.addCredentials(newCredentials)
-            val totalValidity = SparkHadoopUtil.get.getLatestTokenValidity(credentials) -
-              credentialsStatus.getModificationTime
-            val timeToRunRenewal =
-              credentialsStatus.getModificationTime + (0.8 * totalValidity).toLong
-            val timeFromNowToRenewal = timeToRunRenewal - System.currentTimeMillis()
-            logInfo("Updated delegation tokens, will check for new tokens in " +
-              timeFromNowToRenewal + " millis")
-            delegationTokenRenewer.schedule(
-              executorUpdaterRunnable, timeFromNowToRenewal, TimeUnit.MILLISECONDS)
+            logInfo("Tokens updated from credentials file.")
           } else {
            // Check every hour to see if new credentials arrived.
            logInfo("Updated delegation tokens were expected, but the driver has not updated the " +
              "tokens yet, will check again in an hour.")
            delegationTokenRenewer.schedule(executorUpdaterRunnable, 1, TimeUnit.HOURS)
+           return
          }
        }
+      val timeFromNowToRenewal =
+        SparkHadoopUtil.get.getTimeFromNowToRenewal(
+          0.8, UserGroupInformation.getCurrentUser.getCredentials)
+      if (timeFromNowToRenewal <= 0) {
+        executorUpdaterRunnable.run()
+      } else {
+        logInfo(s"Scheduling token refresh from HDFS in $timeFromNowToRenewal millis.")
+        delegationTokenRenewer.schedule(
+          executorUpdaterRunnable, timeFromNowToRenewal, TimeUnit.MILLISECONDS)
+      }
     } catch {
       // Since the file may get deleted while we are reading it, catch the Exception and come
       // back in an hour to try again
-      case e: Exception =>
+      case NonFatal(e) =>
        logWarning("Error while trying to update credentials, will try again in 1 hour", e)
        delegationTokenRenewer.schedule(executorUpdaterRunnable, 1, TimeUnit.HOURS)
     }
@@ -87,7 +91,7 @@
     val stream = remoteFs.open(tokenPath)
     try {
       val newCredentials = new Credentials()
-      newCredentials.readFields(stream)
+      newCredentials.readTokenStorageStream(stream)
       newCredentials
     } finally {
       stream.close()
@@ -98,8 +102,4 @@
     delegationTokenRenewer.shutdown()
   }
 
-  private def getSuffixForCredentialsPath(credentialsStatus: FileStatus): Int = {
-    val fileName = credentialsStatus.getPath.getName
-    fileName.substring(fileName.lastIndexOf("-") + 1).toInt
-  }
 }
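
After a successful pass, the updater now computes the time remaining until the renewal fraction is reached and either runs the refresh immediately (if that point has already passed) or schedules it on the single-threaded renewer. A stripped-down sketch of that run-now-or-schedule pattern using a plain ScheduledExecutorService (illustrative names, not the Spark classes):

    import java.util.concurrent.{Executors, TimeUnit}

    object ScheduleOrRunNow {
      private val renewer = Executors.newSingleThreadScheduledExecutor()

      // Run `task` immediately if the delay is non-positive, otherwise schedule it.
      def scheduleOrRun(delayMs: Long)(task: => Unit): Unit = {
        val runnable = new Runnable { override def run(): Unit = task }
        if (delayMs <= 0) runnable.run()
        else renewer.schedule(runnable, delayMs, TimeUnit.MILLISECONDS)
      }

      def main(args: Array[String]): Unit = {
        scheduleOrRun(-1) { println("renewal point already passed, refreshing now") }
        scheduleOrRun(2000) { println("refreshing after 2 seconds") }
        renewer.shutdown()  // previously scheduled delayed tasks still run
      }
    }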

core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala

Lines changed: 21 additions & 5 deletions
@@ -36,6 +36,7 @@ import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.util.Utils
 
 import scala.collection.JavaConversions._
+import scala.concurrent.duration._
 
 /**
  * :: DeveloperApi ::
@@ -46,6 +47,8 @@ class SparkHadoopUtil extends Logging {
   private val sparkConf = new SparkConf()
   val conf: Configuration = newConfiguration(sparkConf)
   UserGroupInformation.setConfiguration(conf)
+  private lazy val renewalInterval =
+    conf.getLong("dfs.namenode.delegation.token.renew-interval", (24 hours).toMillis)
 
   /**
    * Runs the given function with a Hadoop UserGroupInformation as a thread local variable
@@ -235,19 +238,28 @@
   }
 
   /**
-   * Get the latest validity of the HDFS token in the Credentials object.
-   * @param credentials
-   * @return
+   * How much time is remaining (in millis) from now to (fraction * renewal time for the token that
+   * is valid the latest)?
+   * This will return -ve (or 0) value if the fraction of validity has already expired.
    */
-  def getLatestTokenValidity(credentials: Credentials): Long = {
+  def getTimeFromNowToRenewal(fraction: Double, credentials: Credentials): Long = {
+    val now = System.currentTimeMillis()
     credentials.getAllTokens.filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
       .map { t =>
         val identifier = new DelegationTokenIdentifier()
         identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
-        identifier.getMaxDate
+        (identifier.getIssueDate + fraction * renewalInterval).toLong - now
       }.foldLeft(0L)(math.max)
   }
 
+
+  private[spark] def getSuffixForCredentialsPath(credentialsPath: Path): Int = {
+    val fileName = credentialsPath.getName
+    fileName.substring(
+      fileName.lastIndexOf(SparkHadoopUtil.SPARK_YARN_CREDS_COUNTER_DELIM) + 1).toInt
+  }
+
+
   private val HADOOP_CONF_PATTERN = "(\\$\\{hadoopconf-[^\\}\\$\\s]+\\})".r.unanchored
 
   /**
@@ -298,6 +310,10 @@ object SparkHadoopUtil {
     }
   }
 
+  val SPARK_YARN_CREDS_TEMP_EXTENSION = ".tmp"
+
+  val SPARK_YARN_CREDS_COUNTER_DELIM = "-"
+
   def get: SparkHadoopUtil = {
     hadoop
   }
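
Credentials files are expected to carry a counter after the last delimiter (and the .tmp extension while still being written), and getSuffixForCredentialsPath recovers that counter so executors only read files newer than the last one they processed. A small hedged sketch of the parse, on hypothetical file names:

    object CredentialsSuffixSketch {
      val CounterDelim = "-"  // mirrors SparkHadoopUtil.SPARK_YARN_CREDS_COUNTER_DELIM

      // "credentials-3" -> 3; everything after the last delimiter is treated as the counter.
      def suffixFor(fileName: String): Int =
        fileName.substring(fileName.lastIndexOf(CounterDelim) + 1).toInt

      def main(args: Array[String]): Unit = {
        println(suffixFor("credentials-0"))    // 0
        println(suffixFor("my-app-creds-17"))  // 17, only the last "-" matters
      }
    }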

core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala

Lines changed: 5 additions & 10 deletions
@@ -55,7 +55,7 @@ private[spark] class CoarseGrainedExecutorBackend(
     logInfo("Connecting to driver: " + driverUrl)
     rpcEnv.asyncSetupEndpointRefByURI(driverUrl).flatMap { ref =>
       driver = Some(ref)
-      ref.sendWithReply[RegisteredExecutor](
+      ref.sendWithReply[RegisteredExecutor.type](
        RegisterExecutor(executorId, self, hostPort, cores, extractLogUrls))
     } onComplete {
       case Success(msg) => Utils.tryLogNonFatalError {
@@ -72,17 +72,10 @@
   }
 
   override def receive: PartialFunction[Any, Unit] = {
-    case RegisteredExecutor(tokens) =>
+    case RegisteredExecutor =>
       logInfo("Successfully registered with driver")
       val (hostname, _) = Utils.parseHostPort(hostPort)
       executor = new Executor(executorId, hostname, env, userClassPath, isLocal = false)
-      tokens.foreach { tokenBuffer =>
-        val inStream = new DataInputStream(new ByteArrayInputStream(tokenBuffer.value.array()))
-        val creds = new Credentials()
-        creds.readFields(inStream)
-        inStream.close()
-        UserGroupInformation.getCurrentUser.addCredentials(creds)
-      }
 
     case RegisterExecutorFailed(message) =>
       logError("Slave registration failed: " + message)
@@ -175,7 +168,9 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging {
       }
     }
     var tokenUpdaterOption: Option[ExecutorDelegationTokenUpdater] = None
-    if(driverConf.contains("spark.yarn.credentials.file")) {
+    if (driverConf.contains("spark.yarn.credentials.file")) {
+      logInfo("Will periodically update credentials from: " +
+        driverConf.get("spark.yarn.credentials.file"))
      // Periodically update the credentials for this user to ensure HDFS tokens get updated.
      tokenUpdaterOption =
        Some(new ExecutorDelegationTokenUpdater(driverConf, SparkHadoopUtil.get.conf))

core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala

Lines changed: 1 addition & 4 deletions
@@ -35,10 +35,7 @@ private[spark] object CoarseGrainedClusterMessages {
   case class KillTask(taskId: Long, executor: String, interruptThread: Boolean)
     extends CoarseGrainedClusterMessage
 
-  case class RegisteredExecutor(tokens: Option[SerializableBuffer])
-    extends CoarseGrainedClusterMessage
-
-  case class NewTokens(tokens: SerializableBuffer) extends CoarseGrainedClusterMessage
+  case object RegisteredExecutor extends CoarseGrainedClusterMessage
 
   case class RegisterExecutorFailed(message: String) extends CoarseGrainedClusterMessage
 
core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala

Lines changed: 1 addition & 3 deletions
@@ -66,7 +66,6 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
   // Executors we have requested the cluster manager to kill that have not died yet
   private val executorsPendingToRemove = new HashSet[String]
 
-  private var latestTokens: Option[SerializableBuffer] = None
   class DriverEndpoint(override val rpcEnv: RpcEnv, sparkProperties: Seq[(String, String)])
     extends ThreadSafeRpcEndpoint with Logging {
 
@@ -115,7 +114,6 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
          logWarning(s"Attempted to kill task $taskId for unknown executor $executorId.")
        }
 
-      case NewTokens(tokens) => latestTokens = Option(tokens)
     }
 
     override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
@@ -125,7 +123,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
          context.reply(RegisterExecutorFailed("Duplicate executor ID: " + executorId))
        } else {
          logInfo("Registered executor: " + executorRef + " with ID " + executorId)
-         context.reply(RegisteredExecutor(latestTokens))
+         context.reply(RegisteredExecutor)
          addressToExecutorId(executorRef.address) = executorId
          totalCoreCount.addAndGet(cores)
          totalRegisteredExecutors.addAndGet(1)
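
With tokens no longer piggybacked on registration, RegisteredExecutor is a payload-free case object: the driver replies with the object itself and the executor backend matches on it directly (hence the RegisteredExecutor.type type parameter on sendWithReply). A tiny sketch of that message-shape change, outside of Spark's RPC layer and with hypothetical names:

    sealed trait ClusterMessage
    case object RegisteredExecutorAck extends ClusterMessage                // no token payload any more
    case class RegisterExecutorFailure(reason: String) extends ClusterMessage

    object MessageShapeSketch {
      def handle(msg: ClusterMessage): Unit = msg match {
        case RegisteredExecutorAck        => println("registered with driver")
        case RegisterExecutorFailure(why) => println(s"registration failed: $why")
      }

      def main(args: Array[String]): Unit = {
        handle(RegisteredExecutorAck)
        handle(RegisterExecutorFailure("duplicate executor ID"))
      }
    }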

docs/security.md

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ SSL must be configured on each node and configured for each component involved i
 ### YARN mode
 The key-store can be prepared on the client side and then distributed and used by the executors as the part of the application. It is possible because the user is able to deploy files before the application is started in YARN by using `spark.yarn.dist.files` or `spark.yarn.dist.archives` configuration settings. The responsibility for encryption of transferring these files is on YARN side and has nothing to do with Spark.
 
-For long-running apps like Spark Streaming apps to be able to write to HDFS, it is possible to pass a principal and keytab to `spark-submit` via the `--principal` and `--keytab` parameters respectively. The keytab passed in will be copied over to the machine running the Application Master via the Hadoop Distributed Cache (securely - if YARN is configured with SSL and HDFS encryption is enabled). The Kerberos login will be periodically renewed using this principal and keytab and the delegation tokens required for HDFS will be generated periodically so the application can continue writing to HDFS.
+For long-running apps like Spark Streaming apps to be able to write to HDFS, it is possible to pass a principal and keytab to `spark-submit` via the `--principal` and `--keytab` parameters respectively. The keytab passed in will be copied over to the machine running the Application Master via the Hadoop Distributed Cache (securely - if YARN is configured with SSL and HDFS encryption is enabled). The Kerberos login will be periodically renewed using this principal and keytab and the delegation tokens required for HDFS will be generated periodically so the application can continue writing to HDFS. Please note that the HDFS client configuration file, `hdfs-site.xml` on each executor node must have the value of `dfs.namenode.delegation.token.renew-interval` be the same as it is on the HDFS Namenode for this functionality.
 
 ### Standalone mode
 The user needs to provide key-stores and configuration options for master and workers. They have to be set by attaching appropriate Java system properties in `SPARK_MASTER_OPTS` and in `SPARK_WORKER_OPTS` environment variables, or just in `SPARK_DAEMON_JAVA_OPTS`. In this mode, the user may allow the executors to use the SSL settings inherited from the worker which spawned that executor. It can be accomplished by setting `spark.ssl.useNodeLocalConf` to `true`. If that parameter is set, the settings provided by user on the client side, are not used by the executors.
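
For reference, the submission shape this documentation paragraph describes looks roughly as follows; the principal, keytab path, class name, and jar are placeholders, only `--principal` and `--keytab` come from the docs:

    ./bin/spark-submit \
      --master yarn-cluster \
      --principal user@EXAMPLE.COM \
      --keytab /path/to/user.keytab \
      --class com.example.StreamingApp \
      streaming-app.jar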
