
Commit f6954da

Got rid of the Akka communication used to renew credentials; instead, the executors check a known HDFS file's modification time and re-read the credentials from it when it changes.

1 parent 5c11c3e · commit f6954da
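
For context, here is a minimal sketch of the executor-side check the commit message describes, assuming a hypothetical helper of my own (CredentialsPoller, refreshIfUpdated, lastRefresh are not names from this commit): the driver overwrites a well-known credentials file on HDFS, and each executor re-reads it only when the file's modification time advances past the last refresh it applied.

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.security.{Credentials, UserGroupInformation}

// Hypothetical helper, not the commit's code: poll a well-known HDFS file and
// apply its delegation tokens when the modification time moves forward.
object CredentialsPoller {
  private var lastRefresh = 0L

  def refreshIfUpdated(fs: FileSystem, credentialsPath: Path): Unit = {
    if (fs.exists(credentialsPath)) {
      val modTime = fs.getFileStatus(credentialsPath).getModificationTime
      if (modTime > lastRefresh) {
        val in = fs.open(credentialsPath)
        try {
          val creds = new Credentials()
          // Reads the format written by Credentials.writeTokenStorageToStream.
          creds.readTokenStorageStream(in)
          UserGroupInformation.getCurrentUser.addCredentials(creds)
          lastRefresh = modTime
        } finally {
          in.close()
        }
      }
    }
  }
}

The actual wiring in the commit lives in SparkHadoopUtil.updateCredentialsIfRequired and its YARN override, shown in the diffs below.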

7 files changed: +136 -86 lines changed

core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala

Lines changed: 10 additions & 3 deletions
@@ -55,13 +55,16 @@ class SparkHadoopUtil extends Logging {
   def runAsSparkUser(func: () => Unit) {
     val user = Utils.getCurrentUserName()
     logDebug("running as user: " + user)
+    updateCredentialsIfRequired()
     val ugi = UserGroupInformation.createRemoteUser(user)
     transferCredentials(UserGroupInformation.getCurrentUser(), ugi)
     ugi.doAs(new PrivilegedExceptionAction[Unit] {
       def run: Unit = func()
     })
   }

+  def updateCredentialsIfRequired(): Unit = {}
+
   def transferCredentials(source: UserGroupInformation, dest: UserGroupInformation) {
     for (token <- source.getTokens()) {
       dest.addToken(token)

@@ -122,9 +125,13 @@ class SparkHadoopUtil extends Logging {
     UserGroupInformation.loginUserFromKeytab(principalName, keytabFilename)
   }

-  def setPrincipalAndKeytabForLogin(principal: String, keytab: String): Unit = ???
-
-  private[spark] def scheduleLoginFromKeytab(callback: (String) => Unit): Unit = {}
+  /**
+   * Schedule a login from the keytab and principal set using the --principal and --keytab
+   * arguments to spark-submit. This login happens only when the credentials of the current user
+   * are about to expire. This method reads SPARK_PRINCIPAL and SPARK_KEYTAB from the environment
+   * to do the login. This method is a no-op in non-YARN mode.
+   */
+  private[spark] def scheduleLoginFromKeytab(): Unit = {}

   /**
    * Returns a function that can be called to find Hadoop FileSystem bytes read. If
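
Both new methods stay no-ops in the base class so non-YARN deployments are unaffected; only the YARN utility overrides them. A rough sketch of that override pattern, with simplified class names and bodies that are not the real implementation:

// Simplified sketch: the generic utility exposes no-op hooks that the
// cluster-manager-specific subclass fills in.
class HadoopUtilSketch {
  def runAsSparkUser(func: () => Unit): Unit = {
    updateCredentialsIfRequired() // picks up refreshed tokens before doAs
    func()
  }
  def updateCredentialsIfRequired(): Unit = {} // no-op by default
  def scheduleLoginFromKeytab(): Unit = {}     // no-op outside YARN
}

class YarnHadoopUtilSketch extends HadoopUtilSketch {
  override def updateCredentialsIfRequired(): Unit = {
    // read the credentials file from the staging directory
    // (see the YarnSparkHadoopUtil diff below)
  }
}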

core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala

Lines changed: 0 additions & 9 deletions
@@ -109,15 +109,6 @@ private[spark] class CoarseGrainedExecutorBackend(
       context.stop(self)
       context.system.shutdown()

-    // Add new credentials received from the driver to the current user.
-    case UpdateCredentials(newCredentialsPath) =>
-      logInfo("New credentials received from driver, adding the credentials to the current user")
-      val credentials = new Credentials()
-      val remoteFs = FileSystem.get(SparkHadoopUtil.get.conf)
-      val inStream = remoteFs.open(new Path(newCredentialsPath))
-      credentials.readTokenStorageStream(inStream)
-      SparkHadoopUtil.get.addCurrentUserCredentials(credentials)
-      inStream.close()
   }

   override def statusUpdate(taskId: Long, state: TaskState, data: ByteBuffer) {

core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala

Lines changed: 0 additions & 5 deletions
@@ -51,11 +51,6 @@ private[spark] object CoarseGrainedClusterMessages {
   case class StatusUpdate(executorId: String, taskId: Long, state: TaskState,
     data: SerializableBuffer) extends CoarseGrainedClusterMessage

-  // When the delegation tokens are about expire, the driver creates new tokens and sends them to
-  // the executors via this message.
-  case class UpdateCredentials(newCredentialsLocation: String)
-    extends CoarseGrainedClusterMessage
-
   object StatusUpdate {
     /** Alternate factory method that takes a ByteBuffer directly for the data field */
     def apply(executorId: String, taskId: Long, state: TaskState, data: ByteBuffer)

core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala

Lines changed: 1 addition & 11 deletions
@@ -73,16 +73,6 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val actorSyste
   // Executors we have requested the cluster manager to kill that have not died yet
   private val executorsPendingToRemove = new HashSet[String]

-  /**
-   * Send new credentials to executors. This is the method that is called when the scheduled
-   * login completes, so the new credentials can be sent to the executors.
-   * @param credentialsPath
-   */
-  def sendNewCredentialsToExecutors(credentialsPath: String): Unit = {
-    // We don't care about the reply, so going to deadLetters is fine.
-    executorDataMap.values.foreach(_.executorActor ! UpdateCredentials(credentialsPath))
-  }
-
   class DriverActor(sparkProperties: Seq[(String, String)]) extends Actor with ActorLogReceive {
     override protected def log = CoarseGrainedSchedulerBackend.this.log
     private val addressToExecutorId = new HashMap[Address, String]

@@ -253,7 +243,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val actorSyste

     // If a principal and keytab have been set, use that to create new credentials for executors
     // periodically
-    SparkHadoopUtil.get.scheduleLoginFromKeytab(sendNewCredentialsToExecutors _)
+    SparkHadoopUtil.get.scheduleLoginFromKeytab()
   }

   def stopExecutors() {

yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala

Lines changed: 0 additions & 5 deletions
@@ -257,11 +257,6 @@ private[spark] class ApplicationMaster(
   private def runDriver(securityMgr: SecurityManager): Unit = {
     addAmIpFilter()

-    // This must be done before SparkContext is initialized, since the CoarseGrainedSchedulerBackend
-    // is started at that time. That is what schedules the re-logins. It is scheduled only if the
-    // principal is actually setup. So we make sure it is available.
-    SparkHadoopUtil.get.setPrincipalAndKeytabForLogin(
-      System.getenv("SPARK_PRINCIPAL"), System.getenv("SPARK_KEYTAB"))
     userClassThread = startUserApplication()

     // This a bit hacky, but we need to wait until the spark.driver.port property has

yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala

Lines changed: 6 additions & 6 deletions
@@ -246,14 +246,12 @@ private[spark] class Client(
     // If we passed in a keytab, make sure we copy the keytab to the staging directory on
     // HDFS, and setup the relevant environment vars, so the AM can login again.
     if (loginFromKeytab) {
-      val fs = FileSystem.get(hadoopConf)
-      val stagingDirPath = new Path(fs.getHomeDirectory, appStagingDir)
       val localUri = new URI(args.keytab)
       val localPath = getQualifiedLocalPath(localUri, hadoopConf)
-      val destinationPath = new Path(stagingDirPath, keytabFileName)
-      copyFileToRemote(destinationPath, localPath, replication)
+      val destinationPath = copyFileToRemote(dst, localPath, replication)
+      val destFs = FileSystem.get(destinationPath.toUri(), hadoopConf)
       distCacheMgr.addResource(
-        fs, hadoopConf, destinationPath, localResources, LocalResourceType.FILE, keytabFileName,
+        destFs, hadoopConf, destinationPath, localResources, LocalResourceType.FILE, keytabFileName,
         statCache, appMasterOnly = true)
     }

@@ -577,10 +575,12 @@ private[spark] class Client(
       val f = new File(args.keytab)
       // Generate a file name that can be used for the keytab file, that does not conflict
       // with any user file.
-      keytabFileName = f.getName + "-" + UUID.randomUUID()
+      keytabFileName = f.getName + "-" + UUID.randomUUID().toString
       val ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(args.principal, args.keytab)
      credentials = ugi.getCredentials
       loginFromKeytab = true
+      val credentialsFile = "credentials-" + UUID.randomUUID().toString
+      sparkConf.set("spark.yarn.credentials.file", credentialsFile)
       logInfo("Successfully logged into Kerberos.")
     } else {
       credentials = UserGroupInformation.getCurrentUser.getCredentials
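
On the write side, the random name stored in spark.yarn.credentials.file becomes the well-known file the executors poll. A hedged sketch of the write protocol the AM follows (the full version is in the YarnSparkHadoopUtil diff below; the helper object and parameter names here are illustrative, not Spark's): write the refreshed tokens to a temporary file first, then replace the final file, so a poller never observes a half-written token file.

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.security.Credentials

object CredentialsWriter {
  // Illustrative helper: write refreshed tokens under the staging directory
  // using the name from "spark.yarn.credentials.file", via a temp file + rename.
  def writeCredentials(
      fs: FileSystem,
      stagingDir: Path,
      credentialsFileName: String,
      creds: Credentials): Unit = {
    val finalPath = new Path(stagingDir, credentialsFileName)
    val tempPath = new Path(stagingDir, credentialsFileName + ".tmp")
    val out = fs.create(tempPath, true) // overwrite any stale temp file
    try {
      creds.writeTokenStorageToStream(out)
      out.hflush()
    } finally {
      out.close()
    }
    fs.delete(finalPath, true)          // FileSystem.rename does not overwrite
    fs.rename(tempPath, finalPath)
  }
}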

yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala

Lines changed: 119 additions & 47 deletions
@@ -20,14 +20,17 @@ package org.apache.spark.deploy.yarn
 import java.io._
 import java.net.URI
 import java.nio.ByteBuffer
-import java.util.concurrent.{TimeUnit, Executors}
+import java.util.concurrent.{ TimeUnit, Executors}
 import java.util.regex.Matcher
 import java.util.regex.Pattern

 import scala.collection.mutable.HashMap
+import scala.collection.JavaConversions._
 import scala.util.Try

+import org.apache.hadoop.fs.Options.Rename
 import org.apache.hadoop.fs.{FileUtil, Path, FileSystem}
+import org.apache.hadoop.hdfs.DistributedFileSystem
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
 import org.apache.hadoop.io.Text
 import org.apache.hadoop.mapred.{Master, JobConf}

@@ -41,7 +44,7 @@ import org.apache.hadoop.conf.Configuration

 import org.apache.spark.{SparkException, SecurityManager, SparkConf}
 import org.apache.spark.deploy.SparkHadoopUtil
-import org.apache.spark.util.{SerializableBuffer, Utils}
+import org.apache.spark.util.Utils

 /**
  * Contains util methods to interact with Hadoop from spark.

@@ -52,6 +55,13 @@ class YarnSparkHadoopUtil extends SparkHadoopUtil {
   private var principal: String = null
   @volatile private var loggedInViaKeytab = false
   @volatile private var loggedInUGI: UserGroupInformation = null
+  @volatile private var lastCredentialsRefresh = 0l
+  private lazy val delegationTokenRenewer =
+    Executors.newSingleThreadScheduledExecutor(
+      Utils.namedThreadFactory("Delegation Token Refresh Thread"))
+  private lazy val delegationTokenExecuterUpdaterThread = new Runnable {
+    override def run(): Unit = updateCredentialsIfRequired()
+  }

   override def transferCredentials(source: UserGroupInformation, dest: UserGroupInformation) {
     dest.addCredentials(source.getCredentials())

@@ -92,57 +102,118 @@ class YarnSparkHadoopUtil extends SparkHadoopUtil {
     if (credentials != null) credentials.getSecretKey(new Text(key)) else null
   }

-  override def setPrincipalAndKeytabForLogin(principal: String, keytab: String): Unit = {
-    this.principal = principal
-    this.keytab = keytab
+  private[spark] override def scheduleLoginFromKeytab(): Unit = {
+    val principal = System.getenv("SPARK_PRINCIPAL")
+    val keytab = System.getenv("SPARK_KEYTAB")
+    if (principal != null) {
+      val delegationTokenRenewerThread =
+        new Runnable {
+          override def run(): Unit = {
+            if (!loggedInViaKeytab) {
+              // Keytab is copied by YARN to the working directory of the AM, so full path is
+              // not needed.
+              loggedInUGI = UserGroupInformation.loginUserFromKeytabAndReturnUGI(
+                principal, keytab)
+              loggedInViaKeytab = true
+            }
+            val nns = getNameNodesToAccess(sparkConf)
+            val newCredentials = loggedInUGI.getCredentials
+            obtainTokensForNamenodes(nns, conf, newCredentials)
+            val remoteFs = FileSystem.get(conf)
+            val stagingDirPath =
+              new Path(remoteFs.getHomeDirectory, System.getenv("SPARK_YARN_STAGING_DIR"))
+            val tokenPathStr = sparkConf.get("spark.yarn.credentials.file")
+            val tokenPath = new Path(stagingDirPath.toString, tokenPathStr)
+            val tempTokenPath = new Path(stagingDirPath.toString, tokenPathStr + ".tmp")
+            val stream = remoteFs.create(tempTokenPath, true)
+            // Now write this out to HDFS
+            newCredentials.writeTokenStorageToStream(stream)
+            stream.hflush()
+            stream.close()
+            remoteFs.delete(tokenPath, true)
+            remoteFs.rename(tempTokenPath, tokenPath)
+            delegationTokenRenewer.schedule(
+              this, (0.75 * (getLatestValidity - System.currentTimeMillis())).toLong,
+              TimeUnit.MILLISECONDS)
+          }
+        }
+      val timeToRenewal = (0.75 * (getLatestValidity - System.currentTimeMillis())).toLong
+      delegationTokenRenewer.schedule(
+        delegationTokenRenewerThread, timeToRenewal, TimeUnit.MILLISECONDS)
+    }
   }

-  private[spark] override def scheduleLoginFromKeytab(
-      callback: (String) => Unit): Unit = {
-    if (principal != null) {
-      // Get the current credentials, find out when they expire.
-      val creds = {
-        if (loggedInUGI == null) {
-          UserGroupInformation.getCurrentUser.getCredentials
-        } else {
-          loggedInUGI.getCredentials
+  override def updateCredentialsIfRequired(): Unit = {
+    try {
+      val credentialsFile = sparkConf.get("spark.yarn.credentials.file")
+      if (credentialsFile != null && !credentialsFile.isEmpty) {
+        val remoteFs = FileSystem.get(conf)
+        val sparkStagingDir = System.getenv("SPARK_YARN_STAGING_DIR")
+        val stagingDirPath = new Path(remoteFs.getHomeDirectory, sparkStagingDir)
+        val credentialsFilePath = new Path(stagingDirPath, credentialsFile)
+        if (remoteFs.exists(credentialsFilePath)) {
+          val status = remoteFs.getFileStatus(credentialsFilePath)
+          val modTimeAtStart = status.getModificationTime
+          if (modTimeAtStart > lastCredentialsRefresh) {
+            val newCredentials = getCredentialsFromHDFSFile(remoteFs, credentialsFilePath)
+            val newStatus = remoteFs.getFileStatus(credentialsFilePath)
+            // File was updated after we started reading it, lets come back later and try to read it.
+            if (newStatus.getModificationTime != modTimeAtStart) {
+              delegationTokenRenewer
+                .schedule(delegationTokenExecuterUpdaterThread, 1, TimeUnit.HOURS)
+            } else {
+              UserGroupInformation.getCurrentUser.addCredentials(newCredentials)
+              lastCredentialsRefresh = status.getModificationTime
+              val totalValidity = getLatestValidity - lastCredentialsRefresh
+              val timeToRunRenewal = lastCredentialsRefresh + (0.8 * totalValidity).toLong
+              val timeFromNowToRenewal = timeToRunRenewal - System.currentTimeMillis()
+              delegationTokenRenewer.schedule(delegationTokenExecuterUpdaterThread,
                timeFromNowToRenewal, TimeUnit.MILLISECONDS)
+            }
+          } else {
+            // Check every hour to see if new credentials arrived.
+            delegationTokenRenewer.schedule(delegationTokenExecuterUpdaterThread, 1, TimeUnit.HOURS)
+          }
         }
       }
-      val credStream = new ByteArrayOutputStream()
-      creds.writeTokenStorageToStream(new DataOutputStream(credStream))
-      val in = new DataInputStream(new ByteArrayInputStream(credStream.toByteArray))
-      val tokenIdentifier = new DelegationTokenIdentifier()
-      tokenIdentifier.readFields(in)
-      val timeToRenewal = (0.6 * (tokenIdentifier.getMaxDate - System.currentTimeMillis())).toLong
-      Executors.newSingleThreadScheduledExecutor(
-        Utils.namedThreadFactory("Delegation Token Refresh Thread")).scheduleWithFixedDelay(
-        new Runnable {
-          override def run(): Unit = {
-            if (!loggedInViaKeytab) {
-              // Keytab is copied by YARN to the working directory of the AM, so full path is
-              // not needed.
-              loggedInUGI = UserGroupInformation.loginUserFromKeytabAndReturnUGI(
-                principal, keytab)
-              loggedInViaKeytab = true
-            }
-            val nns = getNameNodesToAccess(sparkConf)
-            val newCredentials = loggedInUGI.getCredentials
-            obtainTokensForNamenodes(nns, conf, newCredentials)
-            val remoteFs = FileSystem.get(conf)
-            val stagingDir = System.getenv("SPARK_YARN_STAGING_DIR")
-            val tokenPath = new Path(remoteFs.getHomeDirectory, stagingDir + Path.SEPARATOR +
-              "credentials - " + System.currentTimeMillis())
-            val stream = remoteFs.create(tokenPath, true)
-            // Now write this out via Akka to executors.
-            newCredentials.writeTokenStorageToStream(stream)
-            stream.hflush()
-            stream.close()
-            callback(tokenPath.toString)
-          }
-        }, timeToRenewal, timeToRenewal, TimeUnit.MILLISECONDS)
+    } catch {
+      // Since the file may get deleted while we are reading it,
+      case e: Exception =>
+        logWarning(
+          "Error encountered while trying to update credentials, will try again in 1 hour", e)
+        delegationTokenRenewer.schedule(delegationTokenExecuterUpdaterThread, 1, TimeUnit.HOURS)
     }
   }

+  private[spark] def getCredentialsFromHDFSFile(
+      remoteFs: FileSystem,
+      tokenPath: Path
+    ): Credentials = {
+    val stream = remoteFs.open(tokenPath)
+    val newCredentials = new Credentials()
+    newCredentials.readFields(stream)
+    newCredentials
+  }
+
+  private[spark] def getLatestValidity: Long = {
+    val creds = UserGroupInformation.getCurrentUser.getCredentials
+    var latestValidity: Long = 0
+    creds.getAllTokens
+      .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
+      .foreach { t =>
+        val identifier = new DelegationTokenIdentifier()
+        identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
+        latestValidity = {
+          if (latestValidity < identifier.getMaxDate) {
+            identifier.getMaxDate
+          } else {
+            latestValidity
+          }
        }
      }
+    latestValidity
+  }
+
   /**
    * Get the list of namenodes the user may access.
    */

@@ -172,7 +243,8 @@ class YarnSparkHadoopUtil extends SparkHadoopUtil {
   def obtainTokensForNamenodes(
       paths: Set[Path],
       conf: Configuration,
-      creds: Credentials): Unit = {
+      creds: Credentials
+    ): Unit = {
     if (UserGroupInformation.isSecurityEnabled()) {
       val delegTokenRenewer = getTokenRenewer(conf)
       paths.foreach { dst =>
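
The renewal cadence comes from the HDFS delegation tokens themselves: getLatestValidity decodes each token identifier and keeps the largest max date, and the next run is scheduled at roughly 75-80% of the remaining validity (0.75 in scheduleLoginFromKeytab, 0.8 in updateCredentialsIfRequired). A standalone sketch of that calculation, with object and method names that are mine rather than Spark's:

import java.io.{ByteArrayInputStream, DataInputStream}

import scala.collection.JavaConversions._

import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
import org.apache.hadoop.security.UserGroupInformation

object TokenValidity {
  // Largest max date across the current user's HDFS delegation tokens.
  def latestHdfsTokenMaxDate(): Long = {
    val creds = UserGroupInformation.getCurrentUser.getCredentials
    creds.getAllTokens
      .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
      .map { token =>
        val id = new DelegationTokenIdentifier()
        id.readFields(new DataInputStream(new ByteArrayInputStream(token.getIdentifier)))
        id.getMaxDate
      }
      .foldLeft(0L)(math.max)
  }

  // Delay until the next renewal, at 75% of the remaining validity window.
  def renewalDelayMillis(now: Long = System.currentTimeMillis()): Long =
    (0.75 * (latestHdfsTokenMaxDate() - now)).toLong
}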
