
Commit bc083e3

Overload RegisteredExecutor to send tokens. Minor doc updates.
1 parent 7b19643 commit bc083e3

6 files changed: +23 -21 lines

core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala

Lines changed: 9 additions & 9 deletions
@@ -55,7 +55,7 @@ private[spark] class CoarseGrainedExecutorBackend(
     logInfo("Connecting to driver: " + driverUrl)
     rpcEnv.asyncSetupEndpointRefByURI(driverUrl).flatMap { ref =>
       driver = Some(ref)
-      ref.sendWithReply[RegisteredExecutor.type](
+      ref.sendWithReply[RegisteredExecutor](
         RegisterExecutor(executorId, self, hostPort, cores, extractLogUrls))
     } onComplete {
       case Success(msg) => Utils.tryLogNonFatalError {
@@ -72,17 +72,17 @@ private[spark] class CoarseGrainedExecutorBackend(
   }
 
   override def receive: PartialFunction[Any, Unit] = {
-    case RegisteredExecutor =>
+    case RegisteredExecutor(tokens) =>
       logInfo("Successfully registered with driver")
       val (hostname, _) = Utils.parseHostPort(hostPort)
       executor = new Executor(executorId, hostname, env, userClassPath, isLocal = false)
-
-    case NewTokens(tokens) =>
-      val inStream = new DataInputStream(new ByteArrayInputStream(tokens.value.array()))
-      val creds = new Credentials()
-      creds.readFields(inStream)
-      inStream.close()
-      UserGroupInformation.getCurrentUser.addCredentials(creds)
+      tokens.foreach { tokenBuffer =>
+        val inStream = new DataInputStream(new ByteArrayInputStream(tokenBuffer.value.array()))
+        val creds = new Credentials()
+        creds.readFields(inStream)
+        inStream.close()
+        UserGroupInformation.getCurrentUser.addCredentials(creds)
+      }
 
     case RegisterExecutorFailed(message) =>
       logError("Slave registration failed: " + message)

core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala

Lines changed: 2 additions & 1 deletion
@@ -35,7 +35,8 @@ private[spark] object CoarseGrainedClusterMessages {
   case class KillTask(taskId: Long, executor: String, interruptThread: Boolean)
     extends CoarseGrainedClusterMessage
 
-  case object RegisteredExecutor extends CoarseGrainedClusterMessage
+  case class RegisteredExecutor(tokens: Option[SerializableBuffer])
+    extends CoarseGrainedClusterMessage
 
   case class NewTokens(tokens: SerializableBuffer) extends CoarseGrainedClusterMessage
core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala

Lines changed: 1 addition & 2 deletions
@@ -125,8 +125,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
         context.reply(RegisterExecutorFailed("Duplicate executor ID: " + executorId))
       } else {
         logInfo("Registered executor: " + executorRef + " with ID " + executorId)
-        context.reply(RegisteredExecutor)
-        latestTokens.foreach(x => context.reply(NewTokens(x)))
+        context.reply(RegisteredExecutor(latestTokens))
         addressToExecutorId(executorRef.address) = executorId
         totalCoreCount.addAndGet(cores)
         totalRegisteredExecutors.addAndGet(1)
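
A single reply now carries both the registration acknowledgement and any tokens, since `latestTokens` is an `Option[SerializableBuffer]`. A hedged sketch of how the driver side might keep that field fresh when a `NewTokens` message arrives; the field name comes from this diff, but the handler placement is assumed, not shown in this commit:

    // Assumed driver-side bookkeeping (only latestTokens itself appears in
    // this commit's diff): remember the newest token buffer so executors
    // that register later get it in their RegisteredExecutor reply.
    private var latestTokens: Option[SerializableBuffer] = None

    override def receive: PartialFunction[Any, Unit] = {
      case NewTokens(tokens) =>
        latestTokens = Some(tokens)
    }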

docs/security.md

Lines changed: 2 additions & 1 deletion
@@ -31,7 +31,8 @@ SSL must be configured on each node and configured for each component involved i
 
 ### YARN mode
 The key-store can be prepared on the client side and then distributed and used by the executors as the part of the application. It is possible because the user is able to deploy files before the application is started in YARN by using `spark.yarn.dist.files` or `spark.yarn.dist.archives` configuration settings. The responsibility for encryption of transferring these files is on YARN side and has nothing to do with Spark.
-For long-running apps like Spark Streaming apps be able to write to HDFS, it is possible to pass a principal and keytab to `spark-submit` via the `--principal` and `--keytab` parameters respectively. The keytab passed in will be copied over to the machine running the Application Master securely via the Hadoop Distributed Cache. The Kerberos login will be periodically renewed using this principal and keytab and the delegation tokens required for HDFS will be generated periodically so the application can continue writing to HDFS.
+
+For long-running apps like Spark Streaming apps to be able to write to HDFS, it is possible to pass a principal and keytab to `spark-submit` via the `--principal` and `--keytab` parameters respectively. The keytab passed in will be copied over to the machine running the Application Master via the Hadoop Distributed Cache (securely - if YARN is configured with SSL and HDFS encryption is enabled). The Kerberos login will be periodically renewed using this principal and keytab and the delegation tokens required for HDFS will be generated periodically so the application can continue writing to HDFS.
 
 ### Standalone mode
 The user needs to provide key-stores and configuration options for master and workers. They have to be set by attaching appropriate Java system properties in `SPARK_MASTER_OPTS` and in `SPARK_WORKER_OPTS` environment variables, or just in `SPARK_DAEMON_JAVA_OPTS`. In this mode, the user may allow the executors to use the SSL settings inherited from the worker which spawned that executor. It can be accomplished by setting `spark.ssl.useNodeLocalConf` to `true`. If that parameter is set, the settings provided by user on the client side, are not used by the executors.
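
As a concrete illustration of the documented parameters, a submission might look like the following; the principal, keytab path, application class, and jar are placeholders, not taken from this commit:

    ./bin/spark-submit \
      --master yarn-cluster \
      --principal user@EXAMPLE.COM \
      --keytab /path/to/user.keytab \
      --class com.example.StreamingApp \
      streaming-app.jar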

yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala

Lines changed: 8 additions & 7 deletions
@@ -78,13 +78,14 @@ private[yarn] class AMDelegationTokenRenewer(
     val credentials = UserGroupInformation.getCurrentUser.getCredentials
     val interval = (0.75 * (hadoopUtil.getLatestTokenValidity(credentials) -
       System.currentTimeMillis())).toLong
-    // If only 6 hours left, then force a renewal immediately. This is to avoid tokens with
-    // very less validity being used on AM restart.
-    if ((interval millis).toHours <= 6) {
-      0L
-    } else {
-      interval
-    }
+    // // If only 6 hours left, then force a renewal immediately. This is to avoid tokens with
+    // // very less validity being used on AM restart.
+    // if ((interval millis).toHours <= 6) {
+    //   0L
+    // } else {
+    //   interval
+    // }
+    interval
   }
 
   def scheduleRenewal(runnable: Runnable): Unit = {
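
A quick worked example of the retained computation, with hypothetical numbers: if `getLatestTokenValidity` returns a timestamp 24 hours in the future, the renewal is scheduled at 75% of that window.

    // Hypothetical values, for illustration only.
    val validityWindowMs = 24L * 60 * 60 * 1000        // 86,400,000 ms left
    val intervalMs = (0.75 * validityWindowMs).toLong  // 64,800,000 ms = 18 hours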

yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ import org.scalatest.{FunSuite, Matchers}
 
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType
 
-import org.apache.spark.{SparkException, Logging, SecurityManager, SparkConf}
+import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkException}
 import org.apache.spark.util.Utils