
Commit 2debcea

Change the file structure for credentials files. I will push a followup patch which adds a cleanup mechanism for old credentials files. The credentials files are small and few enough that leaving the old ones around in the meantime should not cause issues on HDFS.
1 parent af6d5f0 commit 2debcea
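
Concretely, the patch moves the value of spark.yarn.credentials.file from a bare file name to a fully qualified path under the application's staging directory. A minimal sketch of the resulting layout, assuming a hypothetical staging directory name (".sparkStaging/app_0001" is illustrative, not taken from the patch):

import java.util.UUID

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object CredentialsLayoutSketch {
  def main(args: Array[String]): Unit = {
    val hadoopConf = new Configuration()
    val remoteFs = FileSystem.get(hadoopConf)
    // Before this patch: spark.yarn.credentials.file = "credentials-<uuid>" (no parent directory).
    // After this patch: the file is rooted in the application's staging directory.
    val stagingDirPath = new Path(remoteFs.getHomeDirectory, ".sparkStaging/app_0001") // hypothetical
    val credentialsFile = "credentials-" + UUID.randomUUID().toString
    println(new Path(stagingDirPath, credentialsFile))
    // e.g. hdfs://namenode:8020/user/alice/.sparkStaging/app_0001/credentials-<uuid>
  }
}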

6 files changed: +122, -112 lines

core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ import scala.collection.JavaConversions._
  */
 @DeveloperApi
 class SparkHadoopUtil extends Logging {
-  val sparkConf = new SparkConf()
+  protected val sparkConf = new SparkConf() // YarnSparkHadoopUtil requires this
   val conf: Configuration = newConfiguration(sparkConf)
   UserGroupInformation.setConfiguration(conf)
 
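The one-line change above narrows sparkConf from public to protected: external callers lose access, but subclasses keep it, which is what the inline comment about YarnSparkHadoopUtil refers to. A minimal sketch of that subclass access (the class body here is hypothetical, for illustration only):

import org.apache.spark.deploy.SparkHadoopUtil

class YarnSparkHadoopUtilSketch extends SparkHadoopUtil {
  // Subclasses may read the protected field; code outside the hierarchy may not.
  def credentialsFilePath: Option[String] = sparkConf.getOption("spark.yarn.credentials.file")
}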

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 4 additions & 2 deletions
@@ -372,8 +372,10 @@ object SparkSubmit {
       OptionAssigner(args.files, YARN, CLUSTER, clOption = "--files"),
       OptionAssigner(args.archives, YARN, CLUSTER, clOption = "--archives"),
       OptionAssigner(args.jars, YARN, CLUSTER, clOption = "--addJars"),
-      OptionAssigner(args.principal, YARN, CLUSTER, clOption = "--principal"),
-      OptionAssigner(args.keytab, YARN, CLUSTER, clOption = "--keytab"),
+
+      // Yarn client or cluster
+      OptionAssigner(args.principal, YARN, ALL_DEPLOY_MODES, clOption = "--principal"),
+      OptionAssigner(args.keytab, YARN, ALL_DEPLOY_MODES, clOption = "--keytab"),
 
       // Other options
       OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN, ALL_DEPLOY_MODES,

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

Lines changed: 4 additions & 0 deletions
@@ -517,6 +517,10 @@ private[spark] class SparkSubmitArguments(args: Seq[String], env: Map[String, St
         |  --num-executors NUM         Number of executors to launch (Default: 2).
         |  --archives ARCHIVES         Comma separated list of archives to be extracted into the
         |                              working directory of each executor.
+        |  --principal PRINCIPAL       Principal to be used to login to KDC, while running on
+        |                              secure HDFS.
+        |  --keytab KEYTAB             The full path to the file that contains the keytab for the
+        |                              principal specified above.
       """.stripMargin
     )
     SparkSubmit.exitFn()
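
The two new entries document flags that, per the SparkSubmit change above, are now accepted in both client and cluster mode. An illustrative invocation (the principal, paths, and application names are made up):

spark-submit --master yarn --deploy-mode cluster \
  --principal alice@EXAMPLE.COM \
  --keytab /etc/security/keytabs/alice.keytab \
  --class org.example.App app.jar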

core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala

Lines changed: 1 addition & 2 deletions
@@ -27,9 +27,8 @@ import akka.actor._
 import akka.pattern.ask
 import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent}
 
-
-import org.apache.spark.{ExecutorAllocationClient, Logging, SparkEnv, SparkException, TaskState}
 import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.{ExecutorAllocationClient, Logging, SparkEnv, SparkException, TaskState}
 import org.apache.spark.scheduler._
 import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._
 import org.apache.spark.util.{ActorLogReceive, SerializableBuffer, AkkaUtils, Utils}

yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala

Lines changed: 16 additions & 18 deletions
@@ -24,9 +24,9 @@ import java.util.UUID
 
 import scala.collection.JavaConversions._
 import scala.collection.mutable.{ArrayBuffer, HashMap, ListBuffer, Map}
-import scala.util.{Random, Try, Success, Failure}
+import scala.util.{Try, Success, Failure}
 
-import com.google.common.base.Objects
+import com.google.common.base.{Preconditions, Objects}
 
 import org.apache.hadoop.io.DataOutputBuffer
 import org.apache.hadoop.conf.Configuration
@@ -43,8 +43,8 @@ import org.apache.hadoop.yarn.client.api.{YarnClient, YarnClientApplication}
 import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.apache.hadoop.yarn.util.Records
 
-import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkContext, SparkException}
 import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkContext, SparkException}
 import org.apache.spark.util.Utils
 
 private[spark] class Client(
@@ -67,10 +67,7 @@ private[spark] class Client(
   private val executorMemoryOverhead = args.executorMemoryOverhead // MB
   private val distCacheMgr = new ClientDistributedCacheManager()
   private val isClusterMode = args.isClusterMode
-
   private var loginFromKeytab = false
-  private var keytabFileName: String = null
-
 
   def stop(): Unit = yarnClient.stop()
 
@@ -221,10 +218,8 @@ private[spark] class Client(
     // and add them as local resources to the application master.
     val fs = FileSystem.get(hadoopConf)
     val dst = new Path(fs.getHomeDirectory(), appStagingDir)
-    val nns =
-      SparkHadoopUtil.get.asInstanceOf[YarnSparkHadoopUtil].getNameNodesToAccess(sparkConf) + dst
-    SparkHadoopUtil.get.asInstanceOf[YarnSparkHadoopUtil].
-      obtainTokensForNamenodes(nns, hadoopConf, credentials)
+    val nns = YarnSparkHadoopUtil.get.getNameNodesToAccess(sparkConf) + dst
+    YarnSparkHadoopUtil.get.obtainTokensForNamenodes(nns, hadoopConf, credentials)
 
     val replication = sparkConf.getInt("spark.yarn.submit.file.replication",
       fs.getDefaultReplication(dst)).toShort
@@ -249,8 +244,8 @@ private[spark] class Client(
       val destinationPath = copyFileToRemote(dst, localPath, replication)
       val destFs = FileSystem.get(destinationPath.toUri(), hadoopConf)
       distCacheMgr.addResource(
-        destFs, hadoopConf, destinationPath, localResources, LocalResourceType.FILE, keytabFileName,
-        statCache, appMasterOnly = true)
+        destFs, hadoopConf, destinationPath, localResources, LocalResourceType.FILE,
+        sparkConf.get("spark.yarn.keytab"), statCache, appMasterOnly = true)
     }
 
     /**
@@ -334,8 +329,11 @@ private[spark] class Client(
     env("SPARK_YARN_STAGING_DIR") = stagingDir
     env("SPARK_USER") = UserGroupInformation.getCurrentUser().getShortUserName()
     if (loginFromKeytab) {
-      env("SPARK_PRINCIPAL") = args.principal
-      env("SPARK_KEYTAB") = keytabFileName
+      val remoteFs = FileSystem.get(hadoopConf)
+      val stagingDirPath = new Path(remoteFs.getHomeDirectory, stagingDir)
+      val credentialsFile = "credentials-" + UUID.randomUUID().toString
+      sparkConf.set(
+        "spark.yarn.credentials.file", new Path(stagingDirPath, credentialsFile).toString)
     }
 
     // Set the environment variables to be passed on to the executors.
@@ -573,13 +571,13 @@ private[spark] class Client(
       val f = new File(args.keytab)
       // Generate a file name that can be used for the keytab file, that does not conflict
      // with any user file.
-      keytabFileName = f.getName + "-" + UUID.randomUUID().toString
+      val keytabFileName = f.getName + "-" + UUID.randomUUID().toString
       val ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(args.principal, args.keytab)
       credentials = ugi.getCredentials
       loginFromKeytab = true
-      val credentialsFile = "credentials-" + UUID.randomUUID().toString
-      sparkConf.set("spark.yarn.credentials.file", credentialsFile)
-      logInfo("Successfully logged into Kerberos.")
+      sparkConf.set("spark.yarn.keytab", keytabFileName)
+      sparkConf.set("spark.yarn.principal", args.principal)
+      logInfo("Successfully logged into the KDC.")
     } else {
       credentials = UserGroupInformation.getCurrentUser.getCredentials
     }
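
Condensing the Client.scala hunks above, the submission-side flow is now roughly the following. This is a sketch refactored into standalone helpers for readability; in the patch the logic lives inside Client's methods, and the surrounding argument checks are not shown here:

import java.io.File
import java.util.UUID

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.security.{Credentials, UserGroupInformation}

import org.apache.spark.SparkConf

object KeytabFlowSketch {
  // From the @@ -573 hunk: log in from the keytab and record a collision-free
  // name for the staged keytab copy, plus the principal, in the SparkConf.
  def loginFromKeytab(principal: String, keytab: String, sparkConf: SparkConf): Credentials = {
    val keytabFileName = new File(keytab).getName + "-" + UUID.randomUUID().toString
    val ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keytab)
    sparkConf.set("spark.yarn.keytab", keytabFileName)
    sparkConf.set("spark.yarn.principal", principal)
    ugi.getCredentials
  }

  // From the @@ -334 hunk: point spark.yarn.credentials.file at a random file
  // name under the application's staging directory (this commit's layout change).
  def setCredentialsFile(hadoopConf: Configuration, stagingDir: String, sparkConf: SparkConf): Unit = {
    val remoteFs = FileSystem.get(hadoopConf)
    val stagingDirPath = new Path(remoteFs.getHomeDirectory, stagingDir)
    val credentialsFile = "credentials-" + UUID.randomUUID().toString
    sparkConf.set("spark.yarn.credentials.file", new Path(stagingDirPath, credentialsFile).toString)
  }
}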
