From 16ce28c08e463567fcc5a66c80967923bd19193e Mon Sep 17 00:00:00 2001 From: Subroto Sanyal Date: Fri, 3 Jun 2016 16:58:17 +0200 Subject: [PATCH 1/3] SPARK-15754 Do not add the credentials containing HDFS delegation tokens to the current user's credentials. --- .../scala/org/apache/spark/deploy/yarn/Client.scala | 4 ++-- .../scheduler/cluster/YarnClientSchedulerBackend.scala | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 598eb17d5d4a0..9bb369549d943 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -980,7 +980,6 @@ private[spark] class Client( amContainer.setApplicationACLs( YarnSparkHadoopUtil.getApplicationAclsForYarn(securityManager).asJava) setupSecurityToken(amContainer) - UserGroupInformation.getCurrentUser().addCredentials(credentials) amContainer } @@ -1001,7 +1000,8 @@ private[spark] class Client( sparkConf.set(KEYTAB.key, keytabFileName) sparkConf.set(PRINCIPAL.key, principal) } - credentials = UserGroupInformation.getCurrentUser.getCredentials + // Defensive copy of the credentials + credentials = new Credentials(UserGroupInformation.getCurrentUser.getCredentials) } /** diff --git a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index 56dc0004d04cc..0440a5ad5a3a1 100644 --- a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -21,9 +21,13 @@ import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.yarn.api.records.YarnApplicationState -import org.apache.spark.{SparkContext, SparkException} -import 
org.apache.spark.deploy.yarn.{Client, ClientArguments, YarnSparkHadoopUtil} +import org.apache.spark.SparkContext +import org.apache.spark.SparkException +import org.apache.spark.deploy.yarn.Client +import org.apache.spark.deploy.yarn.ClientArguments +import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.PRINCIPAL import org.apache.spark.launcher.SparkAppHandle import org.apache.spark.scheduler.TaskSchedulerImpl @@ -64,7 +68,7 @@ private[spark] class YarnClientSchedulerBackend( // SPARK-8851: In yarn-client mode, the AM still does the credentials refresh. The driver // reads the credentials from HDFS, just like the executors and updates its own credentials // cache. - if (conf.contains("spark.yarn.credentials.file")) { + if (!conf.contains(PRINCIPAL.key) && conf.contains("spark.yarn.credentials.file")) { YarnSparkHadoopUtil.get.startExecutorDelegationTokenRenewer(conf) } monitorThread = asyncMonitorApplication() From d53ad55cf558c7222c7f2d501606f62513d4512f Mon Sep 17 00:00:00 2001 From: Subroto Sanyal Date: Fri, 3 Jun 2016 21:33:24 +0200 Subject: [PATCH 2/3] SPARK-15754 Review comment fix: scala coding guideline related to import statements --- .../scheduler/cluster/YarnClientSchedulerBackend.scala | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index 0440a5ad5a3a1..f09e8cd65cb50 100644 --- a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -21,11 +21,8 @@ import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.yarn.api.records.YarnApplicationState -import org.apache.spark.SparkContext -import org.apache.spark.SparkException -import 
org.apache.spark.deploy.yarn.Client -import org.apache.spark.deploy.yarn.ClientArguments -import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil +import org.apache.spark.{SparkContext, SparkException} +import org.apache.spark.deploy.yarn.{Client, ClientArguments, YarnSparkHadoopUtil} import org.apache.spark.internal.Logging import org.apache.spark.internal.config.PRINCIPAL import org.apache.spark.launcher.SparkAppHandle From 3a4c04a6c57851512e836d247b0c97e647aa86cd Mon Sep 17 00:00:00 2001 From: Subroto Sanyal Date: Fri, 3 Jun 2016 22:15:30 +0200 Subject: [PATCH 3/3] SPARK-15754 Review comment fix: Reverting back the change --- .../spark/scheduler/cluster/YarnClientSchedulerBackend.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index f09e8cd65cb50..56dc0004d04cc 100644 --- a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -24,7 +24,6 @@ import org.apache.hadoop.yarn.api.records.YarnApplicationState import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.deploy.yarn.{Client, ClientArguments, YarnSparkHadoopUtil} import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.PRINCIPAL import org.apache.spark.launcher.SparkAppHandle import org.apache.spark.scheduler.TaskSchedulerImpl @@ -65,7 +64,7 @@ private[spark] class YarnClientSchedulerBackend( // SPARK-8851: In yarn-client mode, the AM still does the credentials refresh. The driver // reads the credentials from HDFS, just like the executors and updates its own credentials // cache. 
- if (!conf.contains(PRINCIPAL.key) && conf.contains("spark.yarn.credentials.file")) { + if (conf.contains("spark.yarn.credentials.file")) { YarnSparkHadoopUtil.get.startExecutorDelegationTokenRenewer(conf) } monitorThread = asyncMonitorApplication()