-
Notifications
You must be signed in to change notification settings - Fork 117
Clean up resources that are not used by pods. #305
Changes from all commits
2fc4efc
c41a7fa
ef70ac2
2ce7968
33b5938
0ea81c1
8fbdb24
161da02
831b94f
a360760
d887081
b06ad41
ce8a19e
aeca760
2f48823
1187b2e
e19a36f
950bf92
9a05f64
b79ae4c
3aad99a
05586cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,44 +17,42 @@ | |
|
|
||
| package org.apache.spark.deploy.kubernetes | ||
|
|
||
| import java.io.File | ||
| import java.nio.ByteBuffer | ||
|
|
||
| import io.fabric8.kubernetes.api.model.Pod | ||
| import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watch, Watcher} | ||
| import io.fabric8.kubernetes.client.{Config, KubernetesClient, KubernetesClientException, Watch, Watcher} | ||
| import io.fabric8.kubernetes.client.Watcher.Action | ||
| import org.apache.commons.io.IOUtils | ||
| import scala.collection.JavaConverters._ | ||
| import scala.collection.mutable | ||
|
|
||
| import org.apache.spark.{SecurityManager, SparkConf} | ||
| import org.apache.spark.deploy.ExternalShuffleService | ||
| import org.apache.spark.deploy.kubernetes.config._ | ||
| import org.apache.spark.deploy.kubernetes.constants._ | ||
| import org.apache.spark.internal.Logging | ||
| import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} | ||
| import org.apache.spark.network.shuffle.ExternalShuffleBlockHandler | ||
| import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, RegisterDriver} | ||
| import org.apache.spark.network.util.TransportConf | ||
| import org.apache.spark.scheduler.cluster.kubernetes.DriverPodKubernetesClientProvider | ||
|
|
||
| /** | ||
| * An RPC endpoint that receives registration requests from Spark drivers running on Kubernetes. | ||
| * It detects driver termination and calls the cleanup callback to [[ExternalShuffleService]]. | ||
| */ | ||
| private[spark] class KubernetesShuffleBlockHandler ( | ||
| transportConf: TransportConf, | ||
| kubernetesClientProvider: DriverPodKubernetesClientProvider) | ||
| kubernetesClient: KubernetesClient) | ||
| extends ExternalShuffleBlockHandler(transportConf, null) with Logging { | ||
|
|
||
| private val INIT_AND_STOP_LOCK = new Object | ||
| private val CONNECTED_APPS_LOCK = new Object | ||
| private val connectedApps = mutable.Set.empty[String] | ||
| private var shuffleWatch: Option[Watch] = None | ||
| private var kubernetesClient: Option[KubernetesClient] = None | ||
|
|
||
| def start(): Unit = INIT_AND_STOP_LOCK.synchronized { | ||
| val client = kubernetesClientProvider.get | ||
| shuffleWatch = startShuffleWatcher(client) | ||
| kubernetesClient = Some(client) | ||
| shuffleWatch = startShuffleWatcher() | ||
| } | ||
|
|
||
| override def close(): Unit = { | ||
|
|
@@ -64,8 +62,7 @@ private[spark] class KubernetesShuffleBlockHandler ( | |
| INIT_AND_STOP_LOCK.synchronized { | ||
| shuffleWatch.foreach(IOUtils.closeQuietly) | ||
| shuffleWatch = None | ||
| kubernetesClient.foreach(IOUtils.closeQuietly) | ||
| kubernetesClient = None | ||
| IOUtils.closeQuietly(kubernetesClient) | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -90,9 +87,9 @@ private[spark] class KubernetesShuffleBlockHandler ( | |
| } | ||
| } | ||
|
|
||
| private def startShuffleWatcher(client: KubernetesClient): Option[Watch] = { | ||
| private def startShuffleWatcher(): Option[Watch] = { | ||
| try { | ||
| Some(client | ||
| Some(kubernetesClient | ||
| .pods() | ||
| .withLabels(Map(SPARK_ROLE_LABEL -> "driver").asJava) | ||
| .watch(new Watcher[Pod] { | ||
|
|
@@ -137,42 +134,55 @@ private[spark] class KubernetesShuffleBlockHandler ( | |
| */ | ||
| private[spark] class KubernetesExternalShuffleService( | ||
| conf: SparkConf, | ||
| securityManager: SecurityManager, | ||
| kubernetesClientProvider: DriverPodKubernetesClientProvider) | ||
| securityManager: SecurityManager) | ||
| extends ExternalShuffleService(conf, securityManager) { | ||
|
|
||
| private var shuffleBlockHandlers: mutable.Buffer[KubernetesShuffleBlockHandler] = _ | ||
| protected override def newShuffleBlockHandler( | ||
| tConf: TransportConf): ExternalShuffleBlockHandler = { | ||
| val newBlockHandler = new KubernetesShuffleBlockHandler(tConf, kubernetesClientProvider) | ||
| newBlockHandler.start() | ||
|
|
||
| // TODO: figure out a better way of doing this. | ||
| // This is necessary because the constructor is not called | ||
| // when this class is initialized through ExternalShuffleService. | ||
| if (shuffleBlockHandlers == null) { | ||
| val kubernetesClient = SparkKubernetesClientFactory.createKubernetesClient( | ||
| conf.get(KUBERNETES_SHUFFLE_APISERVER_URI), | ||
| None, | ||
| APISERVER_AUTH_SHUFFLE_SERVICE_CONF_PREFIX, | ||
| conf, | ||
| Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH)) | ||
| .filter( _ => conf.get(KUBERNETES_SHUFFLE_USE_SERVICE_ACCOUNT_CREDENTIALS)), | ||
| Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH)) | ||
| .filter( _ => conf.get(KUBERNETES_SHUFFLE_USE_SERVICE_ACCOUNT_CREDENTIALS))) | ||
| val newBlockHandler = new KubernetesShuffleBlockHandler(tConf, kubernetesClient) | ||
| try { | ||
| newBlockHandler.start() | ||
| // TODO: figure out a better way of doing this. | ||
| // This is necessary because the constructor is not called | ||
| // when this class is initialized through ExternalShuffleService. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. how does the […]
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @foxish for this, I was also confused here. I don't see this being created reflectively in the surrounding code.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think the comment isn't clear there. The issue, I think, was that the […]. So, at that point, the constructor for […] |
||
| if (shuffleBlockHandlers == null) { | ||
| shuffleBlockHandlers = mutable.Buffer.empty[KubernetesShuffleBlockHandler] | ||
| } | ||
| shuffleBlockHandlers += newBlockHandler | ||
| newBlockHandler | ||
| } catch { | ||
| case e: Throwable => | ||
| logError("Failed to create Kubernetes shuffle block handler.", e) | ||
| newBlockHandler.close() | ||
| throw e | ||
| } | ||
| shuffleBlockHandlers += newBlockHandler | ||
| newBlockHandler | ||
| } | ||
|
|
||
| override def stop(): Unit = { | ||
| try { | ||
| super.stop() | ||
| } finally { | ||
| shuffleBlockHandlers.foreach(_.close()) | ||
| if (shuffleBlockHandlers != null) { | ||
| shuffleBlockHandlers.foreach(_.close()) | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private[spark] object KubernetesExternalShuffleService extends Logging { | ||
| def main(args: Array[String]): Unit = { | ||
| ExternalShuffleService.main(args, | ||
| (conf: SparkConf, sm: SecurityManager) => { | ||
| val kubernetesClientProvider = new DriverPodKubernetesClientProvider(conf) | ||
| new KubernetesExternalShuffleService(conf, sm, kubernetesClientProvider) | ||
| }) | ||
| (conf: SparkConf, sm: SecurityManager) => new KubernetesExternalShuffleService(conf, sm)) | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,103 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| package org.apache.spark.deploy.kubernetes | ||
|
|
||
| import java.io.File | ||
|
|
||
| import com.google.common.base.Charsets | ||
| import com.google.common.io.Files | ||
| import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient, KubernetesClient} | ||
| import io.fabric8.kubernetes.client.utils.HttpClientUtils | ||
| import okhttp3.Dispatcher | ||
|
|
||
| import org.apache.spark.SparkConf | ||
| import org.apache.spark.deploy.kubernetes.config._ | ||
| import org.apache.spark.util.ThreadUtils | ||
|
|
||
/**
 * Spark-opinionated builder for Kubernetes clients. It uses a prefix plus common suffixes to
 * parse configuration keys, similar to the manner in which Spark's SecurityManager parses SSL
 * options for different components.
 */
private[spark] object SparkKubernetesClientFactory {

  /**
   * Builds a Kubernetes client for the API server at `master`.
   *
   * Authentication settings are read from `sparkConf` under keys of the form
   * `<kubernetesAuthConfPrefix>.<suffix>`. Explicitly configured values take precedence over
   * the service account files supplied by the caller.
   *
   * @param master URL of the Kubernetes API server.
   * @param namespace namespace to set on the client configuration, if any.
   * @param kubernetesAuthConfPrefix prefix of the authentication configuration keys.
   * @param sparkConf configuration the authentication settings are read from.
   * @param maybeServiceAccountToken fallback OAuth token file; used only when neither an OAuth
   *                                 token file nor an OAuth token value is configured.
   * @param maybeServiceAccountCaCert fallback CA certificate file; used only when no CA
   *                                  certificate file is configured.
   * @return a client whose HTTP client uses a dispatcher backed by the thread pool returned
   *         from `ThreadUtils.newDaemonCachedThreadPool("kubernetes-dispatcher")`.
   */
  def createKubernetesClient(
      master: String,
      namespace: Option[String],
      kubernetesAuthConfPrefix: String,
      sparkConf: SparkConf,
      maybeServiceAccountToken: Option[File],
      maybeServiceAccountCaCert: Option[File]): KubernetesClient = {
    val oauthTokenFileConf = s"$kubernetesAuthConfPrefix.$OAUTH_TOKEN_FILE_CONF_SUFFIX"
    val oauthTokenConf = s"$kubernetesAuthConfPrefix.$OAUTH_TOKEN_CONF_SUFFIX"
    val configuredOauthTokenFile = sparkConf.getOption(oauthTokenFileConf).map(new File(_))
    val oauthTokenValue = sparkConf.getOption(oauthTokenConf)
    // Validate only what the user configured explicitly. Folding the service account token in
    // before this check would reject a lone explicit token value with a message that blames a
    // token file setting the user never provided.
    OptionRequirements.requireNandDefined(
      configuredOauthTokenFile,
      oauthTokenValue,
      s"Cannot specify OAuth token through both a file $oauthTokenFileConf and a" +
        s" value $oauthTokenConf.")
    // The service account token is a fallback: it applies only when no explicit token (file or
    // value) was configured, so an explicit value is never overwritten by it below.
    val oauthTokenFile = configuredOauthTokenFile.orElse {
      if (oauthTokenValue.isDefined) None else maybeServiceAccountToken
    }

    val caCertFile = sparkConf
      .getOption(s"$kubernetesAuthConfPrefix.$CA_CERT_FILE_CONF_SUFFIX")
      .orElse(maybeServiceAccountCaCert.map(_.getAbsolutePath))
    val clientKeyFile = sparkConf
      .getOption(s"$kubernetesAuthConfPrefix.$CLIENT_KEY_FILE_CONF_SUFFIX")
    val clientCertFile = sparkConf
      .getOption(s"$kubernetesAuthConfPrefix.$CLIENT_CERT_FILE_CONF_SUFFIX")
    val dispatcher = new Dispatcher(
      ThreadUtils.newDaemonCachedThreadPool("kubernetes-dispatcher"))
    val config = new ConfigBuilder()
      .withApiVersion("v1")
      .withMasterUrl(master)
      .withWebsocketPingInterval(0)
      .withOption(oauthTokenValue) {
        (token, configBuilder) => configBuilder.withOauthToken(token)
      }.withOption(oauthTokenFile) {
        (file, configBuilder) =>
          // The token file's entire content is used as the OAuth token.
          configBuilder.withOauthToken(Files.toString(file, Charsets.UTF_8))
      }.withOption(caCertFile) {
        (file, configBuilder) => configBuilder.withCaCertFile(file)
      }.withOption(clientKeyFile) {
        (file, configBuilder) => configBuilder.withClientKeyFile(file)
      }.withOption(clientCertFile) {
        (file, configBuilder) => configBuilder.withClientCertFile(file)
      }.withOption(namespace) {
        (ns, configBuilder) => configBuilder.withNamespace(ns)
      }.build()
    // Rebuild the HTTP client derived from the config so that it uses the custom dispatcher.
    val baseHttpClient = HttpClientUtils.createHttpClient(config)
    val httpClientWithCustomDispatcher = baseHttpClient.newBuilder()
      .dispatcher(dispatcher)
      .build()
    new DefaultKubernetesClient(httpClientWithCustomDispatcher, config)
  }

  /**
   * Adds `withOption` to [[ConfigBuilder]] so that optional settings can be applied inside a
   * single fluent chain.
   */
  private implicit class OptionConfigurableConfigBuilder(configBuilder: ConfigBuilder) {

    /**
     * Applies `configurator` to the wrapped builder when `option` is defined; otherwise keeps
     * the builder as it is.
     *
     * @param option value to configure with, if present.
     * @param configurator function that applies the value to the builder.
     * @return a wrapper around the resulting builder, so further calls can be chained.
     */
    def withOption[T]
        (option: Option[T])
        (configurator: ((T, ConfigBuilder) => ConfigBuilder)): OptionConfigurableConfigBuilder = {
      new OptionConfigurableConfigBuilder(option.map { opt =>
        configurator(opt, configBuilder)
      }.getOrElse(configBuilder))
    }

    /** Builds the [[Config]] from the wrapped builder. */
    def build(): Config = configBuilder.build()
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The two options:
`spark.kubernetes.authenticate.resourceStagingServer.oauthTokenFile` and
`spark.kubernetes.authenticate.resourceStagingServer.caCertFile` are already mentioned above in the table.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, but here it's worth mentioning that those configurations take precedence over the service account credentials.