From 7fa509711ce40b9a0eef05c1ed8b00b31df9ec23 Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Wed, 31 Oct 2018 11:07:31 +0000 Subject: [PATCH 1/6] [SPARK-25887][K8S] Configurable K8S context support This enhancement allows for specifying the desired context to use for the initial K8S client auto-configuration. This allows users to more easily access alternative K8S contexts without having to first explicitly change their current context via kubectl. --- docs/running-on-kubernetes.md | 24 +++++++++++++++++-- .../org/apache/spark/deploy/k8s/Config.scala | 12 ++++++++++ .../k8s/SparkKubernetesClientFactory.scala | 21 ++++++++++------ 3 files changed, 48 insertions(+), 9 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 3453ee912205f..efdadab3bf88b 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -334,6 +334,16 @@ the Spark application. ## Kubernetes Features +### Configuration File + +Your Kubernetes config file typically lives under `.kube/config` in your home directory or in a location specified by the `KUBECONFIG` environment variable. Spark on Kubernetes will attempt to use this file to do an initial auto-configuration of the Kubernetes client used to interact with the Kubernetes cluster. A variety of Spark configuration properties are provided that allow further customising the client configuration e.g. using an alternative authentication method. + +### Contexts + +Kubernetes configuration files can contain multiple contexts that allow for switching between different clusters and/or user identities. By default Spark on Kubernetes will use your current context (which can be checked by running `kubectl config current-context`) when doing the initial auto-configuration of the Kubernetes client. + +In order to use an alternative context users can specify the desired context via the Spark configuration property `spark.kubernetes.context` e.g. `spark.kubernetes.context=minikube`. + ### Namespaces Kubernetes has the concept of [namespaces](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/). @@ -406,13 +416,23 @@ Some of these include: # Configuration -See the [configuration page](configuration.html) for information on Spark configurations. The following configurations are -specific to Spark on Kubernetes. +See the [configuration page](configuration.html) for information on Spark configurations. The following configurations are specific to Spark on Kubernetes. #### Spark Properties + + + + + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index e8bf16df190e8..4cca1e22bd108 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -24,6 +24,18 @@ import org.apache.spark.internal.config.ConfigBuilder private[spark] object Config extends Logging { + val KUBERNETES_CONTEXT = + ConfigBuilder("spark.kubernetes.context") + .doc("The desired context from your K8S config file used to configure the K8S " + + "client for interacting with the cluster. Useful if your config file has " + + "multiple clusters or user identities defined. The client library used " + + "locates the config file via the KUBECONFIG environment variable or by defaulting " + + "to .kube/config under your home directory. If not specified then your current " + + "context is used. You can always override specific aspects of the config file " + + "provided configuration using other Spark on K8S configuration options.") + .stringConf + .createOptional + val KUBERNETES_NAMESPACE = ConfigBuilder("spark.kubernetes.namespace") .doc("The namespace that will be used for running the driver and executor pods.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala index 77bd66b608e7c..90a3241ff7806 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala @@ -20,12 +20,14 @@ import java.io.File import com.google.common.base.Charsets import com.google.common.io.Files +import io.fabric8.kubernetes.client.Config.autoConfigure import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient} import io.fabric8.kubernetes.client.utils.HttpClientUtils import okhttp3.Dispatcher - +import org.apache.commons.lang3.StringUtils import org.apache.spark.SparkConf import org.apache.spark.deploy.k8s.Config._ +import org.apache.spark.internal.Logging import org.apache.spark.util.ThreadUtils /** @@ -33,7 +35,7 @@ import org.apache.spark.util.ThreadUtils * parse configuration keys, similar to the manner in which Spark's SecurityManager parses SSL * options for different components. */ -private[spark] object SparkKubernetesClientFactory { +private[spark] object SparkKubernetesClientFactory extends Logging { def createKubernetesClient( master: String, @@ -42,9 +44,6 @@ private[spark] object SparkKubernetesClientFactory { sparkConf: SparkConf, defaultServiceAccountToken: Option[File], defaultServiceAccountCaCert: Option[File]): KubernetesClient = { - - // TODO [SPARK-25887] Support configurable context - val oauthTokenFileConf = s"$kubernetesAuthConfPrefix.$OAUTH_TOKEN_FILE_CONF_SUFFIX" val oauthTokenConf = s"$kubernetesAuthConfPrefix.$OAUTH_TOKEN_CONF_SUFFIX" val oauthTokenFile = sparkConf.getOption(oauthTokenFileConf) @@ -67,8 +66,16 @@ private[spark] object SparkKubernetesClientFactory { val dispatcher = new Dispatcher( ThreadUtils.newDaemonCachedThreadPool("kubernetes-dispatcher")) - // TODO [SPARK-25887] Create builder in a way that respects configurable context - val config = new ConfigBuilder() + // Allow for specifying a context used to auto-configure from the users K8S config file + val kubeContext = sparkConf.get(KUBERNETES_CONTEXT).filter(c => StringUtils.isNotBlank(c)) + logInfo(s"Auto-configuring K8S client using " + + s"${if (kubeContext.isEmpty) s"context ${kubeContext.get}" else "current context"}" + + s" from users K8S config file") + + // Start from an auto-configured config with the desired context + // Fabric 8 uses null to indicate that the users current context should be used so if no + // explicit setting pass null + val config = new ConfigBuilder(autoConfigure(kubeContext.getOrElse(null))) .withApiVersion("v1") .withMasterUrl(master) .withWebsocketPingInterval(0) From 8fc744d5d5cf0da378490084067a53cd6a8b4353 Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Mon, 3 Dec 2018 15:51:21 -0800 Subject: [PATCH 2/6] [SPARK-25887][K8S] Clean up issues - Simplify filter invocation - Fix logging message logic error --- .../spark/deploy/k8s/SparkKubernetesClientFactory.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala index 90a3241ff7806..a3d7e0d777514 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala @@ -67,9 +67,9 @@ private[spark] object SparkKubernetesClientFactory extends Logging { ThreadUtils.newDaemonCachedThreadPool("kubernetes-dispatcher")) // Allow for specifying a context used to auto-configure from the users K8S config file - val kubeContext = sparkConf.get(KUBERNETES_CONTEXT).filter(c => StringUtils.isNotBlank(c)) + val kubeContext = sparkConf.get(KUBERNETES_CONTEXT).filter(_.nonEmpty) logInfo(s"Auto-configuring K8S client using " + - s"${if (kubeContext.isEmpty) s"context ${kubeContext.get}" else "current context"}" + + s"${if (!kubeContext.isEmpty) s"context ${kubeContext.get}" else "current context"}" + s" from users K8S config file") // Start from an auto-configured config with the desired context From f8b06329f78c328ff7b78184ecebcd4fc48a6b70 Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Tue, 4 Dec 2018 10:16:07 -0800 Subject: [PATCH 3/6] [SPARK-25887][K8S] Avoid possible NPE --- .../apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala index a3d7e0d777514..21b6e8fe02c0b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala @@ -69,7 +69,7 @@ private[spark] object SparkKubernetesClientFactory extends Logging { // Allow for specifying a context used to auto-configure from the users K8S config file val kubeContext = sparkConf.get(KUBERNETES_CONTEXT).filter(_.nonEmpty) logInfo(s"Auto-configuring K8S client using " + - s"${if (!kubeContext.isEmpty) s"context ${kubeContext.get}" else "current context"}" + + s"${if (kubeContext.isDefined) s"context ${kubeContext.getOrElse("?")}" else "current context"}" + s" from users K8S config file") // Start from an auto-configured config with the desired context From c76f752bb7b5a658c285d1fb0ac7f8549131b7b3 Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Tue, 4 Dec 2018 10:26:27 -0800 Subject: [PATCH 4/6] [SPARK-25887][K8S] Address PR comments - Fix doc typos - Remove unused import --- docs/running-on-kubernetes.md | 2 +- .../apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index efdadab3bf88b..e23f28cf755c3 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -430,7 +430,7 @@ See the [configuration page](configuration.html) for information on Spark config auto-configuration of the Kubernetes client library. When not specified then the users current context is used. NB: Many of the auto-configured settings can be overridden by the use of other Spark - configuration properties e.g. spark.kuberentes.namespace. + configuration properties e.g. spark.kubernetes.namespace. diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala index 21b6e8fe02c0b..18b4ce63bef31 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala @@ -24,7 +24,6 @@ import io.fabric8.kubernetes.client.Config.autoConfigure import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient} import io.fabric8.kubernetes.client.utils.HttpClientUtils import okhttp3.Dispatcher -import org.apache.commons.lang3.StringUtils import org.apache.spark.SparkConf import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.internal.Logging From 5e4c340c17f46b6b0f4adeb6ea56289c8ed90a37 Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Wed, 19 Dec 2018 17:47:37 +0000 Subject: [PATCH 5/6] Fix scalastyle errors --- .../spark/deploy/k8s/SparkKubernetesClientFactory.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala index 18b4ce63bef31..63ebf9e37f35a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala @@ -20,10 +20,11 @@ import java.io.File import com.google.common.base.Charsets import com.google.common.io.Files -import io.fabric8.kubernetes.client.Config.autoConfigure import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient} +import io.fabric8.kubernetes.client.Config.autoConfigure import io.fabric8.kubernetes.client.utils.HttpClientUtils import okhttp3.Dispatcher + import org.apache.spark.SparkConf import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.internal.Logging @@ -68,7 +69,8 @@ private[spark] object SparkKubernetesClientFactory extends Logging { // Allow for specifying a context used to auto-configure from the users K8S config file val kubeContext = sparkConf.get(KUBERNETES_CONTEXT).filter(_.nonEmpty) logInfo(s"Auto-configuring K8S client using " + - s"${if (kubeContext.isDefined) s"context ${kubeContext.getOrElse("?")}" else "current context"}" + + s"${if (kubeContext.isDefined) s"context ${kubeContext.getOrElse("?")}" + else "current context"}" + s" from users K8S config file") // Start from an auto-configured config with the desired context From 78324805ac9b36f38b770aaa6778c986237713a8 Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Tue, 22 Jan 2019 11:26:51 +0000 Subject: [PATCH 6/6] [SPARK-25887][K8S] Simplify logging statement --- .../apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala index 63ebf9e37f35a..1e3fb4d38f031 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala @@ -69,8 +69,7 @@ private[spark] object SparkKubernetesClientFactory extends Logging { // Allow for specifying a context used to auto-configure from the users K8S config file val kubeContext = sparkConf.get(KUBERNETES_CONTEXT).filter(_.nonEmpty) logInfo(s"Auto-configuring K8S client using " + - s"${if (kubeContext.isDefined) s"context ${kubeContext.getOrElse("?")}" - else "current context"}" + + kubeContext.map("context " + _).getOrElse("current context") + s" from users K8S config file") // Start from an auto-configured config with the desired context
Property NameDefaultMeaning
spark.kubernetes.context(none) + The context from the user Kubernetes configuration file used for the initial + auto-configuration of the Kubernetes client library. When not specified then + the users current context is used. NB: Many of the + auto-configured settings can be overridden by the use of other Spark + configuration properties e.g. spark.kuberentes.namespace. +
spark.kubernetes.namespace default