From 2ffed5916291077548e56ea733625dd715e1f0f3 Mon Sep 17 00:00:00 2001 From: mcheah Date: Tue, 6 Dec 2016 14:25:59 -0800 Subject: [PATCH 001/156] [SPARK-18278] Minimal support for submitting to Kubernetes. --- .../org/apache/spark/deploy/SparkSubmit.scala | 36 +- .../spark/deploy/SparkSubmitArguments.scala | 30 ++ kubernetes/core/pom.xml | 101 +++++ ...che.spark.scheduler.ExternalClusterManager | 1 + .../spark/deploy/kubernetes/Client.scala | 355 ++++++++++++++++++ .../kubernetes/KubernetesClientBuilder.scala | 53 +++ .../spark/deploy/kubernetes/Retry.scala | 42 +++ .../rest/KubernetesRestProtocolMessages.scala | 58 +++ .../rest/kubernetes/HttpClientUtil.scala | 57 +++ .../kubernetes/KubernetesSparkRestApi.scala | 39 ++ .../KubernetesSparkRestServer.scala | 274 ++++++++++++++ .../kubernetes/KubernetesClusterManager.scala | 42 +++ .../KubernetesClusterSchedulerBackend.scala | 264 +++++++++++++ kubernetes/docker-minimal-bundle/pom.xml | 137 +++++++ .../src/main/assembly/driver-assembly.xml | 84 +++++ .../src/main/assembly/executor-assembly.xml | 84 +++++ .../src/main/docker/driver/Dockerfile | 26 ++ .../src/main/docker/executor/Dockerfile | 26 ++ .../integration-tests-spark-jobs/pom.xml | 45 +++ .../jobs/SparkPiWithInfiniteWait.scala | 50 +++ kubernetes/integration-tests/pom.xml | 206 ++++++++++ .../integrationtest/KubernetesSuite.scala | 157 ++++++++ .../docker/SparkDockerImageBuilder.scala | 59 +++ .../integrationtest/minikube/Minikube.scala | 173 +++++++++ .../restapis/SparkRestApiV1.scala | 50 +++ .../launcher/SparkSubmitOptionParser.java | 10 + pom.xml | 49 +++ 27 files changed, 2505 insertions(+), 3 deletions(-) create mode 100644 kubernetes/core/pom.xml create mode 100644 kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala create mode 100644 kubernetes/docker-minimal-bundle/pom.xml create mode 100644 kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml create mode 100644 kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml create mode 100644 kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile create mode 100644 kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile create mode 100644 kubernetes/integration-tests-spark-jobs/pom.xml create mode 100644 kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala create mode 100644 kubernetes/integration-tests/pom.xml 
create mode 100644 kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala create mode 100644 kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala create mode 100644 kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala create mode 100644 kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index c70061bc5b5bc..598bafcab81dc 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -70,7 +70,8 @@ object SparkSubmit { private val STANDALONE = 2 private val MESOS = 4 private val LOCAL = 8 - private val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | LOCAL + private val KUBERNETES = 16 + private val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | KUBERNETES | LOCAL // Deploy modes private val CLIENT = 1 @@ -239,6 +240,7 @@ object SparkSubmit { YARN case m if m.startsWith("spark") => STANDALONE case m if m.startsWith("mesos") => MESOS + case m if m.startsWith("kubernetes") => KUBERNETES case m if m.startsWith("local") => LOCAL case _ => printErrorAndExit("Master must either be yarn or start with spark, mesos, local") @@ -284,6 +286,7 @@ object SparkSubmit { } val isYarnCluster = clusterManager == YARN && deployMode == CLUSTER val isMesosCluster = clusterManager == MESOS && deployMode == CLUSTER + val isKubernetesCluster = clusterManager == KUBERNETES && deployMode == CLUSTER // Resolve maven dependencies if there are any and add classpath to jars. 
Add them to py-files // too for packages that include Python code @@ -330,6 +333,10 @@ object SparkSubmit { // The following modes are not supported or applicable (clusterManager, deployMode) match { + case (KUBERNETES, CLIENT) => + printErrorAndExit("Client mode is currently not supported for Kubernetes.") + case (KUBERNETES, CLUSTER) if args.isPython || args.isR => + printErrorAndExit("Kubernetes does not currently support python or R applications.") case (STANDALONE, CLUSTER) if args.isPython => printErrorAndExit("Cluster deploy mode is currently not supported for python " + "applications on standalone clusters.") @@ -463,7 +470,17 @@ object SparkSubmit { OptionAssigner(args.principal, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.principal"), OptionAssigner(args.keytab, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.keytab"), - // Other options + // Kubernetes only + OptionAssigner(args.kubernetesMaster, KUBERNETES, ALL_DEPLOY_MODES, + sysProp = "spark.kubernetes.master"), + OptionAssigner(args.kubernetesNamespace, KUBERNETES, ALL_DEPLOY_MODES, + sysProp = "spark.kubernetes.namespace"), + OptionAssigner(args.kubernetesUploadJars, KUBERNETES, CLUSTER, + sysProp = "spark.kubernetes.driver.uploads.jars"), + OptionAssigner(args.kubernetesUploadDriverExtraClasspath, KUBERNETES, CLUSTER, + sysProp = "spark.kubernetes.driver.uploads.driverExtraClasspath"), + + // Other options OptionAssigner(args.executorCores, STANDALONE | YARN, ALL_DEPLOY_MODES, sysProp = "spark.executor.cores"), OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN, ALL_DEPLOY_MODES, @@ -506,8 +523,9 @@ object SparkSubmit { // Add the application jar automatically so the user doesn't have to call sc.addJar // For YARN cluster mode, the jar is already distributed on each node as "app.jar" + // In Kubernetes cluster mode, the jar will be uploaded by the client separately. // For python and R files, the primary resource is already distributed as a regular file - if (!isYarnCluster && !args.isPython && !args.isR) { + if (!isYarnCluster && !isKubernetesCluster && !args.isPython && !args.isR) { var jars = sysProps.get("spark.jars").map(x => x.split(",").toSeq).getOrElse(Seq.empty) if (isUserJar(args.primaryResource)) { jars = jars ++ Seq(args.primaryResource) @@ -606,6 +624,13 @@ object SparkSubmit { } } + if (isKubernetesCluster) { + childMainClass = "org.apache.spark.deploy.kubernetes.Client" + childArgs += args.primaryResource + childArgs += args.mainClass + childArgs ++= args.childArgs + } + // Load any properties specified through --conf and the default properties file for ((k, v) <- args.sparkProperties) { sysProps.getOrElseUpdate(k, v) @@ -829,6 +854,7 @@ private[spark] object SparkSubmitUtils { /** * Represents a Maven Coordinate + * * @param groupId the groupId of the coordinate * @param artifactId the artifactId of the coordinate * @param version the version of the coordinate @@ -840,6 +866,7 @@ private[spark] object SparkSubmitUtils { /** * Extracts maven coordinates from a comma-delimited string. Coordinates should be provided * in the format `groupId:artifactId:version` or `groupId/artifactId:version`. 
+ * * @param coordinates Comma-delimited string of maven coordinates * @return Sequence of Maven coordinates */ @@ -870,6 +897,7 @@ private[spark] object SparkSubmitUtils { /** * Extracts maven coordinates from a comma-delimited string + * * @param remoteRepos Comma-delimited string of remote repositories * @param ivySettings The Ivy settings for this session * @return A ChainResolver used by Ivy to search for and resolve dependencies. @@ -934,6 +962,7 @@ private[spark] object SparkSubmitUtils { /** * Output a comma-delimited list of paths for the downloaded jars to be added to the classpath * (will append to jars in SparkSubmit). + * * @param artifacts Sequence of dependencies that were resolved and retrieved * @param cacheDirectory directory where jars are cached * @return a comma-delimited list of paths for the dependencies @@ -990,6 +1019,7 @@ private[spark] object SparkSubmitUtils { /** * Resolves any dependencies that were supplied through maven coordinates + * * @param coordinates Comma-delimited string of maven coordinates * @param remoteRepos Comma-delimited string of remote repositories other than maven central * @param ivyPath The path to the local ivy repository diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index f1761e7c1ec92..4244742aad14c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -71,6 +71,12 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S var principal: String = null var keytab: String = null + // Kubernetes only + var kubernetesMaster: String = null + var kubernetesNamespace: String = null + var kubernetesUploadJars: String = null + var kubernetesUploadDriverExtraClasspath: String = null + // Standalone cluster mode only var supervise: Boolean = false var driverCores: String = null @@ -186,6 +192,18 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S .getOrElse(sparkProperties.get("spark.executor.instances").orNull) keytab = Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull principal = Option(principal).orElse(sparkProperties.get("spark.yarn.principal")).orNull + kubernetesMaster = Option(kubernetesMaster) + .orElse(sparkProperties.get("spark.kubernetes.master")) + .orNull + kubernetesNamespace = Option(kubernetesNamespace) + .orElse(sparkProperties.get("spark.kubernetes.namespace")) + .orNull + kubernetesUploadJars = Option(kubernetesUploadJars) + .orElse(sparkProperties.get("spark.kubernetes.driver.uploads.jars")) + .orNull + kubernetesUploadDriverExtraClasspath = Option(kubernetesUploadDriverExtraClasspath) + .orElse(sparkProperties.get("spark.kubernetes.driver.uploads.driverExtraClasspath")) + .orNull // Try to set main class from JAR if no --class argument is given if (mainClass == null && !isPython && !isR && primaryResource != null) { @@ -426,6 +444,18 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S case KEYTAB => keytab = value + case KUBERNETES_MASTER => + kubernetesMaster = value + + case KUBERNETES_NAMESPACE => + kubernetesNamespace = value + + case KUBERNETES_UPLOAD_JARS => + kubernetesUploadJars = value + + case KUBERNETES_UPLOAD_DRIVER_EXTRA_CLASSPATH => + kubernetesUploadDriverExtraClasspath = value + case HELP => printUsageAndExit(0) diff --git a/kubernetes/core/pom.xml b/kubernetes/core/pom.xml new file 
mode 100644 index 0000000000000..9c7eb52b2680a --- /dev/null +++ b/kubernetes/core/pom.xml @@ -0,0 +1,101 @@ + + + + 4.0.0 + + org.apache.spark + spark-parent_2.11 + 2.1.0-SNAPSHOT + ../../pom.xml + + + spark-kubernetes_2.11 + jar + Spark Project Kubernetes + + kubernetes + 1.4.17 + + + + + org.apache.spark + spark-core_${scala.binary.version} + ${project.version} + + + + org.apache.spark + spark-core_${scala.binary.version} + ${project.version} + test-jar + test + + + + io.fabric8 + kubernetes-client + ${kubernetes.client.version} + + + com.netflix.feign + feign-core + + + com.netflix.feign + feign-okhttp + + + com.netflix.feign + feign-jackson + + + com.netflix.feign + feign-jaxrs + + + javax.ws.rs + jsr311-api + + + + + com.fasterxml.jackson.jaxrs + jackson-jaxrs-json-provider + + + javax.ws.rs + javax.ws.rs-api + + + + com.google.guava + guava + + + + + + + + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes + + + + diff --git a/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager b/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager new file mode 100644 index 0000000000000..55e7e38b28a08 --- /dev/null +++ b/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager @@ -0,0 +1 @@ +org.apache.spark.scheduler.cluster.kubernetes.KubernetesClusterManager diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala new file mode 100644 index 0000000000000..4ee00e8802080 --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -0,0 +1,355 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes + +import java.io.File +import java.security.SecureRandom +import java.util.concurrent.{Executors, TimeUnit} +import javax.net.ssl.X509TrustManager + +import com.google.common.io.Files +import com.google.common.util.concurrent.{SettableFuture, ThreadFactoryBuilder} +import io.fabric8.kubernetes.api.model._ +import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient, KubernetesClientException, Watch, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action +import io.fabric8.kubernetes.client.internal.SSLUtils +import org.apache.commons.codec.binary.Base64 +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import scala.concurrent.ExecutionContext +import scala.concurrent.duration.DurationInt +import scala.util.Success + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.rest.{AppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, UploadedAppResource} +import org.apache.spark.deploy.rest.kubernetes._ +import org.apache.spark.internal.Logging +import org.apache.spark.util.Utils + +private[spark] class Client( + sparkConf: SparkConf, + mainClass: String, + mainAppResource: String, + appArgs: Array[String]) extends Logging { + import Client._ + + private val namespace = sparkConf.getOption("spark.kubernetes.namespace").getOrElse( + throw new IllegalArgumentException("Namespace must be provided in spark.kubernetes.namespace")) + private val master = sparkConf + .getOption("spark.kubernetes.master") + .getOrElse(throw new IllegalArgumentException("Master must be provided in spark.kubernetes.master")) + + private val launchTime = System.currentTimeMillis + private val kubernetesAppId = sparkConf.getOption("spark.app.name") + .orElse(sparkConf.getOption("spark.app.id")) + .getOrElse(s"spark-$launchTime") + + private val secretName = s"spark-submission-server-secret-$kubernetesAppId" + private val driverLauncherSelectorValue = s"driver-launcher-$launchTime" + // TODO set precise version by default + private val driverDockerImage = sparkConf.get( + "spark.kubernetes.driver.docker.image", "spark-driver:latest") + private val uploadedDriverExtraClasspath = sparkConf + .getOption("spark.kubernetes.driver.uploads.driverExtraClasspath") + private val uploadedJars = sparkConf.getOption("spark.kubernetes.driver.uploads.jars") + + private val secretBytes = new Array[Byte](128) + SECURE_RANDOM.nextBytes(secretBytes) + private val secretBase64String = Base64.encodeBase64String(secretBytes) + + private implicit val retryableExecutionContext = ExecutionContext + .fromExecutorService( + Executors.newSingleThreadExecutor(new ThreadFactoryBuilder() + .setNameFormat("kubernetes-client-retryable-futures-%d") + .setDaemon(true) + .build())) + + def run(): Unit = { + var k8ConfBuilder = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(master) + .withNamespace(namespace) + sparkConf.getOption("spark.kubernetes.submit.caCertFile").foreach { + f => k8ConfBuilder = k8ConfBuilder.withCaCertFile(f) + } + sparkConf.getOption("spark.kubernetes.submit.clientKeyFile").foreach { + f => k8ConfBuilder = k8ConfBuilder.withClientKeyFile(f) + } + sparkConf.getOption("spark.kubernetes.submit.clientCertFile").foreach { + f => k8ConfBuilder = k8ConfBuilder.withClientCertFile(f) + } + + val k8ClientConfig = k8ConfBuilder.build + Utils.tryWithResource(new DefaultKubernetesClient(k8ClientConfig))(kubernetesClient => { + val secret = kubernetesClient.secrets().createNew() + .withNewMetadata() + .withName(secretName) +
.endMetadata() + .withData(Map((SUBMISSION_SERVER_SECRET_NAME, secretBase64String)).asJava) + .withType("Opaque") + .done() + try { + val selectors = Map(DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue).asJava + val uiPort = sparkConf + .getOption("spark.ui.port") + .map(_.toInt) + .getOrElse(DEFAULT_UI_PORT) + val (servicePorts, containerPorts) = configurePorts(uiPort) + val service = kubernetesClient.services().createNew() + .withNewMetadata() + .withName(kubernetesAppId) + .endMetadata() + .withNewSpec() + .withSelector(selectors) + .withPorts(servicePorts.asJava) + .endSpec() + .done() + sparkConf.set("spark.kubernetes.driver.service.name", service.getMetadata.getName) + sparkConf.setIfMissing("spark.driver.port", DRIVER_PORT.toString) + sparkConf.setIfMissing("spark.blockmanager.port", BLOCKMANAGER_PORT.toString) + val submitRequest = buildSubmissionRequest() + val submitCompletedFuture = SettableFuture.create[Boolean] + val secretDirectory = s"/var/run/secrets/spark-submission/$kubernetesAppId" + + val podWatcher = new Watcher[Pod] { + override def eventReceived(action: Action, t: Pod): Unit = { + if ((action == Action.ADDED || action == Action.MODIFIED) + && t.getStatus.getPhase == "Running" + && !submitCompletedFuture.isDone) { + t.getStatus + .getContainerStatuses + .asScala + .find(status => + status.getName == DRIVER_LAUNCHER_CONTAINER_NAME && status.getReady) match { + case Some(status) => + try { + val driverLauncher = getDriverLauncherService( + k8ClientConfig, master) + val ping = Retry.retry(5, 5.seconds) { + driverLauncher.ping() + } + ping onFailure { + case t: Throwable => + if (!submitCompletedFuture.isDone) { + submitCompletedFuture.setException(t) + } + } + val submitComplete = ping andThen { + case Success(_) => + driverLauncher.create(submitRequest) + submitCompletedFuture.set(true) + } + submitComplete onFailure { + case t: Throwable => + if (!submitCompletedFuture.isDone) { + submitCompletedFuture.setException(t) + } + } + } catch { + case e: Throwable => + if (!submitCompletedFuture.isDone) { + submitCompletedFuture.setException(e) + throw e + } + } + case None => + } + } + } + + override def onClose(e: KubernetesClientException): Unit = { + if (!submitCompletedFuture.isDone) { + submitCompletedFuture.setException(e) + } + } + } + + def createDriverPod(unused: Watch): Unit = { + kubernetesClient.pods().createNew() + .withNewMetadata() + .withName(kubernetesAppId) + .withLabels(selectors) + .endMetadata() + .withNewSpec() + .withRestartPolicy("OnFailure") + .addNewVolume() + .withName(s"spark-submission-secret-volume") + .withNewSecret() + .withSecretName(secret.getMetadata.getName) + .endSecret() + .endVolume + .addNewContainer() + .withName(DRIVER_LAUNCHER_CONTAINER_NAME) + .withImage(driverDockerImage) + .withImagePullPolicy("IfNotPresent") + .addNewVolumeMount() + .withName("spark-submission-secret-volume") + .withMountPath(secretDirectory) + .withReadOnly(true) + .endVolumeMount() + .addNewEnv() + .withName("SPARK_SUBMISSION_SECRET_LOCATION") + .withValue(s"$secretDirectory/$SUBMISSION_SERVER_SECRET_NAME") + .endEnv() + .addNewEnv() + .withName("SPARK_DRIVER_LAUNCHER_SERVER_PORT") + .withValue(DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT.toString) + .endEnv() + .withPorts(containerPorts.asJava) + .endContainer() + .endSpec() + .done() + submitCompletedFuture.get(30, TimeUnit.SECONDS) + } + + Utils.tryWithResource(kubernetesClient + .pods() + .withLabels(selectors) + .watch(podWatcher)) { createDriverPod } + } finally { + 
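// The secret only exists to authenticate this submission to the driver launcher, so it is deleted whether or not the submission succeeded. +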
kubernetesClient.secrets().delete(secret) + } + }) + } + + private def configurePorts(uiPort: Int): (Seq[ServicePort], Seq[ContainerPort]) = { + val servicePorts = new ArrayBuffer[ServicePort] + val containerPorts = new ArrayBuffer[ContainerPort] + + def addPortToServiceAndContainer(portName: String, portValue: Int): Unit = { + servicePorts += new ServicePortBuilder() + .withName(portName) + .withPort(portValue) + .withNewTargetPort(portValue) + .build() + containerPorts += new ContainerPortBuilder() + .withContainerPort(portValue) + .build() + } + + addPortToServiceAndContainer( + DRIVER_LAUNCHER_SERVICE_PORT_NAME, + DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT) + addPortToServiceAndContainer( + DRIVER_PORT_NAME, + sparkConf + .getOption("spark.driver.port") + .map(_.toInt) + .getOrElse(DRIVER_PORT)) + addPortToServiceAndContainer( + BLOCKMANAGER_PORT_NAME, + sparkConf + .getOption("spark.blockmanager.port") + .map(_.toInt) + .getOrElse(BLOCKMANAGER_PORT)) + + addPortToServiceAndContainer(UI_PORT_NAME, uiPort) + (servicePorts.toSeq, containerPorts.toSeq) + } + + private def buildSubmissionRequest(): KubernetesCreateSubmissionRequest = { + val appResourceUri = Utils.resolveURI(mainAppResource) + val resolvedAppResource: AppResource = appResourceUri.getScheme match { + case "file" | null => + val appFile = new File(appResourceUri.getPath) + if (!appFile.isFile) { + throw new IllegalStateException("Provided local file path does not exist" + + s" or is not a file: ${appFile.getAbsolutePath}") + } + val fileBytes = Files.toByteArray(appFile) + val fileBase64 = Base64.encodeBase64String(fileBytes) + UploadedAppResource(resourceBase64Contents = fileBase64, name = appFile.getName) + case other => RemoteAppResource(other) + } + + val uploadDriverExtraClasspathBase64Contents = getFileContents(uploadedDriverExtraClasspath) + val uploadJarsBase64Contents = getFileContents(uploadedJars) + KubernetesCreateSubmissionRequest( + appResource = resolvedAppResource, + mainClass = mainClass, + appArgs = appArgs, + secret = secretBase64String, + sparkProperties = sparkConf.getAll.toMap, + uploadedDriverExtraClasspathBase64Contents = uploadDriverExtraClasspathBase64Contents, + uploadedJarsBase64Contents = uploadJarsBase64Contents) + } + + def getFileContents(maybeFilePaths: Option[String]): Array[(String, String)] = { + maybeFilePaths + .map(_.split(",").map(filePath => { + val driverExtraClasspathFile = new File(filePath) + if (!driverExtraClasspathFile.isFile) { + throw new IllegalStateException("Provided file to upload for driver extra classpath" + + s" does not exist or is not a file: $filePath") + } else { + val fileBytes = Files.toByteArray(driverExtraClasspathFile) + val fileBase64 = Base64.encodeBase64String(fileBytes) + (driverExtraClasspathFile.getName, fileBase64) + } + })).getOrElse(Array.empty[(String, String)]) + } + + private def getDriverLauncherService( + k8ClientConfig: Config, + kubernetesMaster: String): KubernetesSparkRestApi = { + val url = s"${ + Array[String]( + kubernetesMaster, + "api", "v1", "proxy", + "namespaces", namespace, + "services", kubernetesAppId).mkString("/")}" + + s":$DRIVER_LAUNCHER_SERVICE_PORT_NAME/" + + val sslContext = SSLUtils.sslContext(k8ClientConfig) + val trustManager = SSLUtils.trustManagers( + k8ClientConfig)(0).asInstanceOf[X509TrustManager] + HttpClientUtil.createClient[KubernetesSparkRestApi]( + uri = url, + sslSocketFactory = sslContext.getSocketFactory, + trustContext = trustManager) + } +} + +private object Client { + + private val SUBMISSION_SERVER_SECRET_NAME 
= "spark-submission-server-secret" + private val DRIVER_LAUNCHER_SELECTOR_LABEL = "driver-launcher-selector" + private val DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT = 7077 + private val DRIVER_PORT = 7078 + private val BLOCKMANAGER_PORT = 7079 + private val DEFAULT_UI_PORT = 4040 + private val UI_PORT_NAME = "spark-ui-port" + private val DRIVER_LAUNCHER_SERVICE_PORT_NAME = "driver-launcher-port" + private val DRIVER_PORT_NAME = "driver-port" + private val BLOCKMANAGER_PORT_NAME = "block-manager-port" + private val DRIVER_LAUNCHER_CONTAINER_NAME = "spark-kubernetes-driver-launcher" + private val SECURE_RANDOM = new SecureRandom() + + def main(args: Array[String]): Unit = { + require(args.length >= 2, s"Too few arguments. Usage: ${getClass.getName} " + + s" []") + val mainAppResource = args(0) + val mainClass = args(1) + val appArgs = args.drop(2) + val sparkConf = new SparkConf(true) + new Client( + mainAppResource = mainAppResource, + mainClass = mainClass, + sparkConf = sparkConf, + appArgs = appArgs).run() + } +} diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala new file mode 100644 index 0000000000000..4c715c86cc7f9 --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import java.io.File + +import com.google.common.base.Charsets +import com.google.common.io.Files +import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} + +private[spark] object KubernetesClientBuilder { + private val API_SERVER_TOKEN = new File("/var/run/secrets/kubernetes.io/serviceaccount/token") + private val CA_CERT_FILE = new File("/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + + /** + * Creates a {@link KubernetesClient}, expecting to be from + * within the context of a pod. When doing so, credentials files + * are picked up from canonical locations, as they are injected + * into the pod's disk space. 
+ */ + def buildFromWithinPod( + kubernetesMaster: String, + kubernetesNamespace: String): DefaultKubernetesClient = { + var clientConfigBuilder = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(kubernetesMaster) + .withNamespace(kubernetesNamespace) + + if (CA_CERT_FILE.isFile) { + clientConfigBuilder = clientConfigBuilder.withCaCertFile(CA_CERT_FILE.getAbsolutePath) + } + + if (API_SERVER_TOKEN.isFile) { + clientConfigBuilder = clientConfigBuilder.withOauthToken( + Files.toString(API_SERVER_TOKEN, Charsets.UTF_8)) + } + new DefaultKubernetesClient(clientConfigBuilder.build) + } +} diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala new file mode 100644 index 0000000000000..e5ce0bcd606b2 --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.duration.Duration + +private[spark] object Retry { + + private def retryableFuture[T] + (times: Int, interval: Duration) + (f: => Future[T]) + (implicit executionContext: ExecutionContext): Future[T] = { + f recoverWith { + case _ if times > 0 => { + Thread.sleep(interval.toMillis) + retryableFuture(times - 1, interval)(f) + } + } + } + + def retry[T] + (times: Int, interval: Duration) + (f: => T) + (implicit executionContext: ExecutionContext): Future[T] = { + retryableFuture(times, interval)(Future[T] { f }) + } +} diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala new file mode 100644 index 0000000000000..4b7bb66083f29 --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest + +import com.fasterxml.jackson.annotation.{JsonSubTypes, JsonTypeInfo} + +import org.apache.spark.SPARK_VERSION + +// TODO: jars should probably be compressed. Shipping tarballs would be optimal. +case class KubernetesCreateSubmissionRequest( + val appResource: AppResource, + val mainClass: String, + val appArgs: Array[String], + val sparkProperties: Map[String, String], + val secret: String, + val uploadedDriverExtraClasspathBase64Contents: Array[(String, String)] + = Array.empty[(String, String)], + val uploadedJarsBase64Contents: Array[(String, String)] + = Array.empty[(String, String)]) extends SubmitRestProtocolRequest { + message = "create" + clientSparkVersion = SPARK_VERSION +} + +@JsonTypeInfo( + use = JsonTypeInfo.Id.NAME, + include = JsonTypeInfo.As.PROPERTY, + property = "type") +@JsonSubTypes(value = Array( + new JsonSubTypes.Type(value = classOf[UploadedAppResource], name = "UploadedAppResource"), + new JsonSubTypes.Type(value = classOf[RemoteAppResource], name = "RemoteAppResource"))) +abstract class AppResource + +case class UploadedAppResource( + resourceBase64Contents: String, + name: String = "spark-app-resource") extends AppResource + +case class RemoteAppResource(resource: String) extends AppResource + +class PingResponse extends SubmitRestProtocolResponse { + val text = "pong" + message = "pong" + serverSparkVersion = SPARK_VERSION +} + diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala new file mode 100644 index 0000000000000..eb7d411700829 --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes + +import javax.net.ssl.{SSLContext, SSLSocketFactory, X509TrustManager} + +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import feign.Feign +import feign.Request.Options +import feign.jackson.{JacksonDecoder, JacksonEncoder} +import feign.jaxrs.JAXRSContract +import okhttp3.OkHttpClient +import scala.reflect.ClassTag + +import org.apache.spark.status.api.v1.JacksonMessageWriter + +private[spark] object HttpClientUtil { + + def createClient[T: ClassTag]( + uri: String, + sslSocketFactory: SSLSocketFactory = SSLContext.getDefault.getSocketFactory, + trustContext: X509TrustManager = null, + readTimeoutMillis: Int = 20000, + connectTimeoutMillis: Int = 20000): T = { + var httpClientBuilder = new OkHttpClient.Builder() + Option.apply(trustContext).foreach(context => { + httpClientBuilder = httpClientBuilder.sslSocketFactory(sslSocketFactory, context) + }) + val objectMapper = new ObjectMapper() + .registerModule(new DefaultScalaModule) + .setDateFormat(JacksonMessageWriter.makeISODateFormat) + objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + val clazz = implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] + Feign.builder() + .client(new feign.okhttp.OkHttpClient(httpClientBuilder.build())) + .contract(new JAXRSContract) + .encoder(new JacksonEncoder(objectMapper)) + .decoder(new JacksonDecoder(objectMapper)) + .options(new Options(connectTimeoutMillis, readTimeoutMillis)) + .target(clazz, uri) + } +} diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala new file mode 100644 index 0000000000000..3cbcb16293b1d --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes + +import javax.ws.rs.{Consumes, GET, Path, POST, Produces} +import javax.ws.rs.core.MediaType + +import org.apache.spark.deploy.rest.{CreateSubmissionResponse, KubernetesCreateSubmissionRequest, PingResponse} + +@Path("/v1/submissions/") +trait KubernetesSparkRestApi { + + @POST + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.APPLICATION_JSON)) + @Path("/create") + def create(request: KubernetesCreateSubmissionRequest): CreateSubmissionResponse + + @GET + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.APPLICATION_JSON)) + @Path("/ping") + def ping(): PingResponse + +} diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala new file mode 100644 index 0000000000000..0a2e8176394ab --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes + +import java.io.File +import java.net.URI +import java.nio.file.Paths +import java.util.concurrent.CountDownLatch +import javax.servlet.http.{HttpServletRequest, HttpServletResponse} + +import com.google.common.io.Files +import org.apache.commons.codec.binary.Base64 +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.{SecurityManager, SPARK_VERSION, SparkConf} +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.rest._ +import org.apache.spark.util.{ShutdownHookManager, Utils} + +private case class KubernetesSparkRestServerArguments( + val host: Option[String] = None, + val port: Option[Int] = None, + val secretFile: Option[String] = None) { + def validate(): KubernetesSparkRestServerArguments = { + require(host.isDefined, "Hostname not set via --hostname.") + require(port.isDefined, "Port not set via --port") + require(secretFile.isDefined, "Secret file not set via --secret-file") + this + } +} + +private object KubernetesSparkRestServerArguments { + def fromArgsArray(inputArgs: Array[String]): KubernetesSparkRestServerArguments = { + var args = inputArgs.toList + var resolvedArguments = KubernetesSparkRestServerArguments() + while (args.nonEmpty) { + resolvedArguments = args match { + case "--hostname" :: value :: tail => + args = tail + resolvedArguments.copy(host = Some(value)) + case "--port" :: value :: tail => + args = tail + resolvedArguments.copy(port = Some(value.toInt)) + case "--secret-file" :: value :: tail => + args = tail + resolvedArguments.copy(secretFile = Some(value)) + // TODO polish usage message + case Nil => resolvedArguments + case unknown => throw new IllegalStateException(s"Unknown argument(s) found: $unknown") + } + } + resolvedArguments.validate() + } +} + +private[spark] class KubernetesSparkRestServer( + host: String, + port: Int, + conf: SparkConf, + expectedApplicationSecret: Array[Byte]) + extends RestSubmissionServer(host, port, conf) { + + private val javaExecutable = s"${System.getenv("JAVA_HOME")}/bin/java" + private val sparkHome = System.getenv("SPARK_HOME") + private val securityManager = new SecurityManager(conf) + override protected lazy val contextToServlet = Map[String, RestServlet]( + s"$baseContext/create/*" -> submitRequestServlet, + s"$baseContext/ping/*" -> pingServlet) + + private val pingServlet = new PingServlet + override protected val submitRequestServlet: SubmitRequestServlet + = new KubernetesSubmitRequestServlet + // TODO + override protected val statusRequestServlet: StatusRequestServlet = null + override protected val killRequestServlet: KillRequestServlet = null + + private class PingServlet extends RestServlet { + protected override def doGet( + request: HttpServletRequest, + response: HttpServletResponse): Unit = { + sendResponse(new PingResponse, response) + } + } + + private class KubernetesSubmitRequestServlet extends SubmitRequestServlet { + + // TODO validating the secret should be done as part of a header of the request. + // Instead here we have to specify the secret in the body. 
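+ // Overall flow: check the submitted secret against the one mounted into this pod, write the
+ // uploaded jars and driver-extra-classpath files from their base64 payloads into a temporary
+ // directory, build the driver classpath from those files plus the Spark jars, and launch the
+ // application's main class in a separate JVM on this pod.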
+ override protected def handleSubmit( + requestMessageJson: String, + requestMessage: SubmitRestProtocolMessage, + responseServlet: HttpServletResponse): SubmitRestProtocolResponse = { + requestMessage match { + case KubernetesCreateSubmissionRequest( + appResource, + mainClass, + appArgs, + sparkProperties, + secret, + uploadedDriverExtraClasspath, + uploadedJars) => + val decodedSecret = Base64.decodeBase64(secret) + if (!expectedApplicationSecret.sameElements(decodedSecret)) { + responseServlet.setStatus(HttpServletResponse.SC_UNAUTHORIZED) + handleError("Unauthorized to submit application.") + } else { + val tempDir = Utils.createTempDir() + val appResourcePath = resolvedAppResource(appResource, tempDir) + val driverClasspathDirectory = new File(tempDir, "driver-extra-classpath") + if (!driverClasspathDirectory.mkdir) { + throw new IllegalStateException("Failed to create driver extra classpath" + + s" dir at ${driverClasspathDirectory.getAbsolutePath}") + } + val jarsDirectory = new File(tempDir, "jars") + if (!jarsDirectory.mkdir) { + throw new IllegalStateException("Failed to create jars dir at" + + s"${jarsDirectory.getAbsolutePath}") + } + val writtenDriverExtraClasspath = writeBase64ContentsToFiles( + uploadedDriverExtraClasspath, driverClasspathDirectory) + val writtenJars = writeBase64ContentsToFiles(uploadedJars, jarsDirectory) + val originalDriverExtraClasspath = sparkProperties.get("spark.driver.extraClassPath") + .map(_.split(",")) + .getOrElse(Array.empty[String]) + val resolvedDriverExtraClasspath = writtenDriverExtraClasspath ++ + originalDriverExtraClasspath + val originalJars = sparkProperties.get("spark.jars") + .map(_.split(",")) + .getOrElse(Array.empty[String]) + val resolvedJars = writtenJars ++ originalJars ++ Array(appResourcePath) + val sparkJars = new File(sparkHome, "jars").listFiles().map(_.getAbsolutePath) + val driverClasspath = resolvedDriverExtraClasspath ++ + resolvedJars ++ + sparkJars ++ + Array(appResourcePath) + val resolvedSparkProperties = new mutable.HashMap[String, String] + resolvedSparkProperties ++= sparkProperties + resolvedSparkProperties("spark.jars") = resolvedJars.mkString(",") + + val command = new ArrayBuffer[String] + command += javaExecutable + command += "-cp" + command += s"${driverClasspath.mkString(":")}" + for (prop <- resolvedSparkProperties) { + command += s"-D${prop._1}=${prop._2}" + } + val driverMemory = resolvedSparkProperties.getOrElse("spark.driver.memory", "1g") + command += s"-Xms$driverMemory" + command += s"-Xmx$driverMemory" + command += mainClass + command ++= appArgs + val pb = new ProcessBuilder(command: _*) + Paths.get(sparkHome, "logs").toFile.mkdirs + pb.redirectOutput(Paths.get(sparkHome, "logs", "stdout").toFile) + pb.redirectError(Paths.get(sparkHome, "logs", "stderr").toFile) + val process = pb.start() + ShutdownHookManager.addShutdownHook(() => { + logInfo("Received stop command, shutting down the running Spark application...") + process.destroy() + }) + val response = new CreateSubmissionResponse + response.success = true + response.submissionId = null + response.message = "success" + response.serverSparkVersion = SPARK_VERSION + response + } + case unexpected => + responseServlet.setStatus(HttpServletResponse.SC_BAD_REQUEST) + handleError(s"Received message of unexpected type ${unexpected.messageType}.") + } + } + + def resolvedAppResource(appResource: AppResource, tempDir: File): String = { + val appResourcePath = appResource match { + case UploadedAppResource(resourceContentsBase64, resourceName) => + 
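// Decode the base64-encoded application resource and write it to a new file in the temp dir. +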
val resourceFile = new File(tempDir, resourceName) + val resourceFilePath = resourceFile.getAbsolutePath + if (resourceFile.createNewFile()) { + val resourceContentsBytes = Base64.decodeBase64(resourceContentsBase64) + Files.write(resourceContentsBytes, resourceFile) + resourceFile.getAbsolutePath + } else { + throw new IllegalStateException(s"Failed to write main app resource file" + + s" to $resourceFilePath") + } + case RemoteAppResource(resource) => + Utils.fetchFile(resource, tempDir, conf, + securityManager, SparkHadoopUtil.get.newConfiguration(conf), + System.currentTimeMillis(), useCache = false) + val fileName = Utils.decodeFileNameInURI(URI.create(resource)) + val downloadedFile = new File(tempDir, fileName) + val downloadedFilePath = downloadedFile.getAbsolutePath + if (!downloadedFile.isFile) { + throw new IllegalStateException(s"Main app resource is not a file or" + + s" does not exist at $downloadedFilePath") + } + downloadedFilePath + } + appResourcePath + } + } + + private def writeBase64ContentsToFiles( + filesBase64Contents: Array[(String, String)], + rootDir: File): Seq[String] = { + val resolvedFileNames = new scala.collection.mutable.HashSet[String] + val resolvedFilePaths = new ArrayBuffer[String] + for (file <- filesBase64Contents) { + var currentFileName = file._1 + var deduplicationCounter = 1 + while (resolvedFileNames.contains(currentFileName)) { + // Prepend the deduplication counter so as to not mess with the extension + currentFileName = s"$deduplicationCounter-$currentFileName" + deduplicationCounter += 1 + } + val resolvedFile = new File(rootDir, currentFileName) + val resolvedFilePath = resolvedFile.getAbsolutePath + if (resolvedFile.createNewFile()) { + val fileContents = Base64.decodeBase64(file._2) + Files.write(fileContents, resolvedFile) + } else { + throw new IllegalStateException(s"Could not write jar file to $resolvedFilePath") + } + resolvedFileNames += currentFileName + resolvedFilePaths += resolvedFilePath + } + resolvedFilePaths.toSeq + } +} + +private[spark] object KubernetesSparkRestServer { + private val barrier = new CountDownLatch(1) + def main(args: Array[String]): Unit = { + val parsedArguments = KubernetesSparkRestServerArguments.fromArgsArray(args) + val secretFile = new File(parsedArguments.secretFile.get) + if (!secretFile.isFile) { + throw new IllegalArgumentException(s"Secret file specified by --secret-file" + + " is not a file, or does not exist.") + } + val secretBytes = Files.toByteArray(secretFile) + val sparkConf = new SparkConf(true) + val server = new KubernetesSparkRestServer( + parsedArguments.host.get, + parsedArguments.port.get, + sparkConf, + secretBytes) + server.start() + ShutdownHookManager.addShutdownHook(() => { + try { + server.stop() + } finally { + barrier.countDown() + } + }) + barrier.await() + } +} + diff --git a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala new file mode 100644 index 0000000000000..0d3b97c636ca3 --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.scheduler.cluster.kubernetes + +import org.apache.spark.SparkContext +import org.apache.spark.scheduler.{ExternalClusterManager, SchedulerBackend, TaskScheduler, TaskSchedulerImpl} + +private[spark] class KubernetesClusterManager extends ExternalClusterManager { + + override def canCreate(masterURL: String): Boolean = masterURL.startsWith("kubernetes") + + override def createTaskScheduler(sc: SparkContext, masterURL: String): TaskScheduler = { + val scheduler = new TaskSchedulerImpl(sc) + sc.taskScheduler = scheduler + scheduler + } + + override def createSchedulerBackend(sc: SparkContext, masterURL: String, scheduler: TaskScheduler) + : SchedulerBackend = { + new KubernetesClusterSchedulerBackend(sc.taskScheduler.asInstanceOf[TaskSchedulerImpl], sc) + } + + override def initialize(scheduler: TaskScheduler, backend: SchedulerBackend): Unit = { + scheduler.asInstanceOf[TaskSchedulerImpl].initialize(backend) + } + +} + diff --git a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala new file mode 100644 index 0000000000000..f37b97e4dd0dc --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.scheduler.cluster.kubernetes + +import java.util.UUID +import java.util.concurrent.Executors +import java.util.concurrent.atomic.{AtomicInteger, AtomicLong} + +import com.google.common.util.concurrent.ThreadFactoryBuilder +import io.fabric8.kubernetes.api.model.{ContainerPort, ContainerPortBuilder, EnvVar, EnvVarBuilder, Pod, QuantityBuilder} +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import scala.concurrent.{ExecutionContext, Future} + +import org.apache.spark.{SparkContext, SparkException} +import org.apache.spark.deploy.kubernetes.KubernetesClientBuilder +import org.apache.spark.rpc.RpcEndpointAddress +import org.apache.spark.scheduler.TaskSchedulerImpl +import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend +import org.apache.spark.util.Utils + +private[spark] class KubernetesClusterSchedulerBackend( + scheduler: TaskSchedulerImpl, + val sc: SparkContext) + extends CoarseGrainedSchedulerBackend(scheduler, sc.env.rpcEnv) { + + import KubernetesClusterSchedulerBackend._ + + private val EXECUTOR_MODIFICATION_LOCK = new Object + private val runningExecutorPods = new scala.collection.mutable.HashMap[String, Pod] + + private val kubernetesMaster = conf + .getOption("spark.kubernetes.master") + .getOrElse( + throw new SparkException("Kubernetes master must be specified in kubernetes mode.")) + + private val executorDockerImage = conf + .get("spark.kubernetes.executor.docker.image", s"spark-executor:${sc.version}") + + private val kubernetesNamespace = conf + .getOption("spark.kubernetes.namespace") + .getOrElse( + throw new SparkException("Kubernetes namespace must be specified in kubernetes mode.")) + + private val executorPort = conf.getInt("spark.executor.port", DEFAULT_STATIC_PORT) + + private val blockmanagerPort = conf + .getInt("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT) + + private val kubernetesDriverServiceName = conf + .getOption("spark.kubernetes.driver.service.name") + .getOrElse( + throw new SparkException("Must specify the service name the driver is running with")) + + private val executorMemory = conf.getOption("spark.executor.memory").getOrElse("1g") + private val executorMemoryBytes = Utils.byteStringAsBytes(executorMemory) + + private val memoryOverheadBytes = conf + .getOption("spark.kubernetes.executor.memoryOverhead") + .map(overhead => Utils.byteStringAsBytes(overhead)) + .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * executorMemoryBytes).toInt, + MEMORY_OVERHEAD_MIN)) + private val executorMemoryWithOverhead = executorMemoryBytes + memoryOverheadBytes + + private val executorCores = conf.getOption("spark.executor.cores").getOrElse("1") + + private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( + Executors.newCachedThreadPool( + new ThreadFactoryBuilder() + .setDaemon(true) + .setNameFormat("kubernetes-executor-requests-%d") + .build)) + + private val kubernetesClient = KubernetesClientBuilder + .buildFromWithinPod(kubernetesMaster, kubernetesNamespace) + + override val minRegisteredRatio = + if (conf.getOption("spark.scheduler.minRegisteredResourcesRatio").isEmpty) { + 0.8 + } else { + super.minRegisteredRatio + } + + protected var totalExpectedExecutors = new AtomicInteger(0) + + private val driverUrl = RpcEndpointAddress( + System.getenv(s"${convertToEnvMode(kubernetesDriverServiceName)}_SERVICE_HOST"), + sc.getConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT), + CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString + + private def 
convertToEnvMode(value: String): String = + value.toUpperCase.map { c => if (c == '-') '_' else c } + + private val initialExecutors = getInitialTargetExecutorNumber(1) + + private def getInitialTargetExecutorNumber(defaultNumExecutors: Int = 1): Int = { + if (Utils.isDynamicAllocationEnabled(conf)) { + val minNumExecutors = conf.getInt("spark.dynamicAllocation.minExecutors", 0) + val initialNumExecutors = Utils.getDynamicAllocationInitialExecutors(conf) + val maxNumExecutors = conf.getInt("spark.dynamicAllocation.maxExecutors", 1) + require(initialNumExecutors >= minNumExecutors && initialNumExecutors <= maxNumExecutors, + s"initial executor number $initialNumExecutors must between min executor number " + + s"$minNumExecutors and max executor number $maxNumExecutors") + + initialNumExecutors + } else { + conf.getInt("spark.executor.instances", defaultNumExecutors) + } + } + + override def sufficientResourcesRegistered(): Boolean = { + totalRegisteredExecutors.get() >= initialExecutors * minRegisteredRatio + } + + override def start(): Unit = { + super.start() + if (!Utils.isDynamicAllocationEnabled(sc.conf)) { + doRequestTotalExecutors(initialExecutors) + } + } + + override def stop(): Unit = { + // TODO investigate why Utils.tryLogNonFatalError() doesn't work in this context. + // When using Utils.tryLogNonFatalError some of the code fails but without any logs or + // indication as to why. + try { + runningExecutorPods.values.foreach(kubernetesClient.pods().delete(_)) + } catch { + case e: Throwable => logError("Uncaught exception while shutting down controllers.", e) + } + try { + kubernetesClient.services().withName(kubernetesDriverServiceName).delete() + } catch { + case e: Throwable => logError("Uncaught exception while shutting down driver service.", e) + } + try { + kubernetesClient.close() + } catch { + case e: Throwable => logError("Uncaught exception closing Kubernetes client.", e) + } + super.stop() + } + + private def allocateNewExecutorPod(): (String, Pod) = { + val executorKubernetesId = UUID.randomUUID().toString.replaceAll("-", "") + val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString + val name = s"exec$executorKubernetesId" + val selectors = Map(SPARK_EXECUTOR_SELECTOR -> executorId, + SPARK_APP_SELECTOR -> applicationId()).asJava + val executorMemoryQuantity = new QuantityBuilder(false) + .withAmount(executorMemoryBytes.toString) + .build() + val executorMemoryLimitQuantity = new QuantityBuilder(false) + .withAmount(executorMemoryWithOverhead.toString) + .build() + val requiredEnv = new ArrayBuffer[EnvVar] + requiredEnv += new EnvVarBuilder() + .withName("SPARK_EXECUTOR_PORT") + .withValue(executorPort.toString) + .build() + requiredEnv += new EnvVarBuilder() + .withName("SPARK_DRIVER_URL") + .withValue(driverUrl) + .build() + requiredEnv += new EnvVarBuilder() + .withName("SPARK_EXECUTOR_CORES") + .withValue(executorCores) + .build() + requiredEnv += new EnvVarBuilder() + .withName("SPARK_EXECUTOR_MEMORY") + .withValue(executorMemory) + .build() + requiredEnv += new EnvVarBuilder() + .withName("SPARK_APPLICATION_ID") + .withValue(applicationId()) + .build() + requiredEnv += new EnvVarBuilder() + .withName("SPARK_EXECUTOR_ID") + .withValue(executorId) + .build() + val requiredPorts = new ArrayBuffer[ContainerPort] + requiredPorts += new ContainerPortBuilder() + .withName(EXECUTOR_PORT_NAME) + .withContainerPort(executorPort) + .build() + requiredPorts += new ContainerPortBuilder() + .withName(BLOCK_MANAGER_PORT_NAME) + .withContainerPort(blockmanagerPort) + 
.build() + (executorKubernetesId, kubernetesClient.pods().createNew() + .withNewMetadata() + .withName(name) + .withLabels(selectors) + .endMetadata() + .withNewSpec() + .addNewContainer() + .withName(s"exec-${applicationId()}-container") + .withImage(executorDockerImage) + .withImagePullPolicy("IfNotPresent") + .withNewResources() + .addToRequests("memory", executorMemoryQuantity) + .addToLimits("memory", executorMemoryLimitQuantity) + .endResources() + .withEnv(requiredEnv.asJava) + .withPorts(requiredPorts.asJava) + .endContainer() + .endSpec() + .done()) + } + + override def doRequestTotalExecutors(requestedTotal: Int): Future[Boolean] = Future[Boolean] { + EXECUTOR_MODIFICATION_LOCK.synchronized { + if (requestedTotal > totalExpectedExecutors.get) { + logInfo(s"Requesting ${requestedTotal - totalExpectedExecutors.get}" + + s" additional executors, expecting total $requestedTotal and currently" + + s" expected ${totalExpectedExecutors.get}") + for (i <- 0 until (requestedTotal - totalExpectedExecutors.get)) { + runningExecutorPods += allocateNewExecutorPod() + } + } + totalExpectedExecutors.set(requestedTotal) + } + true + } + + override def doKillExecutors(executorIds: Seq[String]): Future[Boolean] = Future[Boolean] { + EXECUTOR_MODIFICATION_LOCK.synchronized { + for (executor <- executorIds) { + runningExecutorPods.remove(executor) match { + case Some(pod) => kubernetesClient.pods().delete(pod) + case None => logWarning(s"Unable to remove pod for unknown executor $executor") + } + } + } + true + } +} + +private object KubernetesClusterSchedulerBackend { + private val SPARK_EXECUTOR_SELECTOR = "spark-exec" + private val SPARK_APP_SELECTOR = "spark-app" + private val DEFAULT_STATIC_PORT = 10000 + private val DEFAULT_BLOCKMANAGER_PORT = 7079 + private val DEFAULT_DRIVER_PORT = 7078 + private val BLOCK_MANAGER_PORT_NAME = "blockmanager" + private val EXECUTOR_PORT_NAME = "executor" + private val MEMORY_OVERHEAD_FACTOR = 0.10 + private val MEMORY_OVERHEAD_MIN = 384L + private val EXECUTOR_ID_COUNTER = new AtomicLong(0L) +} diff --git a/kubernetes/docker-minimal-bundle/pom.xml b/kubernetes/docker-minimal-bundle/pom.xml new file mode 100644 index 0000000000000..3de939ea3978a --- /dev/null +++ b/kubernetes/docker-minimal-bundle/pom.xml @@ -0,0 +1,137 @@ + + + + + 4.0.0 + + org.apache.spark + spark-parent_2.11 + 2.1.0-SNAPSHOT + ../pom.xml + + + spark-docker-minimal-bundle_2.11 + Spark Project Docker Minimal Bundle + http://spark.apache.org/ + pom + + + docker-minimal-bundle + none + pre-integration-test + + + + + org.apache.spark + spark-assembly_${scala.binary.version} + ${project.version} + pom + + + + com.google.guava + guava + ${hadoop.deps.scope} + + + + + org.apache.spark + spark-kubernetes_${scala.binary.version} + ${project.version} + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + driver-docker-dist + pre-integration-test + + single + + + + src/main/assembly/driver-assembly.xml + + posix + + + + executor-docker-dist + pre-integration-test + + single + + + + src/main/assembly/executor-assembly.xml + + posix + + + + + + + + + + + hive + + + org.apache.spark + spark-hive_${scala.binary.version} + ${project.version} + + + + + hive-thriftserver + + + org.apache.spark + spark-hive-thriftserver_${scala.binary.version} + ${project.version} + + + + + spark-ganglia-lgpl + + + org.apache.spark + spark-ganglia-lgpl_${scala.binary.version} + ${project.version} + + + + + diff --git a/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml 
b/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml new file mode 100644 index 0000000000000..145244f34d1d9 --- /dev/null +++ b/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml @@ -0,0 +1,84 @@ + + + driver-docker-dist + + tar.gz + dir + + false + + + + ${project.parent.basedir}/core/src/main/resources/org/apache/spark/ui/static/ + + ui-resources/org/apache/spark/ui/static + + **/* + + + + + ${project.parent.basedir}/sbin/ + + sbin + + **/* + + + + + ${project.parent.basedir}/bin/ + + bin + + **/* + + + + + ${project.parent.basedir}/conf/ + + conf + + **/* + + + + + src/main/docker/driver + + + + **/* + + + + + + jars + true + false + runtime + false + + org.apache.spark:spark-assembly_${scala.binary.version}:pom + org.spark-project.spark:unused + + + + diff --git a/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml b/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml new file mode 100644 index 0000000000000..d97ba56562a12 --- /dev/null +++ b/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml @@ -0,0 +1,84 @@ + + + executor-docker-dist + + tar.gz + dir + + false + + + + ${project.parent.basedir}/core/src/main/resources/org/apache/spark/ui/static/ + + ui-resources/org/apache/spark/ui/static + + **/* + + + + + ${project.parent.basedir}/sbin/ + + sbin + + **/* + + + + + ${project.parent.basedir}/bin/ + + bin + + **/* + + + + + ${project.parent.basedir}/conf/ + + conf + + **/* + + + + + src/main/docker/executor + + + + **/* + + + + + + jars + true + false + runtime + false + + org.apache.spark:spark-assembly_${scala.binary.version}:pom + org.spark-project.spark:unused + + + + diff --git a/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile new file mode 100644 index 0000000000000..3bba38d8395ae --- /dev/null +++ b/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -0,0 +1,26 @@ +FROM ubuntu:trusty + +# Upgrade package index +# install a few other useful packages plus Open Jdk 7 +# Remove unneeded /var/lib/apt/lists/* after install to reduce the +# docker image size (by ~30MB) +RUN apt-get update && \ + apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server procps && \ + rm -rf /var/lib/apt/lists/* + +RUN mkdir -p /opt/spark +RUN mkdir -p /opt/spark/ui-resources/org/apache/spark/ui/static +RUN touch /opt/spark/RELEASE + +ADD jars /opt/spark/jars +ADD bin /opt/spark/bin +ADD sbin /opt/spark/sbin +ADD conf /opt/spark/conf + +ENV SPARK_HOME /opt/spark +ENV JAVA_HOME /usr/lib/jvm/java-7-openjdk-amd64/jre + +WORKDIR /opt/spark + +# This class will also require setting a secret via the SPARK_APP_SECRET environment variable +CMD exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.KubernetesSparkRestServer --hostname $HOSTNAME --port $SPARK_DRIVER_LAUNCHER_SERVER_PORT --secret-file $SPARK_SUBMISSION_SECRET_LOCATION diff --git a/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile new file mode 100644 index 0000000000000..f68f1a3fb2694 --- /dev/null +++ b/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -0,0 +1,26 @@ +FROM ubuntu:trusty + +# Upgrade package index +# install a few other useful packages plus Open Jdk 7 +# Remove unneeded /var/lib/apt/lists/* after install to reduce the +# docker image size (by ~30MB) +RUN apt-get update && \ + 
apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server procps && \ + rm -rf /var/lib/apt/lists/* + +RUN mkdir -p /opt/spark +RUN mkdir -p /opt/spark/ui-resources/org/apache/spark/ui/static +RUN touch /opt/spark/RELEASE + +ADD jars /opt/spark/jars +ADD bin /opt/spark/bin +ADD sbin /opt/spark/sbin +ADD conf /opt/spark/conf + +ENV SPARK_HOME /opt/spark +ENV JAVA_HOME /usr/lib/jvm/java-7-openjdk-amd64/jre + +WORKDIR /opt/spark + +# TODO support spark.executor.extraClassPath +CMD exec ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp ${SPARK_HOME}/jars/\* org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $HOSTNAME diff --git a/kubernetes/integration-tests-spark-jobs/pom.xml b/kubernetes/integration-tests-spark-jobs/pom.xml new file mode 100644 index 0000000000000..17f1c4906214f --- /dev/null +++ b/kubernetes/integration-tests-spark-jobs/pom.xml @@ -0,0 +1,45 @@ + + + + 4.0.0 + + org.apache.spark + spark-parent_2.11 + 2.1.0-SNAPSHOT + ../../pom.xml + + + spark-kubernetes-integration-tests-spark-jobs_2.11 + jar + Spark Project Kubernetes Integration Tests Spark Jobs + + + + org.apache.spark + spark-core_${scala.binary.version} + ${project.version} + provided + + + org.apache.spark + spark-sql_${scala.binary.version} + ${project.version} + provided + + + diff --git a/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala b/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala new file mode 100644 index 0000000000000..6e4660b771305 --- /dev/null +++ b/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest.jobs + +import scala.math.random + +import org.apache.spark.sql.SparkSession + +// Equivalent to SparkPi except does not stop the Spark Context +// at the end and spins forever, so other things can inspect the +// Spark UI immediately after the fact. 
+private[spark] object SparkPiWithInfiniteWait { + + def main(args: Array[String]): Unit = { + val spark = SparkSession + .builder + .appName("Spark Pi") + .getOrCreate() + val slices = if (args.length > 0) args(0).toInt else 10 + val n = math.min(100000L * slices, Int.MaxValue).toInt // avoid overflow + val count = spark.sparkContext.parallelize(1 until n, slices).map { i => + val x = random * 2 - 1 + val y = random * 2 - 1 + if (x*x + y*y < 1) 1 else 0 + }.reduce(_ + _) + // scalastyle:off println + println("Pi is roughly " + 4.0 * count / (n - 1)) + // scalastyle:on println + + // Spin forever to keep the Spark UI active, so other things can inspect the job. + while (true) { + Thread.sleep(600000) + } + } + +} diff --git a/kubernetes/integration-tests/pom.xml b/kubernetes/integration-tests/pom.xml new file mode 100644 index 0000000000000..0568cb1e21826 --- /dev/null +++ b/kubernetes/integration-tests/pom.xml @@ -0,0 +1,206 @@ + + + + 4.0.0 + + org.apache.spark + spark-parent_2.11 + 2.1.0-SNAPSHOT + ../../pom.xml + + + spark-kubernetes-integration-tests_2.11 + jar + Spark Project Kubernetes Integration Tests + + + + org.apache.spark + spark-kubernetes_${scala.binary.version} + ${project.version} + test + + + org.apache.spark + spark-core_${scala.binary.version} + ${project.version} + test-jar + test + + + org.apache.spark + spark-kubernetes-integration-tests-spark-jobs_${scala.binary.version} + ${project.version} + test + + + org.apache.spark + spark-docker-minimal-bundle_${scala.binary.version} + ${project.version} + tar.gz + driver-docker-dist + test + + + * + * + + + + + com.google.guava + guava + test + + 18.0 + + + com.spotify + docker-client + test + + + + com.fasterxml.jackson.jaxrs + jackson-jaxrs-json-provider + + + com.fasterxml.jackson.core + jackson-databind + + + org.glassfish.jersey.core + jersey-client + + + org.glassfish.jersey.core + jersey-common + + + javax.ws.rs + jsr311-api + + + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-test-spark-jobs + pre-integration-test + + copy + + + + + org.apache.spark + spark-kubernetes-integration-tests-spark-jobs_${scala.binary.version} + ${project.version} + jar + ${project.build.directory}/integration-tests-spark-jobs + + + + + + unpack-docker-driver-bundle + pre-integration-test + + unpack + + + + + org.apache.spark + spark-docker-minimal-bundle_${scala.binary.version} + ${project.version} + driver-docker-dist + tar.gz + true + ${project.build.directory}/docker/driver + + + + + + unpack-docker-executor-bundle + pre-integration-test + + unpack + + + + + org.apache.spark + spark-docker-minimal-bundle_${scala.binary.version} + ${project.version} + executor-docker-dist + tar.gz + true + ${project.build.directory}/docker/executor + + + + + + + + com.googlecode.maven-download-plugin + download-maven-plugin + 1.3.0 + + + download-minikube-linux + pre-integration-test + + wget + + + https://storage.googleapis.com/minikube/releases/v0.12.2/minikube-linux-amd64 + ${project.build.directory}/minikube-bin/linux-amd64 + minikube + + + + download-minikube-darwin + pre-integration-test + + wget + + + https://storage.googleapis.com/minikube/releases/v0.12.2/minikube-darwin-amd64 + ${project.build.directory}/minikube-bin/darwin-amd64 + minikube + + + + + + + + + diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala new file mode 
100644 index 0000000000000..d79c75e484af5 --- /dev/null +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest + +import java.nio.file.Paths +import java.util.UUID + +import com.google.common.collect.ImmutableList +import io.fabric8.kubernetes.client.{Config, KubernetesClient} +import org.scalatest.BeforeAndAfter +import org.scalatest.concurrent.{Eventually, PatienceConfiguration} +import org.scalatest.time.{Minutes, Seconds, Span} +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.SparkSubmit +import org.apache.spark.deploy.kubernetes.Client +import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder +import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 +import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} + +private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { + + private val EXAMPLES_JAR = Paths.get("target", "integration-tests-spark-jobs") + .toFile + .listFiles()(0) + .getAbsolutePath + + private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) + private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) + private val MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + + ".integrationtest.jobs.SparkPiWithInfiniteWait" + private val NAMESPACE = UUID.randomUUID().toString.replaceAll("-", "") + private var minikubeKubernetesClient: KubernetesClient = _ + private var clientConfig: Config = _ + + override def beforeAll(): Unit = { + Minikube.startMinikube() + new SparkDockerImageBuilder(Minikube.getDockerEnv).buildSparkDockerImages() + Minikube.getKubernetesClient.namespaces.createNew() + .withNewMetadata() + .withName(NAMESPACE) + .endMetadata() + .done() + minikubeKubernetesClient = Minikube.getKubernetesClient.inNamespace(NAMESPACE) + clientConfig = minikubeKubernetesClient.getConfiguration + } + + before { + Eventually.eventually(TIMEOUT, INTERVAL) { + assert(minikubeKubernetesClient.pods().list().getItems.isEmpty) + assert(minikubeKubernetesClient.services().list().getItems.isEmpty) + } + } + + after { + val pods = minikubeKubernetesClient.pods().list().getItems.asScala + pods.par.foreach(pod => { + minikubeKubernetesClient + .pods() + .withName(pod.getMetadata.getName) + .withGracePeriod(60) + .delete + }) + } + + override def afterAll(): Unit = { + if (!System.getProperty("spark.docker.test.persistMinikube", "false").toBoolean) { + Minikube.deleteMinikube() + } + 
} + + private def expectationsForStaticAllocation(sparkMetricsService: SparkRestApiV1): Unit = { + val apps = Eventually.eventually(TIMEOUT, INTERVAL) { + val result = sparkMetricsService + .getApplications(ImmutableList.of(ApplicationStatus.RUNNING, ApplicationStatus.COMPLETED)) + assert(result.size == 1 + && !result.head.id.equalsIgnoreCase("appid") + && !result.head.id.equalsIgnoreCase("{appId}")) + result + } + Eventually.eventually(TIMEOUT, INTERVAL) { + val result = sparkMetricsService.getExecutors(apps.head.id) + assert(result.size == 2) + assert(result.count(exec => exec.id != "driver") == 1) + result + } + Eventually.eventually(TIMEOUT, INTERVAL) { + val result = sparkMetricsService.getStages( + apps.head.id, Seq(StageStatus.COMPLETE).asJava) + assert(result.size == 1) + result + } + } + + test("Run a simple example") { + val sparkConf = new SparkConf(true) + .setMaster("kubernetes") + .set("spark.kubernetes.master", s"https://${Minikube.getMinikubeIp}:8443") + .set("spark.kubernetes.submit.caCertFile", clientConfig.getCaCertFile) + .set("spark.kubernetes.submit.clientKeyFile", clientConfig.getClientKeyFile) + .set("spark.kubernetes.submit.clientCertFile", clientConfig.getClientCertFile) + .set("spark.kubernetes.namespace", NAMESPACE) + .set("spark.kubernetes.executor.docker.image", "spark-executor:latest") + .set("spark.executor.memory", "500m") + .set("spark.executor.cores", "1") + .set("spark.executors.instances", "1") + .set("spark.app.id", "spark-pi") + val mainAppResource = s"file://$EXAMPLES_JAR" + + new Client( + sparkConf = sparkConf, + mainClass = MAIN_CLASS, + mainAppResource = mainAppResource, + appArgs = Array.empty[String]).run() + val sparkMetricsService = Minikube.getService[SparkRestApiV1]( + "spark-pi", NAMESPACE, "spark-ui-port") + expectationsForStaticAllocation(sparkMetricsService) + } + + test("Run using spark-submit") { + val args = Array( + "--master", "kubernetes", + "--deploy-mode", "cluster", + "--kubernetes-master", s"https://${Minikube.getMinikubeIp}:8443", + "--kubernetes-namespace", NAMESPACE, + "--name", "spark-pi", + "--executor-memory", "512m", + "--executor-cores", "1", + "--num-executors", "1", + "--class", MAIN_CLASS, + "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", + "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", + "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", + "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", + EXAMPLES_JAR) + SparkSubmit.main(args) + val sparkMetricsService = Minikube.getService[SparkRestApiV1]( + "spark-pi", NAMESPACE, "spark-ui-port") + expectationsForStaticAllocation(sparkMetricsService) + } +} diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala new file mode 100644 index 0000000000000..22d78142508c1 --- /dev/null +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest.docker + +import java.net.URI +import java.nio.file.Paths + +import com.fasterxml.jackson.jaxrs.json.JacksonJaxbJsonProvider +import com.spotify.docker.client.{DefaultDockerClient, DockerCertificates} +import org.apache.http.client.utils.URIBuilder +import org.scalatest.concurrent.{Eventually, PatienceConfiguration} +import org.scalatest.time.{Minutes, Seconds, Span} + +private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, String]) { + + private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) + private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) + private val dockerHost = dockerEnv.getOrElse("DOCKER_HOST", + throw new IllegalStateException("DOCKER_HOST env not found.")) + + private val originalDockerUri = URI.create(dockerHost) + private val httpsDockerUri = new URIBuilder() + .setHost(originalDockerUri.getHost) + .setPort(originalDockerUri.getPort) + .setScheme("https") + .build() + + private val dockerCerts = dockerEnv.getOrElse("DOCKER_CERT_PATH", + throw new IllegalStateException("DOCKER_CERT_PATH env not found.")) + + private val dockerClient = new DefaultDockerClient.Builder() + .uri(httpsDockerUri) + .dockerCertificates(DockerCertificates + .builder() + .dockerCertPath(Paths.get(dockerCerts)) + .build().get()) + .build() + + def buildSparkDockerImages(): Unit = { + Eventually.eventually(TIMEOUT, INTERVAL) { dockerClient.ping() } + dockerClient.build(Paths.get("target", "docker", "driver"), "spark-driver") + dockerClient.build(Paths.get("target", "docker", "executor"), "spark-executor") + } + +} \ No newline at end of file diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala new file mode 100644 index 0000000000000..92b809a4c7c59 --- /dev/null +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.integrationtest.minikube + +import java.io.{BufferedReader, InputStreamReader} +import java.nio.file.Paths +import java.util.concurrent.TimeUnit +import javax.net.ssl.X509TrustManager + +import io.fabric8.kubernetes.client.internal.SSLUtils +import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} +import scala.collection.mutable.ArrayBuffer +import scala.reflect.ClassTag + +import org.apache.spark.deploy.rest.kubernetes.HttpClientUtil +import org.apache.spark.internal.Logging +import org.apache.spark.util.Utils + +// TODO support windows +private[spark] object Minikube extends Logging { + private val MINIKUBE_EXECUTABLE_DEST = if (Utils.isMac) { + Paths.get("target", "minikube-bin", "darwin-amd64", "minikube").toFile + } else if (Utils.isWindows) { + throw new IllegalStateException("Executing Minikube based integration tests not yet " + + " available on Windows.") + } else { + Paths.get("target", "minikube-bin", "linux-amd64", "minikube").toFile + } + + private val EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE = "Minikube is not downloaded, expected at " + + s"${MINIKUBE_EXECUTABLE_DEST.getAbsolutePath}" + + private val MINIKUBE_STARTUP_TIMEOUT_SECONDS = 60 + + def startMinikube(): Unit = synchronized { + assert(MINIKUBE_EXECUTABLE_DEST.exists(), EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) + if (getMinikubeStatus != MinikubeStatus.RUNNING) { + executeMinikube("start", "--memory", "6000", "--cpus", "8") + } else { + logInfo("Minikube is already started.") + } + } + + def getMinikubeIp: String = synchronized { + assert(MINIKUBE_EXECUTABLE_DEST.exists(), EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) + val outputs = executeMinikube("ip") + assert(outputs.size == 1, "Unexpected amount of output from minikube ip") + outputs.head + } + + def getMinikubeStatus: MinikubeStatus.Value = synchronized { + assert(MINIKUBE_EXECUTABLE_DEST.exists(), EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) + val statusString = executeMinikube("status").head.replaceFirst("minikubeVM: ", "") + MinikubeStatus.unapply(statusString) + .getOrElse(throw new IllegalStateException(s"Unknown status $statusString")) + } + + def getDockerEnv: Map[String, String] = synchronized { + assert(MINIKUBE_EXECUTABLE_DEST.exists(), EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) + executeMinikube("docker-env") + .filter(_.startsWith("export")) + .map(_.replaceFirst("export ", "").split('=')) + .map(arr => (arr(0), arr(1).replaceAllLiterally("\"", ""))) + .toMap + } + + def deleteMinikube(): Unit = synchronized { + assert(MINIKUBE_EXECUTABLE_DEST.exists, EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) + if (getMinikubeStatus != MinikubeStatus.DOES_NOT_EXIST) { + executeMinikube("delete") + } else { + logInfo("Minikube was already not running.") + } + } + + def getKubernetesClient: DefaultKubernetesClient = synchronized { + val kubernetesMaster = s"https://$getMinikubeIp:8443" + val userHome = System.getProperty("user.home") + val kubernetesConf = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(kubernetesMaster) + .withCaCertFile(Paths.get(userHome, ".minikube", "ca.crt").toFile.getAbsolutePath) + .withClientCertFile(Paths.get(userHome, ".minikube", "apiserver.crt").toFile.getAbsolutePath) + .withClientKeyFile(Paths.get(userHome, ".minikube", "apiserver.key").toFile.getAbsolutePath) + .build() + new DefaultKubernetesClient(kubernetesConf) + } + + def getService[T: ClassTag]( + serviceName: String, + namespace: String, + servicePortName: String, + servicePath: String = ""): T = synchronized { + val 
kubernetesMaster = s"https://$getMinikubeIp:8443" + val url = s"${ + Array[String]( + kubernetesMaster, + "api", "v1", "proxy", + "namespaces", namespace, + "services", serviceName).mkString("/")}" + + s":$servicePortName$servicePath" + val userHome = System.getProperty("user.home") + val kubernetesConf = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(kubernetesMaster) + .withCaCertFile(Paths.get(userHome, ".minikube", "ca.crt").toFile.getAbsolutePath) + .withClientCertFile(Paths.get(userHome, ".minikube", "apiserver.crt").toFile.getAbsolutePath) + .withClientKeyFile(Paths.get(userHome, ".minikube", "apiserver.key").toFile.getAbsolutePath) + .build() + val sslContext = SSLUtils.sslContext(kubernetesConf) + val trustManager = SSLUtils.trustManagers(kubernetesConf)(0).asInstanceOf[X509TrustManager] + HttpClientUtil.createClient[T](url, sslContext.getSocketFactory, trustManager) + } + + def executeMinikubeSsh(command: String): Unit = { + executeMinikube("ssh", command) + } + + private def executeMinikube(action: String, args: String*): Seq[String] = { + if (!MINIKUBE_EXECUTABLE_DEST.canExecute) { + if (!MINIKUBE_EXECUTABLE_DEST.setExecutable(true)) { + throw new IllegalStateException("Failed to make the Minikube binary executable.") + } + } + val fullCommand = Array(MINIKUBE_EXECUTABLE_DEST.getAbsolutePath, action) ++ args + val pb = new ProcessBuilder().command(fullCommand: _*) + pb.redirectErrorStream(true) + val proc = pb.start() + val outputLines = new ArrayBuffer[String] + + Utils.tryWithResource(new InputStreamReader(proc.getInputStream)) { procOutput => + Utils.tryWithResource(new BufferedReader(procOutput)) { (bufferedOutput: BufferedReader) => + var line: String = null + do { + line = bufferedOutput.readLine() + if (line != null) { + logInfo(line) + outputLines += line + } + } while (line != null) + } + } + assert(proc.waitFor(MINIKUBE_STARTUP_TIMEOUT_SECONDS, TimeUnit.SECONDS), + s"Timed out while executing $action on minikube.") + assert(proc.exitValue == 0, s"Failed to execute minikube $action ${args.mkString(" ")}") + outputLines.toSeq + } +} + +private[spark] object MinikubeStatus extends Enumeration { + + val RUNNING = status("Running") + val STOPPED = status("Stopped") + val DOES_NOT_EXIST = status("Does Not Exist") + val SAVED = status("Saved") + + def status(value: String): Value = new Val(nextId, value) + def unapply(s: String): Option[Value] = values.find(s == _.toString) +} diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala new file mode 100644 index 0000000000000..7a3b06b1b5e58 --- /dev/null +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest.restapis + +import java.util.{List => JList} +import javax.ws.rs._ +import javax.ws.rs.core.MediaType + +import org.apache.spark.status.api.v1._ + +@Path("/api/v1") +@Consumes(Array(MediaType.APPLICATION_JSON)) +@Produces(Array(MediaType.APPLICATION_JSON)) +trait SparkRestApiV1 { + + @GET + @Path("/applications") + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.APPLICATION_JSON)) + def getApplications( + @QueryParam("status") applicationStatuses: JList[ApplicationStatus]): Seq[ApplicationInfo] + + @GET + @Path("applications/{appId}/stages") + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.APPLICATION_JSON)) + def getStages( + @PathParam("appId") appId: String, + @QueryParam("status") statuses: JList[StageStatus]): Seq[StageData] + + @GET + @Path("applications/{appId}/executors") + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.APPLICATION_JSON)) + def getExecutors(@PathParam("appId") appId: String): Seq[ExecutorSummary] +} diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java index 6767cc5079649..94f9bc319b6a2 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java @@ -76,6 +76,12 @@ class SparkSubmitOptionParser { protected final String PRINCIPAL = "--principal"; protected final String QUEUE = "--queue"; + // Kubernetes-only options. + protected final String KUBERNETES_MASTER = "--kubernetes-master"; + protected final String KUBERNETES_NAMESPACE = "--kubernetes-namespace"; + protected final String KUBERNETES_UPLOAD_JARS = "--upload-jars"; + protected final String KUBERNETES_UPLOAD_DRIVER_EXTRA_CLASSPATH = "--upload-driver-extra-classpath"; + /** * This is the canonical list of spark-submit options. 
Each entry in the array contains the * different aliases for the same option; the first element of each entry is the "official" @@ -115,6 +121,10 @@ class SparkSubmitOptionParser { { REPOSITORIES }, { STATUS }, { TOTAL_EXECUTOR_CORES }, + { KUBERNETES_MASTER }, + { KUBERNETES_NAMESPACE }, + { KUBERNETES_UPLOAD_JARS }, + { KUBERNETES_UPLOAD_DRIVER_EXTRA_CLASSPATH } }; /** diff --git a/pom.xml b/pom.xml index 49f12703c04df..d923941748ca2 100644 --- a/pom.xml +++ b/pom.xml @@ -136,6 +136,7 @@ 10.12.1.1 1.8.1 1.6.0 + 8.18.0 9.2.16.v20160414 3.1.0 0.8.0 @@ -303,6 +304,33 @@ chill-java ${chill.version} + + + com.netflix.feign + feign-core + ${feign.version} + + + com.netflix.feign + feign-okhttp + ${feign.version} + + + com.netflix.feign + feign-jackson + ${feign.version} + + + com.netflix.feign + feign-jaxrs + ${feign.version} + + + com.squareup.okhttp3 + okhttp + 3.4.1 + + @@ -617,6 +645,11 @@ jackson-module-jaxb-annotations ${fasterxml.jackson.version} + + com.fasterxml.jackson.jaxrs + jackson-jaxrs-json-provider + ${fasterxml.jackson.version} + org.glassfish.jersey.core jersey-server @@ -2592,6 +2625,22 @@ + + kubernetes + + kubernetes/core + + + + + kubernetes-integration-tests + + kubernetes/docker-minimal-bundle + kubernetes/integration-tests + kubernetes/integration-tests-spark-jobs + + + hive-thriftserver From 00e545f6c2df74e53ac3446737d960bc86e54f64 Mon Sep 17 00:00:00 2001 From: mcheah Date: Tue, 6 Dec 2016 14:36:57 -0800 Subject: [PATCH 002/156] Fix style --- .../src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 5 ----- 1 file changed, 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 598bafcab81dc..6d37b093a0b6b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -854,7 +854,6 @@ private[spark] object SparkSubmitUtils { /** * Represents a Maven Coordinate - * * @param groupId the groupId of the coordinate * @param artifactId the artifactId of the coordinate * @param version the version of the coordinate @@ -866,7 +865,6 @@ private[spark] object SparkSubmitUtils { /** * Extracts maven coordinates from a comma-delimited string. Coordinates should be provided * in the format `groupId:artifactId:version` or `groupId/artifactId:version`. - * * @param coordinates Comma-delimited string of maven coordinates * @return Sequence of Maven coordinates */ @@ -897,7 +895,6 @@ private[spark] object SparkSubmitUtils { /** * Extracts maven coordinates from a comma-delimited string - * * @param remoteRepos Comma-delimited string of remote repositories * @param ivySettings The Ivy settings for this session * @return A ChainResolver used by Ivy to search for and resolve dependencies. @@ -962,7 +959,6 @@ private[spark] object SparkSubmitUtils { /** * Output a comma-delimited list of paths for the downloaded jars to be added to the classpath * (will append to jars in SparkSubmit). 
- * * @param artifacts Sequence of dependencies that were resolved and retrieved * @param cacheDirectory directory where jars are cached * @return a comma-delimited list of paths for the dependencies @@ -1019,7 +1015,6 @@ private[spark] object SparkSubmitUtils { /** * Resolves any dependencies that were supplied through maven coordinates - * * @param coordinates Comma-delimited string of maven coordinates * @param remoteRepos Comma-delimited string of remote repositories other than maven central * @param ivyPath The path to the local ivy repository From cdbd9bb9f96a86470589e137b58a42217837b869 Mon Sep 17 00:00:00 2001 From: mcheah Date: Tue, 6 Dec 2016 17:23:24 -0800 Subject: [PATCH 003/156] Make naming more consistent --- dev/scalastyle | 2 ++ .../org/apache/spark/deploy/kubernetes/Client.scala | 13 ++++++------- .../integrationtest/KubernetesSuite.scala | 2 ++ 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/dev/scalastyle b/dev/scalastyle index f3dec833636c6..de7423913fad9 100755 --- a/dev/scalastyle +++ b/dev/scalastyle @@ -26,6 +26,8 @@ ERRORS=$(echo -e "q\n" \ -Pyarn \ -Phive \ -Phive-thriftserver \ + -Pkubernetes \ + -Pkubernetes-integration-tests \ scalastyle test:scalastyle \ | awk '{if($1~/error/)print}' \ ) diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 4ee00e8802080..f402b6df82fc4 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -34,7 +34,7 @@ import scala.concurrent.ExecutionContext import scala.concurrent.duration.DurationInt import scala.util.Success -import org.apache.spark.SparkConf +import org.apache.spark.{SPARK_VERSION, SparkConf} import org.apache.spark.deploy.rest.{AppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging @@ -60,9 +60,8 @@ private[spark] class Client( private val secretName = s"spark-submission-server-secret-$kubernetesAppId" private val driverLauncherSelectorValue = s"driver-launcher-$launchTime" - // TODO set precise version by default private val driverDockerImage = sparkConf.get( - "spark.kubernetes.driver.docker.image", "spark-driver:latest") + "spark.kubernetes.driver.docker.image", s"spark-driver:$SPARK_VERSION") private val uploadedDriverExtraClasspath = sparkConf .getOption("spark.kubernetes.driver.uploads.driverExtraClasspath") private val uploadedJars = sparkConf.getOption("spark.kubernetes.driver.uploads.jars") @@ -291,14 +290,14 @@ private[spark] class Client( def getFileContents(maybeFilePaths: Option[String]): Array[(String, String)] = { maybeFilePaths .map(_.split(",").map(filePath => { - val driverExtraClasspathFile = new File(filePath) - if (!driverExtraClasspathFile.isFile) { + val fileToUpload = new File(filePath) + if (!fileToUpload.isFile) { throw new IllegalStateException("Provided file to upload for driver extra classpath" + s" does not exist or is not a file: $filePath") } else { - val fileBytes = Files.toByteArray(driverExtraClasspathFile) + val fileBytes = Files.toByteArray(fileToUpload) val fileBase64 = Base64.encodeBase64String(fileBytes) - (driverExtraClasspathFile.getName, fileBase64) + (fileToUpload.getName, fileBase64) } })).getOrElse(Array.empty[(String, String)]) } diff --git 
a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index d79c75e484af5..3f3d2e609ea4d 100644 --- a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -116,6 +116,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .set("spark.kubernetes.submit.clientKeyFile", clientConfig.getClientKeyFile) .set("spark.kubernetes.submit.clientCertFile", clientConfig.getClientCertFile) .set("spark.kubernetes.namespace", NAMESPACE) + .set("spark.kubernetes.driver.docker.image", "spark-driver:latest") .set("spark.kubernetes.executor.docker.image", "spark-executor:latest") .set("spark.executor.memory", "500m") .set("spark.executor.cores", "1") @@ -148,6 +149,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", + "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", EXAMPLES_JAR) SparkSubmit.main(args) val sparkMetricsService = Minikube.getService[SparkRestApiV1]( From 8f69fc0bda56f898ebf97d32d49e7c3ec2e34d04 Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 9 Dec 2016 14:55:13 -0800 Subject: [PATCH 004/156] Fix building assembly with Kubernetes. --- assembly/pom.xml | 10 ++++++++++ .../kubernetes/integrationtest/minikube/Minikube.scala | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index aebfd12227751..1819fe404a1d9 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -148,6 +148,16 @@ + + kubernetes + + + org.apache.spark + spark-kubernetes_${scala.binary.version} + ${project.version} + + + hive diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala index 92b809a4c7c59..60c6564579a6e 100644 --- a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala @@ -21,8 +21,8 @@ import java.nio.file.Paths import java.util.concurrent.TimeUnit import javax.net.ssl.X509TrustManager -import io.fabric8.kubernetes.client.internal.SSLUtils import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} +import io.fabric8.kubernetes.client.internal.SSLUtils import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag From 75c6086aa52f0a98345b4fa63fde654740ddd28b Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 9 Dec 2016 16:18:11 -0800 Subject: [PATCH 005/156] Service account support, use constants from fabric8 library. 
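A minimal sketch, not part of the patch itself, of how a submission opts into a non-default service account once this change lands. Only the configuration keys come from the Client.scala hunk below; the namespace and service account name are placeholder values.

    import org.apache.spark.SparkConf

    object ServiceAccountConfExample {
      // Hypothetical namespace and service account; only the config keys are from this patch.
      def conf: SparkConf = new SparkConf(true)
        .set("spark.kubernetes.namespace", "spark-jobs")
        .set("spark.kubernetes.submit.serviceAccountName", "spark-driver")
    }

If the key is omitted, the client falls back to the "default" account, and the chosen account is attached to the driver pod spec through the fabric8 builder's withServiceAccount call.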
--- .../scala/org/apache/spark/deploy/kubernetes/Client.scala | 7 +++++-- .../spark/deploy/kubernetes/KubernetesClientBuilder.scala | 6 +++--- .../kubernetes/integrationtest/KubernetesSuite.scala | 6 +++--- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index f402b6df82fc4..cea90a51386b5 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -55,8 +55,8 @@ private[spark] class Client( private val launchTime = System.currentTimeMillis private val kubernetesAppId = sparkConf.getOption("spark.app.name") - .orElse(sparkConf.getOption("spark.app.id")) - .getOrElse(s"spark-$launchTime") + .orElse(sparkConf.getOption("spark.app.id")) + .getOrElse(s"spark-$launchTime") private val secretName = s"spark-submission-server-secret-$kubernetesAppId" private val driverLauncherSelectorValue = s"driver-launcher-$launchTime" @@ -69,6 +69,8 @@ private[spark] class Client( private val secretBytes = new Array[Byte](128) SECURE_RANDOM.nextBytes(secretBytes) private val secretBase64String = Base64.encodeBase64String(secretBytes) + private val serviceAccount = sparkConf.get("spark.kubernetes.submit.serviceAccountName", + "default") private implicit val retryableExecutionContext = ExecutionContext .fromExecutorService( @@ -191,6 +193,7 @@ private[spark] class Client( .withSecretName(secret.getMetadata.getName) .endSecret() .endVolume + .withServiceAccount(serviceAccount) .addNewContainer() .withName(DRIVER_LAUNCHER_CONTAINER_NAME) .withImage(driverDockerImage) diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala index 4c715c86cc7f9..61a13dc7274d7 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala @@ -20,11 +20,11 @@ import java.io.File import com.google.common.base.Charsets import com.google.common.io.Files -import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} +import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient} private[spark] object KubernetesClientBuilder { - private val API_SERVER_TOKEN = new File("/var/run/secrets/kubernetes.io/serviceaccount/token") - private val CA_CERT_FILE = new File("/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + private val API_SERVER_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) + private val CA_CERT_FILE = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) /** * Creates a {@link KubernetesClient}, expecting to be from diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 3f3d2e609ea4d..902631b874539 100644 --- a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -37,9 +37,9 @@ import 
org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { private val EXAMPLES_JAR = Paths.get("target", "integration-tests-spark-jobs") - .toFile - .listFiles()(0) - .getAbsolutePath + .toFile + .listFiles()(0) + .getAbsolutePath private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) From 93b75cea65e8c5d98e4d9d78a541659d927fceea Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 6 Jan 2017 16:15:35 -0800 Subject: [PATCH 006/156] Some small changes - Don't hold the raw secret bytes - Add CPU limits and requests --- .../spark/deploy/kubernetes/Client.scala | 39 +++++++++++-------- .../KubernetesClusterSchedulerBackend.scala | 9 ++++- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index cea90a51386b5..21c83dbf40e21 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -66,9 +66,12 @@ private[spark] class Client( .getOption("spark.kubernetes.driver.uploads.driverExtraClasspath") private val uploadedJars = sparkConf.getOption("spark.kubernetes.driver.uploads.jars") - private val secretBytes = new Array[Byte](128) - SECURE_RANDOM.nextBytes(secretBytes) - private val secretBase64String = Base64.encodeBase64String(secretBytes) + private val secretBase64String = { + val secretBytes = new Array[Byte](128) + SECURE_RANDOM.nextBytes(secretBytes) + Base64.encodeBase64String(secretBytes) + } + private val serviceAccount = sparkConf.get("spark.kubernetes.submit.serviceAccountName", "default") @@ -105,11 +108,7 @@ private[spark] class Client( .done() try { val selectors = Map(DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue).asJava - val uiPort = sparkConf - .getOption("spark.ui.port") - .map(_.toInt) - .getOrElse(DEFAULT_UI_PORT) - val (servicePorts, containerPorts) = configurePorts(uiPort) + val (servicePorts, containerPorts) = configurePorts() val service = kubernetesClient.services().createNew() .withNewMetadata() .withName(kubernetesAppId) @@ -120,11 +119,11 @@ private[spark] class Client( .endSpec() .done() sparkConf.set("spark.kubernetes.driver.service.name", service.getMetadata.getName) - sparkConf.setIfMissing("spark.driver.port", DRIVER_PORT.toString) - sparkConf.setIfMissing("spark.blockmanager.port", BLOCKMANAGER_PORT.toString) + sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) + sparkConf.setIfMissing("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT.toString) val submitRequest = buildSubmissionRequest() val submitCompletedFuture = SettableFuture.create[Boolean] - val secretDirectory = s"/var/run/secrets/spark-submission/$kubernetesAppId" + val secretDirectory = s"$SPARK_SUBMISSION_SECRET_BASE_DIR/$kubernetesAppId" val podWatcher = new Watcher[Pod] { override def eventReceived(action: Action, t: Pod): Unit = { @@ -228,7 +227,7 @@ private[spark] class Client( }) } - private def configurePorts(uiPort: Int): (Seq[ServicePort], Seq[ContainerPort]) = { + private def configurePorts(): (Seq[ServicePort], Seq[ContainerPort]) = { val servicePorts = new ArrayBuffer[ServicePort] val containerPorts = new ArrayBuffer[ContainerPort] @@ -251,15 +250,20 @@ private[spark] class Client( sparkConf 
.getOption("spark.driver.port") .map(_.toInt) - .getOrElse(DRIVER_PORT)) + .getOrElse(DEFAULT_DRIVER_PORT)) addPortToServiceAndContainer( BLOCKMANAGER_PORT_NAME, sparkConf .getOption("spark.blockmanager.port") .map(_.toInt) - .getOrElse(BLOCKMANAGER_PORT)) + .getOrElse(DEFAULT_BLOCKMANAGER_PORT)) - addPortToServiceAndContainer(UI_PORT_NAME, uiPort) + addPortToServiceAndContainer( + UI_PORT_NAME, + sparkConf + .getOption("spark.ui.port") + .map(_.toInt) + .getOrElse(DEFAULT_UI_PORT)) (servicePorts.toSeq, containerPorts.toSeq) } @@ -331,8 +335,8 @@ private object Client { private val SUBMISSION_SERVER_SECRET_NAME = "spark-submission-server-secret" private val DRIVER_LAUNCHER_SELECTOR_LABEL = "driver-launcher-selector" private val DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT = 7077 - private val DRIVER_PORT = 7078 - private val BLOCKMANAGER_PORT = 7079 + private val DEFAULT_DRIVER_PORT = 7078 + private val DEFAULT_BLOCKMANAGER_PORT = 7079 private val DEFAULT_UI_PORT = 4040 private val UI_PORT_NAME = "spark-ui-port" private val DRIVER_LAUNCHER_SERVICE_PORT_NAME = "driver-launcher-port" @@ -340,6 +344,7 @@ private object Client { private val BLOCKMANAGER_PORT_NAME = "block-manager-port" private val DRIVER_LAUNCHER_CONTAINER_NAME = "spark-kubernetes-driver-launcher" private val SECURE_RANDOM = new SecureRandom() + private val SPARK_SUBMISSION_SECRET_BASE_DIR = "/var/run/secrets/spark-submission" def main(args: Array[String]): Unit = { require(args.length >= 2, s"Too few arguments. Usage: ${getClass.getName} " + diff --git a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index f37b97e4dd0dc..bbc95d4f4b7e3 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -34,8 +34,8 @@ import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.Utils private[spark] class KubernetesClusterSchedulerBackend( - scheduler: TaskSchedulerImpl, - val sc: SparkContext) + scheduler: TaskSchedulerImpl, + val sc: SparkContext) extends CoarseGrainedSchedulerBackend(scheduler, sc.env.rpcEnv) { import KubernetesClusterSchedulerBackend._ @@ -167,6 +167,9 @@ private[spark] class KubernetesClusterSchedulerBackend( val executorMemoryLimitQuantity = new QuantityBuilder(false) .withAmount(executorMemoryWithOverhead.toString) .build() + val executorCpuQuantity = new QuantityBuilder(false) + .withAmount(executorCores) + .build() val requiredEnv = new ArrayBuffer[EnvVar] requiredEnv += new EnvVarBuilder() .withName("SPARK_EXECUTOR_PORT") @@ -214,6 +217,8 @@ private[spark] class KubernetesClusterSchedulerBackend( .withNewResources() .addToRequests("memory", executorMemoryQuantity) .addToLimits("memory", executorMemoryLimitQuantity) + .addToRequests("cpu", executorCpuQuantity) + .addToLimits("cpu", executorCpuQuantity) .endResources() .withEnv(requiredEnv.asJava) .withPorts(requiredPorts.asJava) From e7397e814f98570a9bfefa66c34a1dd67f53865b Mon Sep 17 00:00:00 2001 From: mcheah Date: Mon, 9 Jan 2017 14:25:49 -0800 Subject: [PATCH 007/156] Use k8s:// formatted URL instead of separate setting. 
--- .../main/scala/org/apache/spark/deploy/SparkSubmit.scala | 5 +---- .../org/apache/spark/deploy/SparkSubmitArguments.scala | 7 ------- .../scala/org/apache/spark/deploy/kubernetes/Client.scala | 8 +++++--- .../cluster/kubernetes/KubernetesClusterManager.scala | 2 +- .../kubernetes/KubernetesClusterSchedulerBackend.scala | 5 +---- .../kubernetes/integrationtest/KubernetesSuite.scala | 6 ++---- 6 files changed, 10 insertions(+), 23 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 6d37b093a0b6b..bd249ea377b65 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -240,7 +240,7 @@ object SparkSubmit { YARN case m if m.startsWith("spark") => STANDALONE case m if m.startsWith("mesos") => MESOS - case m if m.startsWith("kubernetes") => KUBERNETES + case m if m.startsWith("k8s") => KUBERNETES case m if m.startsWith("local") => LOCAL case _ => printErrorAndExit("Master must either be yarn or start with spark, mesos, local") @@ -470,9 +470,6 @@ object SparkSubmit { OptionAssigner(args.principal, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.principal"), OptionAssigner(args.keytab, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.keytab"), - // Kubernetes only - OptionAssigner(args.kubernetesMaster, KUBERNETES, ALL_DEPLOY_MODES, - sysProp = "spark.kubernetes.master"), OptionAssigner(args.kubernetesNamespace, KUBERNETES, ALL_DEPLOY_MODES, sysProp = "spark.kubernetes.namespace"), OptionAssigner(args.kubernetesUploadJars, KUBERNETES, CLUSTER, diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 4244742aad14c..d80f79332111f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -72,7 +72,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S var keytab: String = null // Kubernetes only - var kubernetesMaster: String = null var kubernetesNamespace: String = null var kubernetesUploadJars: String = null var kubernetesUploadDriverExtraClasspath: String = null @@ -192,9 +191,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S .getOrElse(sparkProperties.get("spark.executor.instances").orNull) keytab = Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull principal = Option(principal).orElse(sparkProperties.get("spark.yarn.principal")).orNull - kubernetesMaster = Option(kubernetesMaster) - .orElse(sparkProperties.get("spark.kubernetes.master")) - .orNull kubernetesNamespace = Option(kubernetesNamespace) .orElse(sparkProperties.get("spark.kubernetes.namespace")) .orNull @@ -444,9 +440,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S case KEYTAB => keytab = value - case KUBERNETES_MASTER => - kubernetesMaster = value - case KUBERNETES_NAMESPACE => kubernetesNamespace = value diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 21c83dbf40e21..0715c84495a2c 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -49,9 +49,11 @@ private[spark] 
class Client( private val namespace = sparkConf.getOption("spark.kubernetes.namespace").getOrElse( throw new IllegalArgumentException("Namespace must be provided in spark.kubernetes.namespace")) - private val master = sparkConf - .getOption("spark.kubernetes.master") - .getOrElse("Master must be provided in spark.kubernetes.master") + private val rawMaster = sparkConf.get("spark.master") + if (!rawMaster.startsWith("k8s://")) { + throw new IllegalArgumentException("Master should be a URL with scheme k8s://") + } + private val master = rawMaster.replaceFirst("k8s://", "") private val launchTime = System.currentTimeMillis private val kubernetesAppId = sparkConf.getOption("spark.app.name") diff --git a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala index 0d3b97c636ca3..36f7149a832c3 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala @@ -21,7 +21,7 @@ import org.apache.spark.scheduler.{ExternalClusterManager, SchedulerBackend, Tas private[spark] class KubernetesClusterManager extends ExternalClusterManager { - override def canCreate(masterURL: String): Boolean = masterURL.startsWith("kubernetes") + override def canCreate(masterURL: String): Boolean = masterURL.startsWith("k8s") override def createTaskScheduler(sc: SparkContext, masterURL: String): TaskScheduler = { val scheduler = new TaskSchedulerImpl(sc) diff --git a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index bbc95d4f4b7e3..4e099cea3198b 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -43,10 +43,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private val EXECUTOR_MODIFICATION_LOCK = new Object private val runningExecutorPods = new scala.collection.mutable.HashMap[String, Pod] - private val kubernetesMaster = conf - .getOption("spark.kubernetes.master") - .getOrElse( - throw new SparkException("Kubernetes master must be specified in kubernetes mode.")) + private val kubernetesMaster = sc.master.replaceFirst("k8s://", "") private val executorDockerImage = conf .get("spark.kubernetes.executor.docker.image", s"spark-executor:${sc.version}") diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 902631b874539..183f666994d38 100644 --- a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -110,8 +110,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { test("Run a simple example") { val sparkConf = new SparkConf(true) - .setMaster("kubernetes") - 
.set("spark.kubernetes.master", s"https://${Minikube.getMinikubeIp}:8443") + .setMaster(s"k8s://https://${Minikube.getMinikubeIp}:8443") .set("spark.kubernetes.submit.caCertFile", clientConfig.getCaCertFile) .set("spark.kubernetes.submit.clientKeyFile", clientConfig.getClientKeyFile) .set("spark.kubernetes.submit.clientCertFile", clientConfig.getClientCertFile) @@ -136,9 +135,8 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { test("Run using spark-submit") { val args = Array( - "--master", "kubernetes", + "--master", s"k8s://https://${Minikube.getMinikubeIp}:8443", "--deploy-mode", "cluster", - "--kubernetes-master", s"https://${Minikube.getMinikubeIp}:8443", "--kubernetes-namespace", NAMESPACE, "--name", "spark-pi", "--executor-memory", "512m", From ed65428d64eabc3da71b8ea5f5be6ba5dac913f6 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 9 Jan 2017 14:33:41 -0800 Subject: [PATCH 008/156] Reindent comment to conforn to JavaDoc style The build process fails ScalaStyle checks otherwise. --- .../deploy/kubernetes/KubernetesClientBuilder.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala index 61a13dc7274d7..61d3ac17ac34a 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala @@ -27,11 +27,11 @@ private[spark] object KubernetesClientBuilder { private val CA_CERT_FILE = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) /** - * Creates a {@link KubernetesClient}, expecting to be from - * within the context of a pod. When doing so, credentials files - * are picked up from canonical locations, as they are injected - * into the pod's disk space. - */ + * Creates a {@link KubernetesClient}, expecting to be from + * within the context of a pod. When doing so, credentials files + * are picked up from canonical locations, as they are injected + * into the pod's disk space. + */ def buildFromWithinPod( kubernetesMaster: String, kubernetesNamespace: String): DefaultKubernetesClient = { From f9ddb633d56561bb4272cd8888b64905362c5379 Mon Sep 17 00:00:00 2001 From: mcheah Date: Mon, 9 Jan 2017 15:30:02 -0800 Subject: [PATCH 009/156] Move kubernetes under resource-managers folder. 
--- .../main/scala/org/apache/spark/deploy/SparkSubmit.scala | 2 +- pom.xml | 8 ++++---- {kubernetes => resource-managers/kubernetes}/core/pom.xml | 4 ++-- .../org.apache.spark.scheduler.ExternalClusterManager | 0 .../scala/org/apache/spark/deploy/kubernetes/Client.scala | 0 .../spark/deploy/kubernetes/KubernetesClientBuilder.scala | 0 .../scala/org/apache/spark/deploy/kubernetes/Retry.scala | 0 .../deploy/rest/KubernetesRestProtocolMessages.scala | 0 .../spark/deploy/rest/kubernetes/HttpClientUtil.scala | 0 .../deploy/rest/kubernetes/KubernetesSparkRestApi.scala | 0 .../rest/kubernetes/KubernetesSparkRestServer.scala | 0 .../cluster/kubernetes/KubernetesClusterManager.scala | 0 .../kubernetes/KubernetesClusterSchedulerBackend.scala | 0 .../kubernetes}/docker-minimal-bundle/pom.xml | 4 ++-- .../src/main/assembly/driver-assembly.xml | 0 .../src/main/assembly/executor-assembly.xml | 0 .../src/main/docker/driver/Dockerfile | 0 .../src/main/docker/executor/Dockerfile | 0 .../kubernetes}/integration-tests-spark-jobs/pom.xml | 4 ++-- .../integrationtest/jobs/SparkPiWithInfiniteWait.scala | 0 .../kubernetes}/integration-tests/pom.xml | 4 ++-- .../kubernetes/integrationtest/KubernetesSuite.scala | 0 .../integrationtest/docker/SparkDockerImageBuilder.scala | 0 .../kubernetes/integrationtest/minikube/Minikube.scala | 0 .../integrationtest/restapis/SparkRestApiV1.scala | 0 25 files changed, 13 insertions(+), 13 deletions(-) rename {kubernetes => resource-managers/kubernetes}/core/pom.xml (97%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala (100%) rename {kubernetes => resource-managers/kubernetes}/docker-minimal-bundle/pom.xml (98%) rename {kubernetes => resource-managers/kubernetes}/docker-minimal-bundle/src/main/assembly/driver-assembly.xml (100%) rename {kubernetes => resource-managers/kubernetes}/docker-minimal-bundle/src/main/assembly/executor-assembly.xml (100%) rename {kubernetes => resource-managers/kubernetes}/docker-minimal-bundle/src/main/docker/driver/Dockerfile (100%) rename {kubernetes => resource-managers/kubernetes}/docker-minimal-bundle/src/main/docker/executor/Dockerfile (100%) rename {kubernetes => 
resource-managers/kubernetes}/integration-tests-spark-jobs/pom.xml (95%) rename {kubernetes => resource-managers/kubernetes}/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala (100%) rename {kubernetes => resource-managers/kubernetes}/integration-tests/pom.xml (98%) rename {kubernetes => resource-managers/kubernetes}/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala (100%) rename {kubernetes => resource-managers/kubernetes}/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala (100%) rename {kubernetes => resource-managers/kubernetes}/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala (100%) rename {kubernetes => resource-managers/kubernetes}/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala (100%) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index bd249ea377b65..5be2d8a52d84c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -243,7 +243,7 @@ object SparkSubmit { case m if m.startsWith("k8s") => KUBERNETES case m if m.startsWith("local") => LOCAL case _ => - printErrorAndExit("Master must either be yarn or start with spark, mesos, local") + printErrorAndExit("Master must either be yarn or start with spark, mesos, k8s, or local") -1 } diff --git a/pom.xml b/pom.xml index d923941748ca2..9ba31b4d78016 100644 --- a/pom.xml +++ b/pom.xml @@ -2628,16 +2628,16 @@ kubernetes - kubernetes/core + resource-managers/kubernetes/core kubernetes-integration-tests - kubernetes/docker-minimal-bundle - kubernetes/integration-tests - kubernetes/integration-tests-spark-jobs + resource-managers/kubernetes/docker-minimal-bundle + resource-managers/kubernetes/integration-tests + resource-managers/kubernetes/integration-tests-spark-jobs diff --git a/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml similarity index 97% rename from kubernetes/core/pom.xml rename to resource-managers/kubernetes/core/pom.xml index 9c7eb52b2680a..388defd93465d 100644 --- a/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,8 +20,8 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT - ../../pom.xml + 2.2.0-SNAPSHOT + ../../../pom.xml spark-kubernetes_2.11 diff --git a/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager similarity index 100% rename from kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager rename to resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala diff --git 
a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala diff --git a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala diff --git a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala diff --git a/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml similarity index 98% rename from kubernetes/docker-minimal-bundle/pom.xml rename to resource-managers/kubernetes/docker-minimal-bundle/pom.xml index 3de939ea3978a..c20e51c93e7c7 100644 --- a/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -21,8 +21,8 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT - ../pom.xml + 2.2.0-SNAPSHOT + ../../../pom.xml spark-docker-minimal-bundle_2.11 diff --git a/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml b/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml similarity index 100% rename from kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml rename to resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml diff --git a/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml b/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml similarity index 100% rename from kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml rename to resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml diff --git a/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile similarity index 100% rename from kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile rename to resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile diff --git a/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile similarity index 100% rename from kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile rename to resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile diff --git a/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml similarity index 95% rename from kubernetes/integration-tests-spark-jobs/pom.xml rename to resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml index 17f1c4906214f..12b0234ae71bd 100644 --- a/kubernetes/integration-tests-spark-jobs/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml @@ -20,8 +20,8 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT - ../../pom.xml + 2.2.0-SNAPSHOT + ../../../pom.xml spark-kubernetes-integration-tests-spark-jobs_2.11 diff --git a/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala similarity index 100% rename from 
kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala rename to resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala diff --git a/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml similarity index 98% rename from kubernetes/integration-tests/pom.xml rename to resource-managers/kubernetes/integration-tests/pom.xml index 0568cb1e21826..1e7eb0e12e6df 100644 --- a/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,8 +20,8 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT - ../../pom.xml + 2.2.0-SNAPSHOT + ../../../pom.xml spark-kubernetes-integration-tests_2.11 diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala similarity index 100% rename from kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala rename to resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala similarity index 100% rename from kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala rename to resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala similarity index 100% rename from kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala rename to resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala similarity index 100% rename from kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala rename to resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala From 178abc1375d1c56d4d7801ea355a19a4704e5dcc Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 11 Jan 2017 14:36:45 -0800 Subject: [PATCH 010/156] Use tar and gzip to compress+archive shipped jars (#2) * Use tar and gzip to archive shipped jars. 
* Address comments * Move files to resolve merge --- pom.xml | 1 + .../spark/deploy/kubernetes/Client.scala | 21 +-- .../rest/KubernetesRestProtocolMessages.scala | 13 +- .../rest/kubernetes/CompressionUtils.scala | 139 ++++++++++++++++++ .../KubernetesSparkRestServer.scala | 27 +--- .../pom.xml | 33 +++++ .../kubernetes/integrationtest/PiHelper.java | 33 +++++ .../integration-tests-spark-jobs/pom.xml | 6 + .../jobs/SparkPiWithInfiniteWait.scala | 9 +- .../kubernetes/integration-tests/pom.xml | 13 ++ .../integrationtest/KubernetesSuite.scala | 7 + 11 files changed, 254 insertions(+), 48 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala create mode 100644 resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml create mode 100644 resource-managers/kubernetes/integration-tests-spark-jobs-helpers/src/main/java/org/apache/spark/deploy/kubernetes/integrationtest/PiHelper.java diff --git a/pom.xml b/pom.xml index 9ba31b4d78016..d04d7623d0584 100644 --- a/pom.xml +++ b/pom.xml @@ -2638,6 +2638,7 @@ resource-managers/kubernetes/docker-minimal-bundle resource-managers/kubernetes/integration-tests resource-managers/kubernetes/integration-tests-spark-jobs + resource-managers/kubernetes/integration-tests-spark-jobs-helpers diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 0715c84495a2c..230598d63bed1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -35,7 +35,7 @@ import scala.concurrent.duration.DurationInt import scala.util.Success import org.apache.spark.{SPARK_VERSION, SparkConf} -import org.apache.spark.deploy.rest.{AppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, UploadedAppResource} +import org.apache.spark.deploy.rest.{AppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging import org.apache.spark.util.Utils @@ -284,8 +284,8 @@ private[spark] class Client( case other => RemoteAppResource(other) } - val uploadDriverExtraClasspathBase64Contents = getFileContents(uploadedDriverExtraClasspath) - val uploadJarsBase64Contents = getFileContents(uploadedJars) + val uploadDriverExtraClasspathBase64Contents = compressJars(uploadedDriverExtraClasspath) + val uploadJarsBase64Contents = compressJars(uploadedJars) KubernetesCreateSubmissionRequest( appResource = resolvedAppResource, mainClass = mainClass, @@ -296,19 +296,10 @@ private[spark] class Client( uploadedJarsBase64Contents = uploadJarsBase64Contents) } - def getFileContents(maybeFilePaths: Option[String]): Array[(String, String)] = { + def compressJars(maybeFilePaths: Option[String]): Option[TarGzippedData] = { maybeFilePaths - .map(_.split(",").map(filePath => { - val fileToUpload = new File(filePath) - if (!fileToUpload.isFile) { - throw new IllegalStateException("Provided file to upload for driver extra classpath" + - s" does not exist or is not a file: $filePath") - } else { - val fileBytes = Files.toByteArray(fileToUpload) - val fileBase64 = Base64.encodeBase64String(fileBytes) - (fileToUpload.getName, fileBase64) - } - })).getOrElse(Array.empty[(String, 
String)]) + .map(_.split(",")) + .map(CompressionUtils.createTarGzip(_)) } private def getDriverLauncherService( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala index 4b7bb66083f29..6da1a848b25e7 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala @@ -27,14 +27,19 @@ case class KubernetesCreateSubmissionRequest( val appArgs: Array[String], val sparkProperties: Map[String, String], val secret: String, - val uploadedDriverExtraClasspathBase64Contents: Array[(String, String)] - = Array.empty[(String, String)], - val uploadedJarsBase64Contents: Array[(String, String)] - = Array.empty[(String, String)]) extends SubmitRestProtocolRequest { + val uploadedDriverExtraClasspathBase64Contents: Option[TarGzippedData], + val uploadedJarsBase64Contents: Option[TarGzippedData]) extends SubmitRestProtocolRequest { message = "create" clientSparkVersion = SPARK_VERSION } +case class TarGzippedData( + val dataBase64: String, + val blockSize: Int = 10240, + val recordSize: Int = 512, + val encoding: String +) + @JsonTypeInfo( use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala new file mode 100644 index 0000000000000..805a52bada219 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes + +import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream} +import java.util.zip.{GZIPInputStream, GZIPOutputStream} + +import com.google.common.io.Files +import org.apache.commons.codec.binary.Base64 +import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream, TarArchiveOutputStream} +import org.apache.commons.compress.utils.CharsetNames +import org.apache.commons.io.IOUtils +import scala.collection.mutable + +import org.apache.spark.deploy.rest.TarGzippedData +import org.apache.spark.internal.Logging +import org.apache.spark.util.{ByteBufferOutputStream, Utils} + +private[spark] object CompressionUtils extends Logging { + // Defaults from TarArchiveOutputStream + private val BLOCK_SIZE = 10240 + private val RECORD_SIZE = 512 + private val ENCODING = CharsetNames.UTF_8 + + /** + * Compresses all of the given paths into a gzipped-tar archive, returning the compressed data in + * memory as an instance of {@link TarGzippedData}. The files are taken without consideration to their + * original folder structure, and are added to the tar archive in a flat hierarchy. Directories are + * not allowed, and duplicate file names are de-duplicated by appending a numeric suffix to the file name, + * before the file extension. For example, if paths a/b.txt and b/b.txt were provided, then the files added + * to the tar archive would be b.txt and b-1.txt. + * @param paths A list of file paths to be archived + * @return An in-memory representation of the compressed data. + */ + def createTarGzip(paths: Iterable[String]): TarGzippedData = { + val compressedBytesStream = Utils.tryWithResource(new ByteBufferOutputStream()) { raw => + Utils.tryWithResource(new GZIPOutputStream(raw)) { gzipping => + Utils.tryWithResource(new TarArchiveOutputStream( + gzipping, + BLOCK_SIZE, + RECORD_SIZE, + ENCODING)) { tarStream => + val usedFileNames = mutable.HashSet.empty[String] + for (path <- paths) { + val file = new File(path) + if (!file.isFile) { + throw new IllegalArgumentException(s"Cannot add $path to tarball; either does" + + s" not exist or is a directory.") + } + var resolvedFileName = file.getName + val extension = Files.getFileExtension(file.getName) + val nameWithoutExtension = Files.getNameWithoutExtension(file.getName) + var deduplicationCounter = 1 + while (usedFileNames.contains(resolvedFileName)) { + val oldResolvedFileName = resolvedFileName + resolvedFileName = s"$nameWithoutExtension-$deduplicationCounter.$extension" + logWarning(s"File with name $oldResolvedFileName already exists. Trying to add with" + + s" file name $resolvedFileName instead.") + deduplicationCounter += 1 + } + usedFileNames += resolvedFileName + val tarEntry = new TarArchiveEntry(file, resolvedFileName) + tarStream.putArchiveEntry(tarEntry) + Utils.tryWithResource(new FileInputStream(file)) { fileInput => + IOUtils.copy(fileInput, tarStream) + } + tarStream.closeArchiveEntry() + } + } + } + raw + } + val compressedAsBase64 = Base64.encodeBase64String(compressedBytesStream.toByteBuffer.array) + TarGzippedData( + dataBase64 = compressedAsBase64, + blockSize = BLOCK_SIZE, + recordSize = RECORD_SIZE, + encoding = ENCODING + ) + } + + /** + * Decompresses the provided tar archive to a directory. + * @param compressedData In-memory representation of the compressed data, ideally created via + * {@link createTarGzip}. + * @param rootOutputDir Directory to write the output files to. 
All files from the tarball + * are written here in a flat hierarchy. + * @return List of file paths for each file that was unpacked from the archive. + */ + def unpackAndWriteCompressedFiles( + compressedData: TarGzippedData, + rootOutputDir: File): Seq[String] = { + val paths = mutable.Buffer.empty[String] + val compressedBytes = Base64.decodeBase64(compressedData.dataBase64) + if (!rootOutputDir.exists) { + if (!rootOutputDir.mkdirs) { + throw new IllegalStateException(s"Failed to create output directory for unpacking" + + s" files at ${rootOutputDir.getAbsolutePath}") + } + } else if (rootOutputDir.isFile) { + throw new IllegalArgumentException(s"Root dir for writing decompressed files: " + + s"${rootOutputDir.getAbsolutePath} exists and is not a directory.") + } + Utils.tryWithResource(new ByteArrayInputStream(compressedBytes)) { compressedBytesStream => + Utils.tryWithResource(new GZIPInputStream(compressedBytesStream)) { gzipped => + Utils.tryWithResource(new TarArchiveInputStream( + gzipped, + compressedData.blockSize, + compressedData.recordSize, + compressedData.encoding)) { tarInputStream => + var nextTarEntry = tarInputStream.getNextTarEntry + while (nextTarEntry != null) { + val outputFile = new File(rootOutputDir, nextTarEntry.getName) + Utils.tryWithResource(new FileOutputStream(outputFile)) { fileOutputStream => + IOUtils.copy(tarInputStream, fileOutputStream) + } + paths += outputFile.getAbsolutePath + nextTarEntry = tarInputStream.getNextTarEntry + } + } + } + } + paths.toSeq + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala index 0a2e8176394ab..2ca3d4a8c0656 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -217,30 +217,11 @@ private[spark] class KubernetesSparkRestServer( } private def writeBase64ContentsToFiles( - filesBase64Contents: Array[(String, String)], + maybeCompressedFiles: Option[TarGzippedData], rootDir: File): Seq[String] = { - val resolvedFileNames = new scala.collection.mutable.HashSet[String] - val resolvedFilePaths = new ArrayBuffer[String] - for (file <- filesBase64Contents) { - var currentFileName = file._1 - var deduplicationCounter = 1 - while (resolvedFileNames.contains(currentFileName)) { - // Prepend the deduplication counter so as to not mess with the extension - currentFileName = s"$deduplicationCounter-$currentFileName" - deduplicationCounter += 1 - } - val resolvedFile = new File(rootDir, currentFileName) - val resolvedFilePath = resolvedFile.getAbsolutePath - if (resolvedFile.createNewFile()) { - val fileContents = Base64.decodeBase64(file._2) - Files.write(fileContents, resolvedFile) - } else { - throw new IllegalStateException(s"Could not write jar file to $resolvedFilePath") - } - resolvedFileNames += currentFileName - resolvedFilePaths += resolvedFilePath - } - resolvedFilePaths.toSeq + maybeCompressedFiles.map { compressedFiles => + CompressionUtils.unpackAndWriteCompressedFiles(compressedFiles, rootDir) + }.getOrElse(Seq.empty[String]) } } diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml new file mode 
100644 index 0000000000000..f99838636b349 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml @@ -0,0 +1,33 @@ + + + + 4.0.0 + + org.apache.spark + spark-parent_2.11 + 2.2.0-SNAPSHOT + ../../../pom.xml + + + spark-kubernetes-integration-tests-spark-jobs-helpers_2.11 + jar + Spark Project Kubernetes Integration Tests Spark Jobs Helpers + + + + diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/src/main/java/org/apache/spark/deploy/kubernetes/integrationtest/PiHelper.java b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/src/main/java/org/apache/spark/deploy/kubernetes/integrationtest/PiHelper.java new file mode 100644 index 0000000000000..99d982397bb6e --- /dev/null +++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/src/main/java/org/apache/spark/deploy/kubernetes/integrationtest/PiHelper.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest; + +/** + * Primarily extracted so that a separate jar can be added as a dependency for the + * test Spark job. 
+ */ +public class PiHelper { + public static int helpPi() { + double x = Math.random() * 2 - 1; + double y = Math.random() * 2 - 1; + if (x*x + y*y < 1) { + return 1; + } else { + return 0; + } + } +} diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml index 12b0234ae71bd..59e59aca5109b 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml @@ -29,6 +29,12 @@ Spark Project Kubernetes Integration Tests Spark Jobs + + org.apache.spark + spark-kubernetes-integration-tests-spark-jobs-helpers_${scala.binary.version} + ${project.version} + provided + org.apache.spark spark-core_${scala.binary.version} diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala index 6e4660b771305..d3372749f999e 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala @@ -16,8 +16,7 @@ */ package org.apache.spark.deploy.kubernetes.integrationtest.jobs -import scala.math.random - +import org.apache.spark.deploy.kubernetes.integrationtest.PiHelper import org.apache.spark.sql.SparkSession // Equivalent to SparkPi except does not stop the Spark Context @@ -32,10 +31,8 @@ private[spark] object SparkPiWithInfiniteWait { .getOrCreate() val slices = if (args.length > 0) args(0).toInt else 10 val n = math.min(100000L * slices, Int.MaxValue).toInt // avoid overflow - val count = spark.sparkContext.parallelize(1 until n, slices).map { i => - val x = random * 2 - 1 - val y = random * 2 - 1 - if (x*x + y*y < 1) 1 else 0 + val count = spark.sparkContext.parallelize(1 until n, slices).map { _ => + PiHelper.helpPi() }.reduce(_ + _) // scalastyle:off println println("Pi is roughly " + 4.0 * count / (n - 1)) diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 1e7eb0e12e6df..569527de8e300 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -48,6 +48,12 @@ ${project.version} test + + org.apache.spark + spark-kubernetes-integration-tests-spark-jobs-helpers_${scala.binary.version} + ${project.version} + test + org.apache.spark spark-docker-minimal-bundle_${scala.binary.version} @@ -123,6 +129,13 @@ jar ${project.build.directory}/integration-tests-spark-jobs + + org.apache.spark + spark-kubernetes-integration-tests-spark-jobs-helpers_${scala.binary.version} + ${project.version} + jar + ${project.build.directory}/integration-tests-spark-jobs-helpers + diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 183f666994d38..6247a1674f8d6 100644 --- 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -41,6 +41,11 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .listFiles()(0) .getAbsolutePath + private val HELPER_JAR = Paths.get("target", "integration-tests-spark-jobs-helpers") + .toFile + .listFiles()(0) + .getAbsolutePath + private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) private val MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + @@ -117,6 +122,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .set("spark.kubernetes.namespace", NAMESPACE) .set("spark.kubernetes.driver.docker.image", "spark-driver:latest") .set("spark.kubernetes.executor.docker.image", "spark-executor:latest") + .set("spark.kubernetes.driver.uploads.jars", HELPER_JAR) .set("spark.executor.memory", "500m") .set("spark.executor.cores", "1") .set("spark.executors.instances", "1") @@ -142,6 +148,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--executor-memory", "512m", "--executor-cores", "1", "--num-executors", "1", + "--upload-jars", HELPER_JAR, "--class", MAIN_CLASS, "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", From e2787e8c1a45bda367246cc73178412206acbf33 Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 11 Jan 2017 16:05:16 -0800 Subject: [PATCH 011/156] Use alpine and java 8 for docker images. (#10) * Use alpine and java 8 for docker images. 
* Remove installation of vim and redundant comment --- .../src/main/docker/driver/Dockerfile | 11 +---------- .../src/main/docker/executor/Dockerfile | 11 +---------- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 3bba38d8395ae..7bbabc40c34fc 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -1,12 +1,4 @@ -FROM ubuntu:trusty - -# Upgrade package index -# install a few other useful packages plus Open Jdk 7 -# Remove unneeded /var/lib/apt/lists/* after install to reduce the -# docker image size (by ~30MB) -RUN apt-get update && \ - apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server procps && \ - rm -rf /var/lib/apt/lists/* +FROM anapsix/alpine-java:8 RUN mkdir -p /opt/spark RUN mkdir -p /opt/spark/ui-resources/org/apache/spark/ui/static @@ -18,7 +10,6 @@ ADD sbin /opt/spark/sbin ADD conf /opt/spark/conf ENV SPARK_HOME /opt/spark -ENV JAVA_HOME /usr/lib/jvm/java-7-openjdk-amd64/jre WORKDIR /opt/spark diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index f68f1a3fb2694..f584525cdc5e9 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -1,12 +1,4 @@ -FROM ubuntu:trusty - -# Upgrade package index -# install a few other useful packages plus Open Jdk 7 -# Remove unneeded /var/lib/apt/lists/* after install to reduce the -# docker image size (by ~30MB) -RUN apt-get update && \ - apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server procps && \ - rm -rf /var/lib/apt/lists/* +FROM anapsix/alpine-java:8 RUN mkdir -p /opt/spark RUN mkdir -p /opt/spark/ui-resources/org/apache/spark/ui/static @@ -18,7 +10,6 @@ ADD sbin /opt/spark/sbin ADD conf /opt/spark/conf ENV SPARK_HOME /opt/spark -ENV JAVA_HOME /usr/lib/jvm/java-7-openjdk-amd64/jre WORKDIR /opt/spark From acceb72d96c739918af3b83f02ebf8524727d1a1 Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 11 Jan 2017 18:20:12 -0800 Subject: [PATCH 012/156] Copy the Dockerfiles from docker-minimal-bundle into the distribution. (#12) --- dev/make-distribution.sh | 7 +++++++ .../src/main/docker/driver/Dockerfile | 5 ++++- .../src/main/docker/executor/Dockerfile | 5 ++++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index 6ea319e4362ab..62706b0fffedc 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -175,6 +175,13 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE" # Copy jars cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/" +# Copy docker files +mkdir -p "$DISTDIR/dockerfiles/driver" +mkdir -p "$DISTDIR/dockerfiles/executor" +DOCKERFILES_SRC="$SPARK_HOME/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker" +cp "$DOCKERFILES_SRC/driver/Dockerfile" "$DISTDIR/dockerfiles/driver/Dockerfile" +cp "$DOCKERFILES_SRC/executor/Dockerfile" "$DISTDIR/dockerfiles/executor/Dockerfile" + # Only create the yarn directory if the yarn artifacts were build. 
if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then mkdir "$DISTDIR"/yarn diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 7bbabc40c34fc..308bf392fb202 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -1,7 +1,10 @@ FROM anapsix/alpine-java:8 +# If this docker file is being used in the context of building your images from a Spark distribution, the docker build +# command should be invoked from the top level directory of the Spark distribution. E.g.: +# docker build -t spark-driver:latest -f dockerfiles/driver/Dockerfile . + RUN mkdir -p /opt/spark -RUN mkdir -p /opt/spark/ui-resources/org/apache/spark/ui/static RUN touch /opt/spark/RELEASE ADD jars /opt/spark/jars diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index f584525cdc5e9..164c0a4289cac 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -1,7 +1,10 @@ FROM anapsix/alpine-java:8 +# If this docker file is being used in the context of building your images from a Spark distribution, the docker build +# command should be invoked from the top level directory of the Spark distribution. E.g.: +# docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . + RUN mkdir -p /opt/spark -RUN mkdir -p /opt/spark/ui-resources/org/apache/spark/ui/static RUN touch /opt/spark/RELEASE ADD jars /opt/spark/jars From 24f4bf02f64f0725c849ec1eab3d87b2fd77e594 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Thu, 12 Jan 2017 14:11:02 -0800 Subject: [PATCH 013/156] inherit IO (#13) --- .../deploy/rest/kubernetes/KubernetesSparkRestServer.scala | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala index 2ca3d4a8c0656..837706ca9f5a8 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -164,10 +164,7 @@ private[spark] class KubernetesSparkRestServer( command += s"-Xmx$driverMemory" command += mainClass command ++= appArgs - val pb = new ProcessBuilder(command: _*) - Paths.get(sparkHome, "logs").toFile.mkdirs - pb.redirectOutput(Paths.get(sparkHome, "logs", "stdout").toFile) - pb.redirectError(Paths.get(sparkHome, "logs", "stderr").toFile) + val pb = new ProcessBuilder(command: _*).inheritIO() val process = pb.start() ShutdownHookManager.addShutdownHook(() => { logInfo("Received stop command, shutting down the running Spark application...") From adcc9062bbea2dcd226684afbba4791256c2b94c Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 12 Jan 2017 17:59:11 -0800 Subject: [PATCH 014/156] Error messages when the driver container fails to start. 
(#11) * Error messages when the driver container fails to start. * Fix messages a bit * Use timeout constant * Delete the pod if it fails for any reason (not just timeout) * Actually set submit succeeded * Fix typo --- .../spark/deploy/kubernetes/Client.scala | 83 +++++++++++++++++-- 1 file changed, 77 insertions(+), 6 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 230598d63bed1..6d7de973a52c2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.kubernetes import java.io.File import java.security.SecureRandom -import java.util.concurrent.{Executors, TimeUnit} +import java.util.concurrent.{Executors, TimeoutException, TimeUnit} import javax.net.ssl.X509TrustManager import com.google.common.io.Files @@ -34,7 +34,7 @@ import scala.concurrent.ExecutionContext import scala.concurrent.duration.DurationInt import scala.util.Success -import org.apache.spark.{SPARK_VERSION, SparkConf} +import org.apache.spark.{SPARK_VERSION, SparkConf, SparkException} import org.apache.spark.deploy.rest.{AppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging @@ -130,8 +130,8 @@ private[spark] class Client( val podWatcher = new Watcher[Pod] { override def eventReceived(action: Action, t: Pod): Unit = { if ((action == Action.ADDED || action == Action.MODIFIED) - && t.getStatus.getPhase == "Running" - && !submitCompletedFuture.isDone) { + && t.getStatus.getPhase == "Running" + && !submitCompletedFuture.isDone) { t.getStatus .getContainerStatuses .asScala @@ -216,8 +216,78 @@ private[spark] class Client( .endContainer() .endSpec() .done() - submitCompletedFuture.get(30, TimeUnit.SECONDS) - } + var submitSucceeded = false + try { + submitCompletedFuture.get(LAUNCH_TIMEOUT_SECONDS, TimeUnit.SECONDS) + submitSucceeded = true + } catch { + case e: TimeoutException => + val driverPod = try { + kubernetesClient.pods().withName(kubernetesAppId).get() + } catch { + case throwable: Throwable => + logError(s"Timed out while waiting $LAUNCH_TIMEOUT_SECONDS seconds for the" + + " driver pod to start, but an error occurred while fetching the driver" + + " pod's details.", throwable) + throw new SparkException(s"Timed out while waiting $LAUNCH_TIMEOUT_SECONDS" + + " seconds for the driver pod to start. Unfortunately, in attempting to fetch" + + " the latest state of the pod, another error was thrown. Check the logs for" + + " the error that was thrown in looking up the driver pod.", e) + } + val topLevelMessage = s"The driver pod with name ${driverPod.getMetadata.getName}" + + s" in namespace ${driverPod.getMetadata.getNamespace} was not ready in" + + s" $LAUNCH_TIMEOUT_SECONDS seconds." + val podStatusPhase = if (driverPod.getStatus.getPhase != null) { + s"Latest phase from the pod is: ${driverPod.getStatus.getPhase}" + } else { + "The pod had no final phase." + } + val podStatusMessage = if (driverPod.getStatus.getMessage != null) { + s"Latest message from the pod is: ${driverPod.getStatus.getMessage}" + } else { + "The pod had no final message." 
+ } + val failedDriverContainerStatusString = driverPod.getStatus + .getContainerStatuses + .asScala + .find(_.getName == DRIVER_LAUNCHER_CONTAINER_NAME) + .map(status => { + val lastState = status.getState + if (lastState.getRunning != null) { + "Driver container last state: Running\n" + + s"Driver container started at: ${lastState.getRunning.getStartedAt}" + } else if (lastState.getWaiting != null) { + "Driver container last state: Waiting\n" + + s"Driver container wait reason: ${lastState.getWaiting.getReason}\n" + + s"Driver container message: ${lastState.getWaiting.getMessage}\n" + } else if (lastState.getTerminated != null) { + "Driver container last state: Terminated\n" + + s"Driver container started at: ${lastState.getTerminated.getStartedAt}\n" + + s"Driver container finished at: ${lastState.getTerminated.getFinishedAt}\n" + + s"Driver container exit reason: ${lastState.getTerminated.getReason}\n" + + s"Driver container exit code: ${lastState.getTerminated.getExitCode}\n" + + s"Driver container message: ${lastState.getTerminated.getMessage}" + } else { + "Driver container last state: Unknown" + } + }).getOrElse("The driver container wasn't found in the pod; expected to find" + + s" container with name $DRIVER_LAUNCHER_CONTAINER_NAME") + val finalErrorMessage = s"$topLevelMessage\n" + + s"$podStatusPhase\n" + + s"$podStatusMessage\n\n$failedDriverContainerStatusString" + logError(finalErrorMessage, e) + throw new SparkException(finalErrorMessage, e) + } finally { + if (!submitSucceeded) { + try { + kubernetesClient.pods.withName(kubernetesAppId).delete + } catch { + case throwable: Throwable => + logError("Failed to delete driver pod after it failed to run.", throwable) + } + } + } + } Utils.tryWithResource(kubernetesClient .pods() @@ -338,6 +408,7 @@ private object Client { private val DRIVER_LAUNCHER_CONTAINER_NAME = "spark-kubernetes-driver-launcher" private val SECURE_RANDOM = new SecureRandom() private val SPARK_SUBMISSION_SECRET_BASE_DIR = "/var/run/secrets/spark-submission" + private val LAUNCH_TIMEOUT_SECONDS = 30 def main(args: Array[String]): Unit = { require(args.length >= 2, s"Too few arguments. Usage: ${getClass.getName} " + From 0b81dbf516b0c389db50d237f52f32747dda8056 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Thu, 12 Jan 2017 19:27:44 -0800 Subject: [PATCH 015/156] Fix linter error to make CI happy (#18) --- .../org/apache/spark/launcher/SparkSubmitOptionParser.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java index 94f9bc319b6a2..2b7290a12f8c1 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java @@ -80,7 +80,8 @@ class SparkSubmitOptionParser { protected final String KUBERNETES_MASTER = "--kubernetes-master"; protected final String KUBERNETES_NAMESPACE = "--kubernetes-namespace"; protected final String KUBERNETES_UPLOAD_JARS = "--upload-jars"; - protected final String KUBERNETES_UPLOAD_DRIVER_EXTRA_CLASSPATH = "--upload-driver-extra-classpath"; + protected final String KUBERNETES_UPLOAD_DRIVER_EXTRA_CLASSPATH = + "--upload-driver-extra-classpath"; /** * This is the canonical list of spark-submit options. 
Each entry in the array contains the From e70f427ec99d4dee6668b5eb7e3889288e3db505 Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 13 Jan 2017 14:11:08 -0800 Subject: [PATCH 016/156] Documentation for the current state of the world (#16) * Documentation for the current state of the world. * Adding navigation links from other pages * Address comments, add TODO for things that should be fixed * Address comments, mostly making images section clearer * Virtual runtime -> container runtime --- docs/_layouts/global.html | 1 + docs/index.md | 1 + docs/running-on-kubernetes.md | 224 ++++++++++++++++++++++++++++++++++ 3 files changed, 226 insertions(+) create mode 100644 docs/running-on-kubernetes.md diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index c00d0db63cd10..3c786a6344066 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -99,6 +99,7 @@
 <li><a href="spark-standalone.html">Spark Standalone</a></li>
 <li><a href="running-on-mesos.html">Mesos</a></li>
 <li><a href="running-on-yarn.html">YARN</a></li>
+<li><a href="running-on-kubernetes.html">Kubernetes</a></li>
  • diff --git a/docs/index.md b/docs/index.md index 57b9fa848f4a3..81d37aa5f63a1 100644 --- a/docs/index.md +++ b/docs/index.md @@ -113,6 +113,7 @@ options for deployment: * [Mesos](running-on-mesos.html): deploy a private cluster using [Apache Mesos](http://mesos.apache.org) * [YARN](running-on-yarn.html): deploy Spark on top of Hadoop NextGen (YARN) + * [Kubernetes](running-on-kubernetes.html): deploy Spark on top of Kubernetes **Other Documents:** diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md new file mode 100644 index 0000000000000..5192d9d086618 --- /dev/null +++ b/docs/running-on-kubernetes.md @@ -0,0 +1,224 @@ +--- +layout: global +title: Running Spark on Kubernetes +--- + +Support for running on [Kubernetes](https://kubernetes.io/) is available in experimental status. The feature set is +currently limited and not well-tested. This should not be used in production environments. + +## Setting Up Docker Images + +Kubernetes requires users to supply images that can be deployed into containers within pods. The images are built to +be run in a container runtime environment that Kubernetes supports. Docker is a container runtime environment that is +frequently used with Kubernetes, so Spark provides some support for working with Docker to get started quickly. + +To use Spark on Kubernetes with Docker, images for the driver and the executors need to built and published to an +accessible Docker registry. Spark distributions include the Docker files for the driver and the executor at +`dockerfiles/driver/Dockerfile` and `docker/executor/Dockerfile`, respectively. Use these Docker files to build the +Docker images, and then tag them with the registry that the images should be sent to. Finally, push the images to the +registry. + +For example, if the registry host is `registry-host` and the registry is listening on port 5000: + + cd $SPARK_HOME + docker build -t registry-host:5000/spark-driver:latest -f dockerfiles/driver/Dockerfile . + docker build -t registry-host:5000/spark-executor:latest -f dockerfiles/executor/Dockerfile . + docker push registry-host:5000/spark-driver:latest + docker push registry-host:5000/spark-executor:latest + +## Submitting Applications to Kubernetes + +Kubernetes applications can be executed via `spark-submit`. For example, to compute the value of pi, assuming the images +are set up as described above: + + bin/spark-submit + --deploy-mode cluster + --class org.apache.spark.examples.SparkPi + --master k8s://https://: + --kubernetes-namespace default + --conf spark.executor.instances=5 + --conf spark.app.name=spark-pi + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + examples/jars/spark_2.11-2.2.0.jar + + +The Spark master, specified either via passing the `--master` command line argument to `spark-submit` or by setting +`spark.master` in the application's configuration, must be a URL with the format `k8s://`. Prefixing the +master string with `k8s://` will cause the Spark application to launch on the Kubernetes cluster, with the API server +being contacted at `api_server_url`. The HTTP protocol must also be specified. + +Note that applications can currently only be executed in cluster mode, where the driver and its executors are running on +the cluster. 
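As a concrete illustration of the master URL handling described above (this sketch is not part of the patch; the object and method names are made up for the example), a `k8s://` master string reduces to the API server URL as follows, with the scheme still required explicitly at this stage:

    // Illustrative sketch only: reduce a k8s:// master string to the API server URL.
    // At this point in the implementation the http:// or https:// scheme must be given.
    object MasterUrlExample {
      def apiServerUrl(master: String): String = {
        require(master.startsWith("k8s://"), "Master should be a URL with scheme k8s://")
        val url = master.stripPrefix("k8s://")
        require(url.startsWith("http://") || url.startsWith("https://"),
          "The HTTP protocol must be specified in the Kubernetes master URL.")
        url
      }

      def main(args: Array[String]): Unit = {
        // Prints https://192.168.99.100:8443
        println(apiServerUrl("k8s://https://192.168.99.100:8443"))
      }
    }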
+ +### Adding Other JARs + +Spark allows users to provide dependencies that are bundled into the driver's Docker image, or that are on the local +disk of the submitter's machine. These two types of dependencies are specified via different configuration options to +`spark-submit`: + +* Local jars provided by specifying the `--jars` command line argument to `spark-submit`, or by setting `spark.jars` in + the application's configuration, will be treated as jars that are located on the *disk of the driver Docker + container*. This only applies to jar paths that do not specify a scheme or that have the scheme `file://`. Paths with + other schemes are fetched from their appropriate locations. +* Local jars provided by specifying the `--upload-jars` command line argument to `spark-submit`, or by setting + `spark.kubernetes.driver.uploads.jars` in the application's configuration, will be treated as jars that are located on + the *disk of the submitting machine*. These jars are uploaded to the driver docker container before executing the + application. + +* A main application resource path that does not have a scheme or that has the scheme `file://` is assumed to be on the + *disk of the submitting machine*. This resource is uploaded to the driver docker container before executing the + application. A remote path can still be specified and the resource will be fetched from the appropriate location. + +In all of these cases, the jars are placed on the driver's classpath, and are also sent to the executors. Below are some +examples of providing application dependencies. + +To submit an application with both the main resource and two other jars living on the submitting user's machine: + + bin/spark-submit + --deploy-mode cluster + --class com.example.applications.SampleApplication + --master k8s://https://192.168.99.100 + --kubernetes-namespace default + --upload-jars /home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + /home/exampleuser/exampleapplication/main.jar + +Note that since passing the jars through the `--upload-jars` command line argument is equivalent to setting the +`spark.kubernetes.driver.uploads.jars` Spark property, the above will behave identically to this command: + + bin/spark-submit + --deploy-mode cluster + --class com.example.applications.SampleApplication + --master k8s://https://192.168.99.100 + --kubernetes-namespace default + --conf spark.kubernetes.driver.uploads.jars=/home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + /home/exampleuser/exampleapplication/main.jar + +To specify a main application resource that can be downloaded from an HTTP service, and if a plugin for that application +is located in the jar `/opt/spark-plugins/app-plugin.jar` on the docker image's disk: + + bin/spark-submit + --deploy-mode cluster + --class com.example.applications.PluggableApplication + --master k8s://https://192.168.99.100 + --kubernetes-namespace default + --jars /opt/spark-plugins/app-plugin.jar + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest + --conf 
spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + http://example.com:8080/applications/sparkpluggable/app.jar + +Note that since passing the jars through the `--jars` command line argument is equivalent to setting the `spark.jars` +Spark property, the above will behave identically to this command: + + bin/spark-submit + --deploy-mode cluster + --class com.example.applications.PluggableApplication + --master k8s://https://192.168.99.100 + --kubernetes-namespace default + --conf spark.jars=file:///opt/spark-plugins/app-plugin.jar + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + http://example.com:8080/applications/sparkpluggable/app.jar + +### Spark Properties + +Below are some other common properties that are specific to Kubernetes. Most of the other configurations are the same +from the other deployment modes. See the [configuration page](configuration.html) for more information on those. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
+<tr>
+  <td><code>spark.kubernetes.namespace</code></td>
+  <td>(none)</td>
+  <td>
+    The namespace that will be used for running the driver and executor pods. Must be specified. When using
+    <code>spark-submit</code> in cluster mode, this can also be passed to <code>spark-submit</code> via the
+    <code>--kubernetes-namespace</code> command line argument.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.driver.docker.image</code></td>
+  <td><code>spark-driver:2.2.0</code></td>
+  <td>
+    Docker image to use for the driver. Specify this using the standard Docker tag format.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.executor.docker.image</code></td>
+  <td><code>spark-executor:2.2.0</code></td>
+  <td>
+    Docker image to use for the executors. Specify this using the standard Docker tag format.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.submit.caCertFile</code></td>
+  <td>(none)</td>
+  <td>
+    CA cert file for connecting to Kubernetes over SSL. This file should be located on the submitting machine's disk.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.submit.clientKeyFile</code></td>
+  <td>(none)</td>
+  <td>
+    Client key file for authenticating against the Kubernetes API server. This file should be located on the
+    submitting machine's disk.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.submit.clientCertFile</code></td>
+  <td>(none)</td>
+  <td>
+    Client cert file for authenticating against the Kubernetes API server. This file should be located on the
+    submitting machine's disk.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.submit.serviceAccountName</code></td>
+  <td><code>default</code></td>
+  <td>
+    Service account that is used when running the driver pod. The driver pod uses this service account when
+    requesting executor pods from the API server.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.driver.uploads.jars</code></td>
+  <td>(none)</td>
+  <td>
+    Comma-separated list of jars to be sent to the driver and all executors when submitting the application in
+    cluster mode. Refer to adding other jars for more information.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.driver.uploads.driverExtraClasspath</code></td>
+  <td>(none)</td>
+  <td>
+    Comma-separated list of jars to be sent to the driver only when submitting the application in cluster mode.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.executor.memoryOverhead</code></td>
+  <td>executorMemory * 0.10, with minimum of 384</td>
+  <td>
+    The amount of off-heap memory (in megabytes) to be allocated per executor. This is memory that accounts for
+    things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the executor
+    size (typically 6-10%).
+  </td>
+</tr>
    + +## Current Limitations + +Running Spark on Kubernetes is currently an experimental feature. Some restrictions on the current implementation that +should be lifted in the future include: +* Applications can only use a fixed number of executors. Dynamic allocation is not supported. +* Applications can only run in cluster mode. +* Only Scala and Java applications can be run. From b25bc8b712277a88f00b017a6a69a23585a1d19b Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 13 Jan 2017 14:56:08 -0800 Subject: [PATCH 017/156] Development workflow documentation for the current state of the world. (#20) * Development workflow documentation for the current state of the world. * Address comments. * Clarified code change and added ticket link --- resource-managers/kubernetes/README.md | 56 ++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 resource-managers/kubernetes/README.md diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md new file mode 100644 index 0000000000000..3c11efa38d5af --- /dev/null +++ b/resource-managers/kubernetes/README.md @@ -0,0 +1,56 @@ +--- +layout: global +title: Spark on Kubernetes Development +--- + +[Kubernetes](https://kubernetes.io/) is a framework for easily deploying, scaling, and managing containerized +applications. It would be useful for a user to run their Spark jobs on a Kubernetes cluster alongside their +other Kubernetes-managed applications. For more about the motivations for adding this feature, see the umbrella JIRA +ticket that tracks this project: [SPARK-18278](https://issues.apache.org/jira/browse/SPARK-18278). + +This submodule is an initial implementation of allowing Kubernetes to be a +supported cluster manager for Spark, along with Mesos, Hadoop YARN, and Standalone. This document provides a summary of +important matters to keep in mind when developing this feature. + +# Building Spark with Kubernetes Support + +To build Spark with Kubernetes support, use the `kubernetes` profile when invoking Maven. For example, to simply compile +the Kubernetes core implementation module along with its dependencies: + + build/mvn compile -Pkubernetes -pl resource-managers/kubernetes/core -am + +To build a distribution of Spark with Kubernetes support, use the `dev/make-distribution.sh` script, and add the +`kubernetes` profile as part of the build arguments. Any other build arguments can be specified as one would expect when +building Spark normally. For example, to build Spark against Hadoop 2.7 and Kubernetes: + + dev/make-distribution.sh --tgz -Phadoop2.7 -Pkubernetes + +# Kubernetes Code Modules + +Below is a list of the submodules for this cluster manager and what they do. + +* `core`: Implementation of the Kubernetes cluster manager support. +* `integration-tests`: Integration tests for the project. +* `docker-minimal-bundle`: Base Dockerfiles for the driver and the executors. The Dockerfiles are used for integration + tests as well as being provided in packaged distributions of Spark. +* `integration-tests-spark-jobs`: Spark jobs that are only used in integration tests. +* `integration-tests-spark-jobs-helpers`: Dependencies for the spark jobs used in integration tests. These dependencies + are separated out to facilitate testing the shipping of jars to drivers running on Kubernetes clusters. + +# Running the Kubernetes Integration Tests + +Note that the integration test framework is currently being heavily revised and is subject to change. 
+ +Running any of the integration tests requires including `kubernetes-integration-tests` profile in the build command. In +order to prepare the environment for running the integration tests, the `pre-integration-test` step must be run in Maven +on the `resource-managers/kubernetes/integration-tests` module: + + build/mvn pre-integration-test -Pkubernetes -Pkubernetes-integration-tests -pl resource-managers/kubernetes/integration-tests -am + +Afterwards, the integration tests can be executed with Maven or your IDE. Note that when running tests from an IDE, the +`pre-integration-test` phase must be run every time the Spark main code changes. When running tests from the +command line, the `pre-integration-test` phase should automatically be invoked if the `integration-test` phase is run. + +# Usage Guide + +See the [usage guide](../../docs/running-on-kubernetes.md) for more information. From 761b3175c0cbe282dae1a23144669a5003f83e39 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Fri, 13 Jan 2017 15:05:22 -0800 Subject: [PATCH 018/156] Added service name as prefix to executor pods (#14) * Added service name as prefix to executor pods to be able to tell them apart from kubectl output * Addressed comments --- .../cluster/kubernetes/KubernetesClusterSchedulerBackend.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 4e099cea3198b..2717d2f37d910 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -155,7 +155,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private def allocateNewExecutorPod(): (String, Pod) = { val executorKubernetesId = UUID.randomUUID().toString.replaceAll("-", "") val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString - val name = s"exec$executorKubernetesId" + val name = s"$kubernetesDriverServiceName-exec-$executorKubernetesId" val selectors = Map(SPARK_EXECUTOR_SELECTOR -> executorId, SPARK_APP_SELECTOR -> applicationId()).asJava val executorMemoryQuantity = new QuantityBuilder(false) From 8739b41db6ea93f4f7f3f3e982752366611ea8bf Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Fri, 13 Jan 2017 20:44:56 -0800 Subject: [PATCH 019/156] Add kubernetes profile to travis CI yml file (#21) * Add kubernetes profile to travis yml file * Fix long lines in CompressionUtils.scala --- .travis.yml | 2 +- .../deploy/rest/kubernetes/CompressionUtils.scala | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8739849a20798..a118421eb45e0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -44,7 +44,7 @@ notifications: # 5. Run maven install before running lint-java. install: - export MAVEN_SKIP_RC=1 - - build/mvn -T 4 -q -DskipTests -Pmesos -Pyarn -Phadoop-2.3 -Pkinesis-asl -Phive -Phive-thriftserver install + - build/mvn -T 4 -q -DskipTests -Pmesos -Pyarn -Phadoop-2.3 -Pkubernetes -Pkinesis-asl -Phive -Phive-thriftserver install # 6. Run lint-java. 
script: diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala index 805a52bada219..1c95dacc7eb01 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala @@ -38,11 +38,11 @@ private[spark] object CompressionUtils extends Logging { /** * Compresses all of the given paths into a gzipped-tar archive, returning the compressed data in - * memory as an instance of {@link TarGzippedData}. The files are taken without consideration to their - * original folder structure, and are added to the tar archive in a flat hierarchy. Directories are - * not allowed, and duplicate file names are de-duplicated by appending a numeric suffix to the file name, - * before the file extension. For example, if paths a/b.txt and b/b.txt were provided, then the files added - * to the tar archive would be b.txt and b-1.txt. + * memory as an instance of {@link TarGzippedData}. The files are taken without consideration to + * their original folder structure, and are added to the tar archive in a flat hierarchy. + * Directories are not allowed, and duplicate file names are de-duplicated by appending a numeric + * suffix to the file name, before the file extension. For example, if paths a/b.txt and b/b.txt + * were provided, then the files added to the tar archive would be b.txt and b-1.txt. * @param paths A list of file paths to be archived * @return An in-memory representation of the compressed data. */ From 928e00eb19be53071f247e98bf9a74897417bc62 Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Tue, 17 Jan 2017 17:24:58 +0000 Subject: [PATCH 020/156] Improved the example commands in running-on-k8s document. (#25) * Improved the example commands in running-on-k8s document. * Fixed more example commands. * Fixed typo. --- docs/running-on-kubernetes.md | 84 +++++++++++++++++------------------ 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 5192d9d086618..234c9870548c7 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -31,16 +31,16 @@ For example, if the registry host is `registry-host` and the registry is listeni Kubernetes applications can be executed via `spark-submit`. 
For example, to compute the value of pi, assuming the images are set up as described above: - bin/spark-submit - --deploy-mode cluster - --class org.apache.spark.examples.SparkPi - --master k8s://https://: - --kubernetes-namespace default - --conf spark.executor.instances=5 - --conf spark.app.name=spark-pi - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest - examples/jars/spark_2.11-2.2.0.jar + bin/spark-submit \ + --deploy-mode cluster \ + --class org.apache.spark.examples.SparkPi \ + --master k8s://https://: \ + --kubernetes-namespace default \ + --conf spark.executor.instances=5 \ + --conf spark.app.name=spark-pi \ + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ + examples/jars/spark_examples_2.11-2.2.0.jar The Spark master, specified either via passing the `--master` command line argument to `spark-submit` or by setting @@ -75,53 +75,53 @@ examples of providing application dependencies. To submit an application with both the main resource and two other jars living on the submitting user's machine: - bin/spark-submit - --deploy-mode cluster - --class com.example.applications.SampleApplication - --master k8s://https://192.168.99.100 - --kubernetes-namespace default - --upload-jars /home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + bin/spark-submit \ + --deploy-mode cluster \ + --class com.example.applications.SampleApplication \ + --master k8s://https://192.168.99.100 \ + --kubernetes-namespace default \ + --upload-jars /home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar \ + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ /home/exampleuser/exampleapplication/main.jar Note that since passing the jars through the `--upload-jars` command line argument is equivalent to setting the `spark.kubernetes.driver.uploads.jars` Spark property, the above will behave identically to this command: - bin/spark-submit - --deploy-mode cluster - --class com.example.applications.SampleApplication - --master k8s://https://192.168.99.100 - --kubernetes-namespace default - --conf spark.kubernetes.driver.uploads.jars=/home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + bin/spark-submit \ + --deploy-mode cluster \ + --class com.example.applications.SampleApplication \ + --master k8s://https://192.168.99.100 \ + --kubernetes-namespace default \ + --conf spark.kubernetes.driver.uploads.jars=/home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar \ + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ /home/exampleuser/exampleapplication/main.jar To specify a main application resource that can be downloaded from an HTTP 
service, and if a plugin for that application is located in the jar `/opt/spark-plugins/app-plugin.jar` on the docker image's disk: - bin/spark-submit - --deploy-mode cluster - --class com.example.applications.PluggableApplication - --master k8s://https://192.168.99.100 - --kubernetes-namespace default - --jars /opt/spark-plugins/app-plugin.jar - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + bin/spark-submit \ + --deploy-mode cluster \ + --class com.example.applications.PluggableApplication \ + --master k8s://https://192.168.99.100 \ + --kubernetes-namespace default \ + --jars /opt/spark-plugins/app-plugin.jar \ + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ http://example.com:8080/applications/sparkpluggable/app.jar Note that since passing the jars through the `--jars` command line argument is equivalent to setting the `spark.jars` Spark property, the above will behave identically to this command: - bin/spark-submit - --deploy-mode cluster - --class com.example.applications.PluggableApplication - --master k8s://https://192.168.99.100 - --kubernetes-namespace default - --conf spark.jars=file:///opt/spark-plugins/app-plugin.jar - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + bin/spark-submit \ + --deploy-mode cluster \ + --class com.example.applications.PluggableApplication \ + --master k8s://https://192.168.99.100 \ + --kubernetes-namespace default \ + --conf spark.jars=file:///opt/spark-plugins/app-plugin.jar \ + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ http://example.com:8080/applications/sparkpluggable/app.jar ### Spark Properties From 3e3c4d4ac9450bff5ad27b7b20faba943623b86e Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Wed, 18 Jan 2017 11:24:43 -0800 Subject: [PATCH 021/156] Fix spacing for command highlighting (#31) --- resource-managers/kubernetes/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md index 3c11efa38d5af..62764dcb2ca03 100644 --- a/resource-managers/kubernetes/README.md +++ b/resource-managers/kubernetes/README.md @@ -45,7 +45,7 @@ Running any of the integration tests requires including `kubernetes-integration- order to prepare the environment for running the integration tests, the `pre-integration-test` step must be run in Maven on the `resource-managers/kubernetes/integration-tests` module: - build/mvn pre-integration-test -Pkubernetes -Pkubernetes-integration-tests -pl resource-managers/kubernetes/integration-tests -am + build/mvn pre-integration-test -Pkubernetes -Pkubernetes-integration-tests -pl resource-managers/kubernetes/integration-tests -am Afterwards, the integration tests can be executed with Maven or your IDE. Note that when running tests from an IDE, the `pre-integration-test` phase must be run every time the Spark main code changes. 
When running tests from the From 36c4e949756a389d4502ccb6529b9af4541b9805 Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 18 Jan 2017 17:30:09 -0800 Subject: [PATCH 022/156] Support custom labels on the driver pod. (#27) * Support custom labels on the driver pod. * Add integration test and fix logic. * Fix tests * Fix minor formatting mistake * Reduce unnecessary diff --- docs/running-on-kubernetes.md | 8 +++++ .../spark/deploy/kubernetes/Client.scala | 35 +++++++++++++++---- .../integrationtest/KubernetesSuite.scala | 34 ++++++++++++++++++ 3 files changed, 70 insertions(+), 7 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 234c9870548c7..14e2df4ed0702 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -213,6 +213,14 @@ from the other deployment modes. See the [configuration page](configuration.html (typically 6-10%). + + spark.kubernetes.driver.labels + (none) + + Custom labels that will be added to the driver pod. This should be a comma-separated list of label key-value pairs, + where each label is in the format key=value. + + ## Current Limitations diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 6d7de973a52c2..073afcbba7b52 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -77,6 +77,8 @@ private[spark] class Client( private val serviceAccount = sparkConf.get("spark.kubernetes.submit.serviceAccountName", "default") + private val customLabels = sparkConf.get("spark.kubernetes.driver.labels", "") + private implicit val retryableExecutionContext = ExecutionContext .fromExecutorService( Executors.newSingleThreadExecutor(new ThreadFactoryBuilder() @@ -85,6 +87,7 @@ private[spark] class Client( .build())) def run(): Unit = { + val parsedCustomLabels = parseCustomLabels(customLabels) var k8ConfBuilder = new ConfigBuilder() .withApiVersion("v1") .withMasterUrl(master) @@ -109,14 +112,15 @@ private[spark] class Client( .withType("Opaque") .done() try { - val selectors = Map(DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue).asJava + val resolvedSelectors = (Map(DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue) + ++ parsedCustomLabels).asJava val (servicePorts, containerPorts) = configurePorts() val service = kubernetesClient.services().createNew() .withNewMetadata() .withName(kubernetesAppId) .endMetadata() .withNewSpec() - .withSelector(selectors) + .withSelector(resolvedSelectors) .withPorts(servicePorts.asJava) .endSpec() .done() @@ -137,7 +141,7 @@ private[spark] class Client( .asScala .find(status => status.getName == DRIVER_LAUNCHER_CONTAINER_NAME && status.getReady) match { - case Some(status) => + case Some(_) => try { val driverLauncher = getDriverLauncherService( k8ClientConfig, master) @@ -184,7 +188,7 @@ private[spark] class Client( kubernetesClient.pods().createNew() .withNewMetadata() .withName(kubernetesAppId) - .withLabels(selectors) + .withLabels(resolvedSelectors) .endMetadata() .withNewSpec() .withRestartPolicy("OnFailure") @@ -291,7 +295,7 @@ private[spark] class Client( Utils.tryWithResource(kubernetesClient .pods() - .withLabels(selectors) + .withLabels(resolvedSelectors) .watch(podWatcher)) { createDriverPod } } finally { 
kubernetesClient.secrets().delete(secret) @@ -336,7 +340,7 @@ private[spark] class Client( .getOption("spark.ui.port") .map(_.toInt) .getOrElse(DEFAULT_UI_PORT)) - (servicePorts.toSeq, containerPorts.toSeq) + (servicePorts, containerPorts) } private def buildSubmissionRequest(): KubernetesCreateSubmissionRequest = { @@ -366,7 +370,7 @@ private[spark] class Client( uploadedJarsBase64Contents = uploadJarsBase64Contents) } - def compressJars(maybeFilePaths: Option[String]): Option[TarGzippedData] = { + private def compressJars(maybeFilePaths: Option[String]): Option[TarGzippedData] = { maybeFilePaths .map(_.split(",")) .map(CompressionUtils.createTarGzip(_)) @@ -391,6 +395,23 @@ private[spark] class Client( sslSocketFactory = sslContext.getSocketFactory, trustContext = trustManager) } + + private def parseCustomLabels(labels: String): Map[String, String] = { + labels.split(",").map(_.trim).filterNot(_.isEmpty).map(label => { + label.split("=", 2).toSeq match { + case Seq(k, v) => + require(k != DRIVER_LAUNCHER_SELECTOR_LABEL, "Label with key" + + s" $DRIVER_LAUNCHER_SELECTOR_LABEL cannot be used in" + + " spark.kubernetes.driver.labels, as it is reserved for Spark's" + + " internal configuration.") + (k, v) + case _ => + throw new SparkException("Custom labels set by spark.kubernetes.driver.labels" + + " must be a comma-separated list of key-value pairs, with format =." + + s" Got label: $label. All labels: $labels") + } + }).toMap + } } private object Client { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 6247a1674f8d6..7b3c2b93b865b 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -161,4 +161,38 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "spark-pi", NAMESPACE, "spark-ui-port") expectationsForStaticAllocation(sparkMetricsService) } + + test("Run with custom labels") { + val args = Array( + "--master", s"k8s://https://${Minikube.getMinikubeIp}:8443", + "--deploy-mode", "cluster", + "--kubernetes-namespace", NAMESPACE, + "--name", "spark-pi", + "--executor-memory", "512m", + "--executor-cores", "1", + "--num-executors", "1", + "--upload-jars", HELPER_JAR, + "--class", MAIN_CLASS, + "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", + "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", + "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", + "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", + "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", + "--conf", "spark.kubernetes.driver.labels=label1=label1value,label2=label2value", + EXAMPLES_JAR) + SparkSubmit.main(args) + val driverPodLabels = minikubeKubernetesClient + .pods + .withName("spark-pi") + .get + .getMetadata + .getLabels + // We can't match all of the selectors directly since one of the selectors is based on the + // launch time. 
+ assert(driverPodLabels.size == 3, "Unexpected number of pod labels.") + assert(driverPodLabels.containsKey("driver-launcher-selector"), "Expected driver launcher" + + " selector label to be present.") + assert(driverPodLabels.get("label1") == "label1value", "Unexpected value for label1") + assert(driverPodLabels.get("label2") == "label2value", "Unexpected value for label2") + } } From b6c57c707a93b5d201ee381fac60c751bf068f9e Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Wed, 18 Jan 2017 17:34:02 -0800 Subject: [PATCH 023/156] Make pod name unique using the submission timestamp (#32) --- .../scala/org/apache/spark/deploy/kubernetes/Client.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 073afcbba7b52..30eaa6269cf47 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -56,10 +56,10 @@ private[spark] class Client( private val master = rawMaster.replaceFirst("k8s://", "") private val launchTime = System.currentTimeMillis - private val kubernetesAppId = sparkConf.getOption("spark.app.name") + private val appName = sparkConf.getOption("spark.app.name") .orElse(sparkConf.getOption("spark.app.id")) - .getOrElse(s"spark-$launchTime") - + .getOrElse("spark") + private val kubernetesAppId = s"$appName-$launchTime" private val secretName = s"spark-submission-server-secret-$kubernetesAppId" private val driverLauncherSelectorValue = s"driver-launcher-$launchTime" private val driverDockerImage = sparkConf.get( From 3fd9c62f4b9c23fe72901970d62687cc47a36c81 Mon Sep 17 00:00:00 2001 From: mccheah Date: Mon, 23 Jan 2017 18:02:45 -0800 Subject: [PATCH 024/156] A number of small tweaks to the MVP. (#23) * A number of small tweaks to the MVP. - Master protocol defaults to https if not specified - Removed upload driver extra classpath functionality - Added ability to specify main app resource with container:// URI - Updated docs to reflect all of the above - Add examples to Docker images, mostly for integration testing but could be useful for easily getting started without shipping anything * Add example to documentation. 
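Before the diff, a small sketch of the "default to https" master handling this commit introduces; it is illustrative only (the real change is the `resolveK8sMaster` helper added to `Client` below) and supersedes the explicit-scheme behavior sketched earlier:

    // Sketch of master resolution after this change: the k8s:// prefix is still
    // required, but a missing scheme now defaults to https (see Client.resolveK8sMaster).
    object ResolveMasterExample {
      def resolveK8sMaster(rawMasterString: String): String = {
        require(rawMasterString.startsWith("k8s://"),
          "Master URL should start with k8s:// in Kubernetes mode.")
        val masterWithoutK8sPrefix = rawMasterString.stripPrefix("k8s://")
        if (masterWithoutK8sPrefix.startsWith("http://")
            || masterWithoutK8sPrefix.startsWith("https://")) {
          masterWithoutK8sPrefix
        } else {
          s"https://$masterWithoutK8sPrefix"
        }
      }

      def main(args: Array[String]): Unit = {
        println(resolveK8sMaster("k8s://example.com:443"))          // https://example.com:443
        println(resolveK8sMaster("k8s://http://example.com:8443"))  // http://example.com:8443
      }
    }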
--- docs/running-on-kubernetes.md | 49 +++-- .../spark/deploy/kubernetes/Client.scala | 40 ++-- .../rest/KubernetesRestProtocolMessages.scala | 4 +- .../KubernetesSparkRestServer.scala | 183 ++++++++++-------- .../KubernetesClusterSchedulerBackend.scala | 9 +- .../kubernetes/docker-minimal-bundle/pom.xml | 7 + .../src/main/assembly/driver-assembly.xml | 20 +- .../src/main/assembly/executor-assembly.xml | 11 ++ .../src/main/docker/driver/Dockerfile | 1 + .../src/main/docker/executor/Dockerfile | 1 + .../integrationtest/KubernetesSuite.scala | 104 +++++++++- 11 files changed, 287 insertions(+), 142 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 14e2df4ed0702..5a73b1ad1ea29 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -42,11 +42,12 @@ are set up as described above: --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ examples/jars/spark_examples_2.11-2.2.0.jar - The Spark master, specified either via passing the `--master` command line argument to `spark-submit` or by setting `spark.master` in the application's configuration, must be a URL with the format `k8s://`. Prefixing the master string with `k8s://` will cause the Spark application to launch on the Kubernetes cluster, with the API server -being contacted at `api_server_url`. The HTTP protocol must also be specified. +being contacted at `api_server_url`. If no HTTP protocol is specified in the URL, it defaults to `https`. For example, +setting the master to `k8s://example.com:443` is equivalent to setting it to `k8s://https://example.com:443`, but to +connect without SSL on a different port, the master would be set to `k8s://http://example.com:8443`. Note that applications can currently only be executed in cluster mode, where the driver and its executors are running on the cluster. @@ -58,17 +59,18 @@ disk of the submitter's machine. These two types of dependencies are specified v `spark-submit`: * Local jars provided by specifying the `--jars` command line argument to `spark-submit`, or by setting `spark.jars` in - the application's configuration, will be treated as jars that are located on the *disk of the driver Docker - container*. This only applies to jar paths that do not specify a scheme or that have the scheme `file://`. Paths with - other schemes are fetched from their appropriate locations. + the application's configuration, will be treated as jars that are located on the *disk of the driver container*. This + only applies to jar paths that do not specify a scheme or that have the scheme `file://`. Paths with other schemes are + fetched from their appropriate locations. * Local jars provided by specifying the `--upload-jars` command line argument to `spark-submit`, or by setting `spark.kubernetes.driver.uploads.jars` in the application's configuration, will be treated as jars that are located on the *disk of the submitting machine*. These jars are uploaded to the driver docker container before executing the application. - * A main application resource path that does not have a scheme or that has the scheme `file://` is assumed to be on the *disk of the submitting machine*. This resource is uploaded to the driver docker container before executing the application. A remote path can still be specified and the resource will be fetched from the appropriate location. +* A main application resource path that has the scheme `container://` is assumed to be on the *disk of the driver + container*. 
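The scheme distinctions in the list above map onto the `AppResource` variants used by the submission client in this patch (`UploadedAppResource`, `ContainerAppResource`, `RemoteAppResource`). The following is a simplified sketch of that scheme-based dispatch, with illustrative type names of my own rather than the patch's actual classes:

    import java.net.URI

    // Simplified sketch: classify a main application resource by its URI scheme.
    // Only the scheme names come from the patch; the types here are illustrative.
    sealed trait AppResourceSketch
    case class UploadedFromSubmitter(path: String) extends AppResourceSketch // no scheme or file://
    case class OnDriverContainer(path: String) extends AppResourceSketch     // container://
    case class FetchedRemotely(uri: String) extends AppResourceSketch        // http://, hdfs://, ...

    object AppResourceSketch {
      def classify(resource: String): AppResourceSketch = {
        val uri = URI.create(resource)
        Option(uri.getScheme).getOrElse("file") match {
          case "file"      => UploadedFromSubmitter(uri.getPath)
          case "container" => OnDriverContainer(uri.getPath)
          case _           => FetchedRemotely(resource)
        }
      }

      def main(args: Array[String]): Unit = {
        println(classify("/home/exampleuser/exampleapplication/main.jar"))
        println(classify("container:///home/applications/examples/example.jar"))
        println(classify("http://example.com:8080/applications/sparkpluggable/app.jar"))
      }
    }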
In all of these cases, the jars are placed on the driver's classpath, and are also sent to the executors. Below are some examples of providing application dependencies. @@ -78,8 +80,7 @@ To submit an application with both the main resource and two other jars living o bin/spark-submit \ --deploy-mode cluster \ --class com.example.applications.SampleApplication \ - --master k8s://https://192.168.99.100 \ - --kubernetes-namespace default \ + --master k8s://192.168.99.100 \ --upload-jars /home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar \ --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ @@ -91,8 +92,7 @@ Note that since passing the jars through the `--upload-jars` command line argume bin/spark-submit \ --deploy-mode cluster \ --class com.example.applications.SampleApplication \ - --master k8s://https://192.168.99.100 \ - --kubernetes-namespace default \ + --master k8s://192.168.99.100 \ --conf spark.kubernetes.driver.uploads.jars=/home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar \ --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ @@ -104,8 +104,7 @@ is located in the jar `/opt/spark-plugins/app-plugin.jar` on the docker image's bin/spark-submit \ --deploy-mode cluster \ --class com.example.applications.PluggableApplication \ - --master k8s://https://192.168.99.100 \ - --kubernetes-namespace default \ + --master k8s://192.168.99.100 \ --jars /opt/spark-plugins/app-plugin.jar \ --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ @@ -117,13 +116,22 @@ Spark property, the above will behave identically to this command: bin/spark-submit \ --deploy-mode cluster \ --class com.example.applications.PluggableApplication \ - --master k8s://https://192.168.99.100 \ - --kubernetes-namespace default \ + --master k8s://192.168.99.100 \ --conf spark.jars=file:///opt/spark-plugins/app-plugin.jar \ --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ http://example.com:8080/applications/sparkpluggable/app.jar +To specify a main application resource that is in the Docker image, and if it has no other dependencies: + + bin/spark-submit \ + --deploy-mode cluster \ + --class com.example.applications.PluggableApplication \ + --master k8s://192.168.99.100:8443 \ + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ + container:///home/applications/examples/example.jar + ### Spark Properties Below are some other common properties that are specific to Kubernetes. Most of the other configurations are the same @@ -133,10 +141,9 @@ from the other deployment modes. See the [configuration page](configuration.html Property NameDefaultMeaning spark.kubernetes.namespace - - (none) + default - The namespace that will be used for running the driver and executor pods. Must be specified. When using + The namespace that will be used for running the driver and executor pods. 
When using spark-submit in cluster mode, this can also be passed to spark-submit via the --kubernetes-namespace command line argument. @@ -196,14 +203,6 @@ from the other deployment modes. See the [configuration page](configuration.html mode. Refer to adding other jars for more information. - - - spark.kubernetes.driver.uploads.driverExtraClasspath - (none) - - Comma-separated list of jars to be sent to the driver only when submitting the application in cluster mode. - - spark.kubernetes.executor.memoryOverhead executorMemory * 0.10, with minimum of 384 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 30eaa6269cf47..fe3256b9e12be 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -35,7 +35,7 @@ import scala.concurrent.duration.DurationInt import scala.util.Success import org.apache.spark.{SPARK_VERSION, SparkConf, SparkException} -import org.apache.spark.deploy.rest.{AppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} +import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging import org.apache.spark.util.Utils @@ -47,13 +47,8 @@ private[spark] class Client( appArgs: Array[String]) extends Logging { import Client._ - private val namespace = sparkConf.getOption("spark.kubernetes.namespace").getOrElse( - throw new IllegalArgumentException("Namespace must be provided in spark.kubernetes.namespace")) - private val rawMaster = sparkConf.get("spark.master") - if (!rawMaster.startsWith("k8s://")) { - throw new IllegalArgumentException("Master should be a URL with scheme k8s://") - } - private val master = rawMaster.replaceFirst("k8s://", "") + private val namespace = sparkConf.get("spark.kubernetes.namespace", "default") + private val master = resolveK8sMaster(sparkConf.get("spark.master")) private val launchTime = System.currentTimeMillis private val appName = sparkConf.getOption("spark.app.name") @@ -64,8 +59,6 @@ private[spark] class Client( private val driverLauncherSelectorValue = s"driver-launcher-$launchTime" private val driverDockerImage = sparkConf.get( "spark.kubernetes.driver.docker.image", s"spark-driver:$SPARK_VERSION") - private val uploadedDriverExtraClasspath = sparkConf - .getOption("spark.kubernetes.driver.uploads.driverExtraClasspath") private val uploadedJars = sparkConf.getOption("spark.kubernetes.driver.uploads.jars") private val secretBase64String = { @@ -112,12 +105,15 @@ private[spark] class Client( .withType("Opaque") .done() try { - val resolvedSelectors = (Map(DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue) + val resolvedSelectors = (Map( + DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue, + SPARK_APP_NAME_LABEL -> appName) ++ parsedCustomLabels).asJava val (servicePorts, containerPorts) = configurePorts() val service = kubernetesClient.services().createNew() .withNewMetadata() .withName(kubernetesAppId) + .withLabels(Map(SPARK_APP_NAME_LABEL -> appName).asJava) .endMetadata() .withNewSpec() .withSelector(resolvedSelectors) @@ -355,10 +351,10 @@ private[spark] class Client( 
val fileBytes = Files.toByteArray(appFile) val fileBase64 = Base64.encodeBase64String(fileBytes) UploadedAppResource(resourceBase64Contents = fileBase64, name = appFile.getName) + case "container" => ContainerAppResource(appResourceUri.getPath) case other => RemoteAppResource(other) } - val uploadDriverExtraClasspathBase64Contents = compressJars(uploadedDriverExtraClasspath) val uploadJarsBase64Contents = compressJars(uploadedJars) KubernetesCreateSubmissionRequest( appResource = resolvedAppResource, @@ -366,7 +362,6 @@ private[spark] class Client( appArgs = appArgs, secret = secretBase64String, sparkProperties = sparkConf.getAll.toMap, - uploadedDriverExtraClasspathBase64Contents = uploadDriverExtraClasspathBase64Contents, uploadedJarsBase64Contents = uploadJarsBase64Contents) } @@ -414,7 +409,7 @@ private[spark] class Client( } } -private object Client { +private[spark] object Client extends Logging { private val SUBMISSION_SERVER_SECRET_NAME = "spark-submission-server-secret" private val DRIVER_LAUNCHER_SELECTOR_LABEL = "driver-launcher-selector" @@ -430,6 +425,7 @@ private object Client { private val SECURE_RANDOM = new SecureRandom() private val SPARK_SUBMISSION_SECRET_BASE_DIR = "/var/run/secrets/spark-submission" private val LAUNCH_TIMEOUT_SECONDS = 30 + private val SPARK_APP_NAME_LABEL = "spark-app-name" def main(args: Array[String]): Unit = { require(args.length >= 2, s"Too few arguments. Usage: ${getClass.getName} " + @@ -444,4 +440,20 @@ private object Client { sparkConf = sparkConf, appArgs = appArgs).run() } + + def resolveK8sMaster(rawMasterString: String): String = { + if (!rawMasterString.startsWith("k8s://")) { + throw new IllegalArgumentException("Master URL should start with k8s:// in Kubernetes mode.") + } + val masterWithoutK8sPrefix = rawMasterString.replaceFirst("k8s://", "") + if (masterWithoutK8sPrefix.startsWith("http://") + || masterWithoutK8sPrefix.startsWith("https://")) { + masterWithoutK8sPrefix + } else { + val resolvedURL = s"https://$masterWithoutK8sPrefix" + logDebug(s"No scheme specified for kubernetes master URL, so defaulting to https. 
Resolved" + + s" URL is $resolvedURL") + resolvedURL + } + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala index 6da1a848b25e7..813d070e0f876 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala @@ -27,7 +27,6 @@ case class KubernetesCreateSubmissionRequest( val appArgs: Array[String], val sparkProperties: Map[String, String], val secret: String, - val uploadedDriverExtraClasspathBase64Contents: Option[TarGzippedData], val uploadedJarsBase64Contents: Option[TarGzippedData]) extends SubmitRestProtocolRequest { message = "create" clientSparkVersion = SPARK_VERSION @@ -46,6 +45,7 @@ case class TarGzippedData( property = "type") @JsonSubTypes(value = Array( new JsonSubTypes.Type(value = classOf[UploadedAppResource], name = "UploadedAppResource"), + new JsonSubTypes.Type(value = classOf[ContainerAppResource], name = "ContainerLocalAppResource"), new JsonSubTypes.Type(value = classOf[RemoteAppResource], name = "RemoteAppResource"))) abstract class AppResource @@ -53,6 +53,8 @@ case class UploadedAppResource( resourceBase64Contents: String, name: String = "spark-app-resource") extends AppResource +case class ContainerAppResource(resourcePath: String) extends AppResource + case class RemoteAppResource(resource: String) extends AppResource class PingResponse extends SubmitRestProtocolResponse { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala index 837706ca9f5a8..08ddbaf5e50dc 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -18,7 +18,6 @@ package org.apache.spark.deploy.rest.kubernetes import java.io.File import java.net.URI -import java.nio.file.Paths import java.util.concurrent.CountDownLatch import javax.servlet.http.{HttpServletRequest, HttpServletResponse} @@ -30,12 +29,12 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.{SecurityManager, SPARK_VERSION, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.rest._ -import org.apache.spark.util.{ShutdownHookManager, Utils} +import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} private case class KubernetesSparkRestServerArguments( - val host: Option[String] = None, - val port: Option[Int] = None, - val secretFile: Option[String] = None) { + val host: Option[String] = None, + val port: Option[Int] = None, + val secretFile: Option[String] = None) { def validate(): KubernetesSparkRestServerArguments = { require(host.isDefined, "Hostname not set via --hostname.") require(port.isDefined, "Port not set via --port") @@ -68,13 +67,21 @@ private object KubernetesSparkRestServerArguments { } } +/** + * Runs in the driver pod and receives a request to run an application. 
Note that + * unlike the submission rest server in standalone mode, this server is expected + * to be used to run one application only, and then shut down once that application + * is complete. + */ private[spark] class KubernetesSparkRestServer( host: String, port: Int, conf: SparkConf, - expectedApplicationSecret: Array[Byte]) + expectedApplicationSecret: Array[Byte], + shutdownLock: CountDownLatch) extends RestSubmissionServer(host, port, conf) { + private val SERVLET_LOCK = new Object private val javaExecutable = s"${System.getenv("JAVA_HOME")}/bin/java" private val sparkHome = System.getenv("SPARK_HOME") private val securityManager = new SecurityManager(conf) @@ -99,87 +106,105 @@ private[spark] class KubernetesSparkRestServer( private class KubernetesSubmitRequestServlet extends SubmitRequestServlet { + private val waitForProcessCompleteExecutor = ThreadUtils + .newDaemonSingleThreadExecutor("wait-for-spark-app-complete") + private var startedApplication = false + // TODO validating the secret should be done as part of a header of the request. // Instead here we have to specify the secret in the body. override protected def handleSubmit( - requestMessageJson: String, - requestMessage: SubmitRestProtocolMessage, - responseServlet: HttpServletResponse): SubmitRestProtocolResponse = { - requestMessage match { - case KubernetesCreateSubmissionRequest( + requestMessageJson: String, + requestMessage: SubmitRestProtocolMessage, + responseServlet: HttpServletResponse): SubmitRestProtocolResponse = { + SERVLET_LOCK.synchronized { + if (startedApplication) { + throw new IllegalStateException("Application has already been submitted.") + } else { + requestMessage match { + case KubernetesCreateSubmissionRequest( appResource, mainClass, appArgs, sparkProperties, secret, - uploadedDriverExtraClasspath, uploadedJars) => - val decodedSecret = Base64.decodeBase64(secret) - if (!expectedApplicationSecret.sameElements(decodedSecret)) { - responseServlet.setStatus(HttpServletResponse.SC_UNAUTHORIZED) - handleError("Unauthorized to submit application.") - } else { - val tempDir = Utils.createTempDir() - val appResourcePath = resolvedAppResource(appResource, tempDir) - val driverClasspathDirectory = new File(tempDir, "driver-extra-classpath") - if (!driverClasspathDirectory.mkdir) { - throw new IllegalStateException("Failed to create driver extra classpath" + - s" dir at ${driverClasspathDirectory.getAbsolutePath}") - } - val jarsDirectory = new File(tempDir, "jars") - if (!jarsDirectory.mkdir) { - throw new IllegalStateException("Failed to create jars dir at" + - s"${jarsDirectory.getAbsolutePath}") - } - val writtenDriverExtraClasspath = writeBase64ContentsToFiles( - uploadedDriverExtraClasspath, driverClasspathDirectory) - val writtenJars = writeBase64ContentsToFiles(uploadedJars, jarsDirectory) - val originalDriverExtraClasspath = sparkProperties.get("spark.driver.extraClassPath") - .map(_.split(",")) - .getOrElse(Array.empty[String]) - val resolvedDriverExtraClasspath = writtenDriverExtraClasspath ++ - originalDriverExtraClasspath - val originalJars = sparkProperties.get("spark.jars") - .map(_.split(",")) - .getOrElse(Array.empty[String]) - val resolvedJars = writtenJars ++ originalJars ++ Array(appResourcePath) - val sparkJars = new File(sparkHome, "jars").listFiles().map(_.getAbsolutePath) - val driverClasspath = resolvedDriverExtraClasspath ++ - resolvedJars ++ - sparkJars ++ - Array(appResourcePath) - val resolvedSparkProperties = new mutable.HashMap[String, String] - resolvedSparkProperties ++= 
sparkProperties - resolvedSparkProperties("spark.jars") = resolvedJars.mkString(",") - - val command = new ArrayBuffer[String] - command += javaExecutable - command += "-cp" - command += s"${driverClasspath.mkString(":")}" - for (prop <- resolvedSparkProperties) { - command += s"-D${prop._1}=${prop._2}" - } - val driverMemory = resolvedSparkProperties.getOrElse("spark.driver.memory", "1g") - command += s"-Xms$driverMemory" - command += s"-Xmx$driverMemory" - command += mainClass - command ++= appArgs - val pb = new ProcessBuilder(command: _*).inheritIO() - val process = pb.start() - ShutdownHookManager.addShutdownHook(() => { - logInfo("Received stop command, shutting down the running Spark application...") - process.destroy() - }) - val response = new CreateSubmissionResponse - response.success = true - response.submissionId = null - response.message = "success" - response.serverSparkVersion = SPARK_VERSION - response + val decodedSecret = Base64.decodeBase64(secret) + if (!expectedApplicationSecret.sameElements(decodedSecret)) { + responseServlet.setStatus(HttpServletResponse.SC_UNAUTHORIZED) + handleError("Unauthorized to submit application.") + } else { + val tempDir = Utils.createTempDir() + val appResourcePath = resolvedAppResource(appResource, tempDir) + val driverClasspathDirectory = new File(tempDir, "driver-extra-classpath") + if (!driverClasspathDirectory.mkdir) { + throw new IllegalStateException("Failed to create driver extra classpath" + + s" dir at ${driverClasspathDirectory.getAbsolutePath}") + } + val jarsDirectory = new File(tempDir, "jars") + if (!jarsDirectory.mkdir) { + throw new IllegalStateException("Failed to create jars dir at" + + s"${jarsDirectory.getAbsolutePath}") + } + val writtenJars = writeBase64ContentsToFiles(uploadedJars, jarsDirectory) + val driverExtraClasspath = sparkProperties + .get("spark.driver.extraClassPath") + .map(_.split(",")) + .getOrElse(Array.empty[String]) + val originalJars = sparkProperties.get("spark.jars") + .map(_.split(",")) + .getOrElse(Array.empty[String]) + val resolvedJars = writtenJars ++ originalJars ++ Array(appResourcePath) + val sparkJars = new File(sparkHome, "jars").listFiles().map(_.getAbsolutePath) + val driverClasspath = driverExtraClasspath ++ + resolvedJars ++ + sparkJars ++ + Array(appResourcePath) + val resolvedSparkProperties = new mutable.HashMap[String, String] + resolvedSparkProperties ++= sparkProperties + resolvedSparkProperties("spark.jars") = resolvedJars.mkString(",") + + val command = new ArrayBuffer[String] + command += javaExecutable + command += "-cp" + command += s"${driverClasspath.mkString(":")}" + for (prop <- resolvedSparkProperties) { + command += s"-D${prop._1}=${prop._2}" + } + val driverMemory = resolvedSparkProperties.getOrElse("spark.driver.memory", "1g") + command += s"-Xms$driverMemory" + command += s"-Xmx$driverMemory" + command += mainClass + command ++= appArgs + val pb = new ProcessBuilder(command: _*).inheritIO() + val process = pb.start() + ShutdownHookManager.addShutdownHook(() => { + logInfo("Received stop command, shutting down the running Spark application...") + process.destroy() + shutdownLock.countDown() + }) + waitForProcessCompleteExecutor.submit(new Runnable { + override def run(): Unit = { + process.waitFor + SERVLET_LOCK.synchronized { + logInfo("Spark application complete. 
Shutting down submission server...") + KubernetesSparkRestServer.this.stop + shutdownLock.countDown() + } + } + }) + startedApplication = true + val response = new CreateSubmissionResponse + response.success = true + response.submissionId = null + response.message = "success" + response.serverSparkVersion = SPARK_VERSION + response + } + case unexpected => + responseServlet.setStatus(HttpServletResponse.SC_BAD_REQUEST) + handleError(s"Received message of unexpected type ${unexpected.messageType}.") } - case unexpected => - responseServlet.setStatus(HttpServletResponse.SC_BAD_REQUEST) - handleError(s"Received message of unexpected type ${unexpected.messageType}.") + } } } @@ -196,6 +221,7 @@ private[spark] class KubernetesSparkRestServer( throw new IllegalStateException(s"Failed to write main app resource file" + s" to $resourceFilePath") } + case ContainerAppResource(resource) => resource case RemoteAppResource(resource) => Utils.fetchFile(resource, tempDir, conf, securityManager, SparkHadoopUtil.get.newConfiguration(conf), @@ -237,7 +263,8 @@ private[spark] object KubernetesSparkRestServer { parsedArguments.host.get, parsedArguments.port.get, sparkConf, - secretBytes) + secretBytes, + barrier) server.start() ShutdownHookManager.addShutdownHook(() => { try { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 2717d2f37d910..b7110ba901842 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -27,7 +27,7 @@ import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future} import org.apache.spark.{SparkContext, SparkException} -import org.apache.spark.deploy.kubernetes.KubernetesClientBuilder +import org.apache.spark.deploy.kubernetes.{Client, KubernetesClientBuilder} import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler.TaskSchedulerImpl import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend @@ -43,15 +43,12 @@ private[spark] class KubernetesClusterSchedulerBackend( private val EXECUTOR_MODIFICATION_LOCK = new Object private val runningExecutorPods = new scala.collection.mutable.HashMap[String, Pod] - private val kubernetesMaster = sc.master.replaceFirst("k8s://", "") + private val kubernetesMaster = Client.resolveK8sMaster(sc.master) private val executorDockerImage = conf .get("spark.kubernetes.executor.docker.image", s"spark-executor:${sc.version}") - private val kubernetesNamespace = conf - .getOption("spark.kubernetes.namespace") - .getOrElse( - throw new SparkException("Kubernetes namespace must be specified in kubernetes mode.")) + private val kubernetesNamespace = conf.get("spark.kubernetes.namespace", "default") private val executorPort = conf.getInt("spark.executor.port", DEFAULT_STATIC_PORT) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index c20e51c93e7c7..0ec2f36075db3 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -43,6 +43,13 @@ ${project.version} pom
    + + + org.apache.spark + spark-examples_${scala.binary.version} + ${project.version} + provided + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 77b7c793dc37e..07a45c7577bcd 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -16,25 +16,25 @@ */ package org.apache.spark.deploy.kubernetes -import java.io.File -import java.security.SecureRandom +import java.io.{File, FileInputStream} +import java.security.{KeyStore, SecureRandom} import java.util.concurrent.{Executors, TimeoutException, TimeUnit} -import javax.net.ssl.X509TrustManager +import java.util.concurrent.atomic.AtomicBoolean +import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} +import com.google.common.base.Charsets import com.google.common.io.Files import com.google.common.util.concurrent.{SettableFuture, ThreadFactoryBuilder} import io.fabric8.kubernetes.api.model._ -import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient, KubernetesClientException, Watch, Watcher} +import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient, KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action -import io.fabric8.kubernetes.client.internal.SSLUtils import org.apache.commons.codec.binary.Base64 import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer -import scala.concurrent.ExecutionContext +import scala.collection.mutable +import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.DurationInt -import scala.util.Success -import org.apache.spark.{SPARK_VERSION, SparkConf, SparkException} +import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging @@ -56,10 +56,14 @@ private[spark] class Client( .getOrElse("spark") private val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") private val secretName = s"spark-submission-server-secret-$kubernetesAppId" + private val secretDirectory = s"$SPARK_SUBMISSION_SECRET_BASE_DIR/$kubernetesAppId" + private val sslSecretsDirectory = s"$SPARK_SUBMISSION_SECRET_BASE_DIR/$kubernetesAppId-ssl" + private val sslSecretsName = s"spark-submission-server-ssl-$kubernetesAppId" private val driverLauncherSelectorValue = s"driver-launcher-$launchTime" private val driverDockerImage = sparkConf.get( - "spark.kubernetes.driver.docker.image", s"spark-driver:$SPARK_VERSION") + "spark.kubernetes.driver.docker.image", s"spark-driver:$sparkVersion") private val uploadedJars = sparkConf.getOption("spark.kubernetes.driver.uploads.jars") + private val uiPort = sparkConf.getInt("spark.ui.port", DEFAULT_UI_PORT) private val driverLaunchTimeoutSecs = sparkConf.getTimeAsSeconds( "spark.kubernetes.driverLaunchTimeout", s"${DEFAULT_LAUNCH_TIMEOUT_SECONDS}s") @@ -82,6 +86,7 @@ private[spark] class Client( .build())) def run(): Unit = { + val (driverLaunchSslOptions, isKeyStoreLocalFile) = parseDriverLaunchSslOptions() 
val parsedCustomLabels = parseCustomLabels(customLabels) var k8ConfBuilder = new ConfigBuilder() .withApiVersion("v1") @@ -98,123 +103,50 @@ private[spark] class Client( } val k8ClientConfig = k8ConfBuilder.build - Utils.tryWithResource(new DefaultKubernetesClient(k8ClientConfig))(kubernetesClient => { - val secret = kubernetesClient.secrets().createNew() + Utils.tryWithResource(new DefaultKubernetesClient(k8ClientConfig)) { kubernetesClient => + val submitServerSecret = kubernetesClient.secrets().createNew() .withNewMetadata() - .withName(secretName) - .endMetadata() + .withName(secretName) + .endMetadata() .withData(Map((SUBMISSION_SERVER_SECRET_NAME, secretBase64String)).asJava) .withType("Opaque") .done() + val (sslEnvs, sslVolumes, sslVolumeMounts, sslSecrets) = configureSsl(kubernetesClient, + driverLaunchSslOptions, + isKeyStoreLocalFile) try { - val resolvedSelectors = (Map( + val driverKubernetesSelectors = (Map( DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue, SPARK_APP_NAME_LABEL -> appName) ++ parsedCustomLabels).asJava - val (servicePorts, containerPorts) = configurePorts() - val service = kubernetesClient.services().createNew() - .withNewMetadata() - .withName(kubernetesAppId) - .withLabels(Map(SPARK_APP_NAME_LABEL -> appName).asJava) - .endMetadata() - .withNewSpec() - .withSelector(resolvedSelectors) - .withPorts(servicePorts.asJava) - .endSpec() - .done() - sparkConf.set("spark.kubernetes.driver.service.name", service.getMetadata.getName) - sparkConf.set("spark.kubernetes.driver.pod.name", kubernetesAppId) - - sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) - sparkConf.setIfMissing("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT.toString) - val submitRequest = buildSubmissionRequest() + val containerPorts = buildContainerPorts() val submitCompletedFuture = SettableFuture.create[Boolean] - val secretDirectory = s"$SPARK_SUBMISSION_SECRET_BASE_DIR/$kubernetesAppId" - - val podWatcher = new Watcher[Pod] { - override def eventReceived(action: Action, t: Pod): Unit = { - if (action == Action.ADDED) { - val ownerRefs = new ArrayBuffer[OwnerReference] - ownerRefs += new OwnerReferenceBuilder() - .withApiVersion(t.getApiVersion) - .withController(true) - .withKind(t.getKind) - .withName(t.getMetadata.getName) - .withUid(t.getMetadata.getUid) - .build() - - secret.getMetadata().setOwnerReferences(ownerRefs.asJava) - kubernetesClient.secrets().createOrReplace(secret) - - service.getMetadata().setOwnerReferences(ownerRefs.asJava) - kubernetesClient.services().createOrReplace(service) - } - - if ((action == Action.ADDED || action == Action.MODIFIED) - && t.getStatus.getPhase == "Running" - && !submitCompletedFuture.isDone) { - t.getStatus - .getContainerStatuses - .asScala - .find(status => - status.getName == DRIVER_LAUNCHER_CONTAINER_NAME && status.getReady) match { - case Some(_) => - try { - val driverLauncher = getDriverLauncherService( - k8ClientConfig, master) - val ping = Retry.retry(5, 5.seconds) { - driverLauncher.ping() - } - ping onFailure { - case t: Throwable => - if (!submitCompletedFuture.isDone) { - submitCompletedFuture.setException(t) - } - } - val submitComplete = ping andThen { - case Success(_) => - driverLauncher.create(submitRequest) - submitCompletedFuture.set(true) - } - submitComplete onFailure { - case t: Throwable => - if (!submitCompletedFuture.isDone) { - submitCompletedFuture.setException(t) - } - } - } catch { - case e: Throwable => - if (!submitCompletedFuture.isDone) { - 
submitCompletedFuture.setException(e) - throw e - } - } - case None => - } - } - } - - override def onClose(e: KubernetesClientException): Unit = { - if (!submitCompletedFuture.isDone) { - submitCompletedFuture.setException(e) - } - } - } - - def createDriverPod(unused: Watch): Unit = { + val submitPending = new AtomicBoolean(false) + val podWatcher = new DriverPodWatcher( + submitCompletedFuture, + submitPending, + kubernetesClient, + driverLaunchSslOptions, + Array(submitServerSecret) ++ sslSecrets, + driverKubernetesSelectors) + Utils.tryWithResource(kubernetesClient + .pods() + .withLabels(driverKubernetesSelectors) + .watch(podWatcher)) { _ => kubernetesClient.pods().createNew() .withNewMetadata() .withName(kubernetesAppId) - .withLabels(resolvedSelectors) + .withLabels(driverKubernetesSelectors) .endMetadata() .withNewSpec() .withRestartPolicy("OnFailure") .addNewVolume() .withName(s"spark-submission-secret-volume") - .withNewSecret() - .withSecretName(secret.getMetadata.getName) + .withNewSecret() + .withSecretName(submitServerSecret.getMetadata.getName) .endSecret() .endVolume + .addToVolumes(sslVolumes: _*) .withServiceAccount(serviceAccount) .addNewContainer() .withName(DRIVER_LAUNCHER_CONTAINER_NAME) @@ -225,6 +157,7 @@ private[spark] class Client( .withMountPath(secretDirectory) .withReadOnly(true) .endVolumeMount() + .addToVolumeMounts(sslVolumeMounts: _*) .addNewEnv() .withName("SPARK_SUBMISSION_SECRET_LOCATION") .withValue(s"$secretDirectory/$SUBMISSION_SERVER_SECRET_NAME") @@ -233,6 +166,7 @@ private[spark] class Client( .withName("SPARK_DRIVER_LAUNCHER_SERVER_PORT") .withValue(DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT.toString) .endEnv() + .addToEnv(sslEnvs: _*) .withPorts(containerPorts.asJava) .endContainer() .endSpec() @@ -243,121 +177,321 @@ private[spark] class Client( submitSucceeded = true } catch { case e: TimeoutException => - val driverPod = try { - kubernetesClient.pods().withName(kubernetesAppId).get() - } catch { - case throwable: Throwable => - logError(s"Timed out while waiting $driverLaunchTimeoutSecs seconds for the" + - " driver pod to start, but an error occurred while fetching the driver" + - " pod's details.", throwable) - throw new SparkException(s"Timed out while waiting $driverLaunchTimeoutSecs" + - " seconds for the driver pod to start. Unfortunately, in attempting to fetch" + - " the latest state of the pod, another error was thrown. Check the logs for" + - " the error that was thrown in looking up the driver pod.", e) - } - val topLevelMessage = s"The driver pod with name ${driverPod.getMetadata.getName}" + - s" in namespace ${driverPod.getMetadata.getNamespace} was not ready in" + - s" $driverLaunchTimeoutSecs seconds." - val podStatusPhase = if (driverPod.getStatus.getPhase != null) { - s"Latest phase from the pod is: ${driverPod.getStatus.getPhase}" - } else { - "The pod had no final phase." - } - val podStatusMessage = if (driverPod.getStatus.getMessage != null) { - s"Latest message from the pod is: ${driverPod.getStatus.getMessage}" - } else { - "The pod had no final message." 
- } - val failedDriverContainerStatusString = driverPod.getStatus - .getContainerStatuses - .asScala - .find(_.getName == DRIVER_LAUNCHER_CONTAINER_NAME) - .map(status => { - val lastState = status.getState - if (lastState.getRunning != null) { - "Driver container last state: Running\n" + - s"Driver container started at: ${lastState.getRunning.getStartedAt}" - } else if (lastState.getWaiting != null) { - "Driver container last state: Waiting\n" + - s"Driver container wait reason: ${lastState.getWaiting.getReason}\n" + - s"Driver container message: ${lastState.getWaiting.getMessage}\n" - } else if (lastState.getTerminated != null) { - "Driver container last state: Terminated\n" + - s"Driver container started at: ${lastState.getTerminated.getStartedAt}\n" + - s"Driver container finished at: ${lastState.getTerminated.getFinishedAt}\n" + - s"Driver container exit reason: ${lastState.getTerminated.getReason}\n" + - s"Driver container exit code: ${lastState.getTerminated.getExitCode}\n" + - s"Driver container message: ${lastState.getTerminated.getMessage}" - } else { - "Driver container last state: Unknown" - } - }).getOrElse("The driver container wasn't found in the pod; expected to find" + - s" container with name $DRIVER_LAUNCHER_CONTAINER_NAME") - val finalErrorMessage = s"$topLevelMessage\n" + - s"$podStatusPhase\n" + - s"$podStatusMessage\n\n$failedDriverContainerStatusString" + val finalErrorMessage: String = buildSubmitFailedErrorMessage(kubernetesClient, e) logError(finalErrorMessage, e) throw new SparkException(finalErrorMessage, e) - } finally { - if (!submitSucceeded) { - try { - kubernetesClient.pods.withName(kubernetesAppId).delete - } catch { - case throwable: Throwable => - logError("Failed to delete driver pod after it failed to run.", throwable) - } + } finally { + if (!submitSucceeded) { + Utils.tryLogNonFatalError { + kubernetesClient.pods.withName(kubernetesAppId).delete() } } } - - Utils.tryWithResource(kubernetesClient - .pods() - .withLabels(resolvedSelectors) - .watch(podWatcher)) { createDriverPod } + } } finally { - kubernetesClient.secrets().delete(secret) + Utils.tryLogNonFatalError { + kubernetesClient.secrets().delete(submitServerSecret) + } + Utils.tryLogNonFatalError { + kubernetesClient.secrets().delete(sslSecrets: _*) + } } - }) + } } - private def configurePorts(): (Seq[ServicePort], Seq[ContainerPort]) = { - val servicePorts = new ArrayBuffer[ServicePort] - val containerPorts = new ArrayBuffer[ContainerPort] + private def parseDriverLaunchSslOptions(): (SSLOptions, Boolean) = { + val maybeKeyStore = sparkConf.getOption("spark.ssl.kubernetes.driverlaunch.keyStore") + val resolvedSparkConf = sparkConf.clone() + val (isLocalKeyStore, resolvedKeyStore) = maybeKeyStore.map(keyStore => { + val keyStoreURI = Utils.resolveURI(keyStore) + val isProvidedKeyStoreLocal = keyStoreURI.getScheme match { + case "file" | null => true + case "container" => false + case _ => throw new SparkException(s"Invalid KeyStore URI $keyStore; keyStore URI" + + " for submit server must have scheme file:// or container:// (no scheme defaults" + + " to file://)") + } + (isProvidedKeyStoreLocal, Option.apply(keyStoreURI.getPath)) + }).getOrElse((true, Option.empty[String])) + resolvedKeyStore.foreach { + resolvedSparkConf.set("spark.ssl.kubernetes.driverlaunch.keyStore", _) + } + sparkConf.getOption("spark.ssl.kubernetes.driverlaunch.trustStore").foreach { trustStore => + val trustStoreURI = Utils.resolveURI(trustStore) + trustStoreURI.getScheme match { + case "file" | null => + 
resolvedSparkConf.set("spark.ssl.kubernetes.driverlaunch.trustStore", + trustStoreURI.getPath) + case _ => throw new SparkException(s"Invalid trustStore URI $trustStore; trustStore URI" + + " for submit server must have no scheme, or scheme file://") + } + } + val securityManager = new SecurityManager(resolvedSparkConf) + (securityManager.getSSLOptions("kubernetes.driverlaunch"), isLocalKeyStore) + } - def addPortToServiceAndContainer(portName: String, portValue: Int): Unit = { - servicePorts += new ServicePortBuilder() - .withName(portName) - .withPort(portValue) - .withNewTargetPort(portValue) + private def configureSsl(kubernetesClient: KubernetesClient, driverLaunchSslOptions: SSLOptions, + isKeyStoreLocalFile: Boolean): + (Array[EnvVar], Array[Volume], Array[VolumeMount], Array[Secret]) = { + if (driverLaunchSslOptions.enabled) { + val sslSecretsMap = mutable.HashMap[String, String]() + val sslEnvs = mutable.Buffer[EnvVar]() + val secrets = mutable.Buffer[Secret]() + driverLaunchSslOptions.keyStore.foreach(store => { + val resolvedKeyStoreFile = if (isKeyStoreLocalFile) { + if (!store.isFile) { + throw new SparkException(s"KeyStore specified at $store is not a file or" + + s" does not exist.") + } + val keyStoreBytes = Files.toByteArray(store) + val keyStoreBase64 = Base64.encodeBase64String(keyStoreBytes) + sslSecretsMap += (SSL_KEYSTORE_SECRET_NAME -> keyStoreBase64) + s"$sslSecretsDirectory/$SSL_KEYSTORE_SECRET_NAME" + } else { + store.getAbsolutePath + } + sslEnvs += new EnvVarBuilder() + .withName("SPARK_SUBMISSION_KEYSTORE_FILE") + .withValue(resolvedKeyStoreFile) + .build() + }) + driverLaunchSslOptions.keyStorePassword.foreach(password => { + val passwordBase64 = Base64.encodeBase64String(password.getBytes(Charsets.UTF_8)) + sslSecretsMap += (SSL_KEYSTORE_PASSWORD_SECRET_NAME -> passwordBase64) + sslEnvs += new EnvVarBuilder() + .withName("SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE") + .withValue(s"$sslSecretsDirectory/$SSL_KEYSTORE_PASSWORD_SECRET_NAME") + .build() + }) + driverLaunchSslOptions.keyPassword.foreach(password => { + val passwordBase64 = Base64.encodeBase64String(password.getBytes(Charsets.UTF_8)) + sslSecretsMap += (SSL_KEY_PASSWORD_SECRET_NAME -> passwordBase64) + sslEnvs += new EnvVarBuilder() + .withName("SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE") + .withValue(s"$sslSecretsDirectory/$SSL_KEY_PASSWORD_SECRET_NAME") + .build() + }) + driverLaunchSslOptions.keyStoreType.foreach(storeType => { + sslEnvs += new EnvVarBuilder() + .withName("SPARK_SUBMISSION_KEYSTORE_TYPE") + .withValue(storeType) + .build() + }) + sslEnvs += new EnvVarBuilder() + .withName("SPARK_SUBMISSION_USE_SSL") + .withValue("true") + .build() + val sslSecrets = kubernetesClient.secrets().createNew() + .withNewMetadata() + .withName(sslSecretsName) + .endMetadata() + .withData(sslSecretsMap.asJava) + .withType("Opaque") + .done() + secrets += sslSecrets + val sslVolume = new VolumeBuilder() + .withName("spark-submission-server-ssl-secrets") + .withNewSecret() + .withSecretName(sslSecrets.getMetadata.getName) + .endSecret() .build() - containerPorts += new ContainerPortBuilder() - .withContainerPort(portValue) + val sslVolumeMount = new VolumeMountBuilder() + .withName("spark-submission-server-ssl-secrets") + .withReadOnly(true) + .withMountPath(sslSecretsDirectory) .build() + (sslEnvs.toArray, Array(sslVolume), Array(sslVolumeMount), secrets.toArray) + } else { + (Array[EnvVar](), Array[Volume](), Array[VolumeMount](), Array[Secret]()) } + } + + private class DriverPodWatcher( + 
submitCompletedFuture: SettableFuture[Boolean], + submitPending: AtomicBoolean, + kubernetesClient: KubernetesClient, + driverLaunchSslOptions: SSLOptions, + applicationSecrets: Array[Secret], + driverKubernetesSelectors: java.util.Map[String, String]) extends Watcher[Pod] { + override def eventReceived(action: Action, pod: Pod): Unit = { + if ((action == Action.ADDED || action == Action.MODIFIED) + && pod.getStatus.getPhase == "Running" + && !submitCompletedFuture.isDone) { + if (!submitPending.getAndSet(true)) { + pod.getStatus + .getContainerStatuses + .asScala + .find(status => + status.getName == DRIVER_LAUNCHER_CONTAINER_NAME && status.getReady) match { + case Some(_) => + val ownerRefs = Seq(new OwnerReferenceBuilder() + .withName(pod.getMetadata.getName) + .withUid(pod.getMetadata.getUid) + .withApiVersion(pod.getApiVersion) + .withKind(pod.getKind) + .withController(true) + .build()) + + applicationSecrets.foreach(secret => { + secret.getMetadata.setOwnerReferences(ownerRefs.asJava) + kubernetesClient.secrets().createOrReplace(secret) + }) - addPortToServiceAndContainer( - DRIVER_LAUNCHER_SERVICE_PORT_NAME, - DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT) - addPortToServiceAndContainer( - DRIVER_PORT_NAME, - sparkConf - .getOption("spark.driver.port") - .map(_.toInt) - .getOrElse(DEFAULT_DRIVER_PORT)) - addPortToServiceAndContainer( - BLOCKMANAGER_PORT_NAME, - sparkConf - .getOption("spark.blockmanager.port") - .map(_.toInt) - .getOrElse(DEFAULT_BLOCKMANAGER_PORT)) + val driverLauncherServicePort = new ServicePortBuilder() + .withName(DRIVER_LAUNCHER_SERVICE_PORT_NAME) + .withPort(DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT) + .withNewTargetPort(DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT) + .build() + val service = kubernetesClient.services().createNew() + .withNewMetadata() + .withName(kubernetesAppId) + .withLabels(driverKubernetesSelectors) + .withOwnerReferences(ownerRefs.asJava) + .endMetadata() + .withNewSpec() + .withType("NodePort") + .withSelector(driverKubernetesSelectors) + .withPorts(driverLauncherServicePort) + .endSpec() + .done() + try { + sparkConf.set("spark.kubernetes.driver.service.name", + service.getMetadata.getName) + sparkConf.set("spark.kubernetes.driver.pod.name", kubernetesAppId) + sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) + sparkConf.setIfMissing("spark.blockmanager.port", + DEFAULT_BLOCKMANAGER_PORT.toString) + val driverLauncher = buildDriverLauncherClient(kubernetesClient, service, + driverLaunchSslOptions) + val ping = Retry.retry(5, 5.seconds) { + driverLauncher.ping() + } + ping onFailure { + case t: Throwable => + submitCompletedFuture.setException(t) + kubernetesClient.services().delete(service) + } + val submitComplete = ping.flatMap { _ => + Future { + sparkConf.set("spark.driver.host", pod.getStatus.getPodIP) + val submitRequest = buildSubmissionRequest() + driverLauncher.create(submitRequest) + } + } + submitComplete onFailure { + case t: Throwable => + submitCompletedFuture.setException(t) + kubernetesClient.services().delete(service) + } + val adjustServicePort = submitComplete.flatMap { _ => + Future { + // After submitting, adjust the service to only expose the Spark UI + val uiServicePort = new ServicePortBuilder() + .withName(UI_PORT_NAME) + .withPort(uiPort) + .withNewTargetPort(uiPort) + .build() + kubernetesClient.services().withName(kubernetesAppId).edit() + .editSpec() + .withType("ClusterIP") + .withPorts(uiServicePort) + .endSpec() + .done + } + } + adjustServicePort onSuccess { + case _ => + 
submitCompletedFuture.set(true) + } + adjustServicePort onFailure { + case throwable: Throwable => + submitCompletedFuture.setException(throwable) + kubernetesClient.services().delete(service) + } + } catch { + case e: Throwable => + submitCompletedFuture.setException(e) + Utils.tryLogNonFatalError({ + kubernetesClient.services().delete(service) + }) + throw e + } + case None => + } + } + } + } - addPortToServiceAndContainer( - UI_PORT_NAME, - sparkConf - .getOption("spark.ui.port") - .map(_.toInt) - .getOrElse(DEFAULT_UI_PORT)) - (servicePorts, containerPorts) + override def onClose(e: KubernetesClientException): Unit = { + if (!submitCompletedFuture.isDone) { + submitCompletedFuture.setException(e) + } + } + } + + private def buildSubmitFailedErrorMessage( + kubernetesClient: DefaultKubernetesClient, + e: TimeoutException): String = { + val driverPod = try { + kubernetesClient.pods().withName(kubernetesAppId).get() + } catch { + case throwable: Throwable => + logError(s"Timed out while waiting $driverLaunchTimeoutSecs seconds for the" + + " driver pod to start, but an error occurred while fetching the driver" + + " pod's details.", throwable) + throw new SparkException(s"Timed out while waiting $driverLaunchTimeoutSecs" + + " seconds for the driver pod to start. Unfortunately, in attempting to fetch" + + " the latest state of the pod, another error was thrown. Check the logs for" + + " the error that was thrown in looking up the driver pod.", e) + } + val topLevelMessage = s"The driver pod with name ${driverPod.getMetadata.getName}" + + s" in namespace ${driverPod.getMetadata.getNamespace} was not ready in" + + s" $driverLaunchTimeoutSecs seconds." + val podStatusPhase = if (driverPod.getStatus.getPhase != null) { + s"Latest phase from the pod is: ${driverPod.getStatus.getPhase}" + } else { + "The pod had no final phase." + } + val podStatusMessage = if (driverPod.getStatus.getMessage != null) { + s"Latest message from the pod is: ${driverPod.getStatus.getMessage}" + } else { + "The pod had no final message." 
+ } + val failedDriverContainerStatusString = driverPod.getStatus + .getContainerStatuses + .asScala + .find(_.getName == DRIVER_LAUNCHER_CONTAINER_NAME) + .map(status => { + val lastState = status.getState + if (lastState.getRunning != null) { + "Driver container last state: Running\n" + + s"Driver container started at: ${lastState.getRunning.getStartedAt}" + } else if (lastState.getWaiting != null) { + "Driver container last state: Waiting\n" + + s"Driver container wait reason: ${lastState.getWaiting.getReason}\n" + + s"Driver container message: ${lastState.getWaiting.getMessage}\n" + } else if (lastState.getTerminated != null) { + "Driver container last state: Terminated\n" + + s"Driver container started at: ${lastState.getTerminated.getStartedAt}\n" + + s"Driver container finished at: ${lastState.getTerminated.getFinishedAt}\n" + + s"Driver container exit reason: ${lastState.getTerminated.getReason}\n" + + s"Driver container exit code: ${lastState.getTerminated.getExitCode}\n" + + s"Driver container message: ${lastState.getTerminated.getMessage}" + } else { + "Driver container last state: Unknown" + } + }).getOrElse("The driver container wasn't found in the pod; expected to find" + + s" container with name $DRIVER_LAUNCHER_CONTAINER_NAME") + s"$topLevelMessage\n" + + s"$podStatusPhase\n" + + s"$podStatusMessage\n\n$failedDriverContainerStatusString" + } + + private def buildContainerPorts(): Seq[ContainerPort] = { + Seq(sparkConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT), + sparkConf.getInt("spark.blockManager.port", DEFAULT_BLOCKMANAGER_PORT), + DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT, + uiPort).map(new ContainerPortBuilder().withContainerPort(_).build()) } private def buildSubmissionRequest(): KubernetesCreateSubmissionRequest = { @@ -392,26 +526,67 @@ private[spark] class Client( .map(CompressionUtils.createTarGzip(_)) } - private def getDriverLauncherService( - k8ClientConfig: Config, - kubernetesMaster: String): KubernetesSparkRestApi = { - val url = s"${ - Array[String]( - kubernetesMaster, - "api", "v1", "proxy", - "namespaces", namespace, - "services", kubernetesAppId).mkString("/")}" + - s":$DRIVER_LAUNCHER_SERVICE_PORT_NAME/" - - val sslContext = SSLUtils.sslContext(k8ClientConfig) - val trustManager = SSLUtils.trustManagers( - k8ClientConfig)(0).asInstanceOf[X509TrustManager] + private def buildDriverLauncherClient( + kubernetesClient: KubernetesClient, + service: Service, + driverLaunchSslOptions: SSLOptions): KubernetesSparkRestApi = { + val servicePort = service + .getSpec + .getPorts + .asScala + .filter(_.getName == DRIVER_LAUNCHER_SERVICE_PORT_NAME) + .head + .getNodePort + // NodePort is exposed on every node, so just pick one of them. + // TODO be resilient to node failures and try all of them + val node = kubernetesClient.nodes.list.getItems.asScala.head + val nodeAddress = node.getStatus.getAddresses.asScala.head.getAddress + val urlScheme = if (driverLaunchSslOptions.enabled) { + "https" + } else { + logWarning("Submitting application details, application secret, and local" + + " jars to the cluster over an insecure connection. 
You should configure SSL" + + " to secure this step.") + "http" + } + val (trustManager, sslContext): (X509TrustManager, SSLContext) = + if (driverLaunchSslOptions.enabled) { + buildSslConnectionConfiguration(driverLaunchSslOptions) + } else { + (null, SSLContext.getDefault) + } + val url = s"$urlScheme://$nodeAddress:$servicePort" HttpClientUtil.createClient[KubernetesSparkRestApi]( - uri = url, + url, sslSocketFactory = sslContext.getSocketFactory, trustContext = trustManager) } + private def buildSslConnectionConfiguration(driverLaunchSslOptions: SSLOptions) = { + driverLaunchSslOptions.trustStore.map(trustStoreFile => { + val trustManagerFactory = TrustManagerFactory.getInstance( + TrustManagerFactory.getDefaultAlgorithm) + val trustStore = KeyStore.getInstance( + driverLaunchSslOptions.trustStoreType.getOrElse(KeyStore.getDefaultType)) + if (!trustStoreFile.isFile) { + throw new SparkException(s"TrustStore file at ${trustStoreFile.getAbsolutePath}" + + s" does not exist or is not a file.") + } + Utils.tryWithResource(new FileInputStream(trustStoreFile)) { trustStoreStream => + driverLaunchSslOptions.trustStorePassword match { + case Some(password) => + trustStore.load(trustStoreStream, password.toCharArray) + case None => trustStore.load(trustStoreStream, null) + } + } + trustManagerFactory.init(trustStore) + val trustManagers = trustManagerFactory.getTrustManagers + val sslContext = SSLContext.getInstance("TLSv1.2") + sslContext.init(null, trustManagers, SECURE_RANDOM) + (trustManagers(0).asInstanceOf[X509TrustManager], sslContext) + }).getOrElse((null, SSLContext.getDefault)) + } + private def parseCustomLabels(labels: String): Map[String, String] = { labels.split(",").map(_.trim).filterNot(_.isEmpty).map(label => { label.split("=", 2).toSeq match { @@ -433,6 +608,9 @@ private[spark] class Client( private[spark] object Client extends Logging { private val SUBMISSION_SERVER_SECRET_NAME = "spark-submission-server-secret" + private val SSL_KEYSTORE_SECRET_NAME = "spark-submission-server-keystore" + private val SSL_KEYSTORE_PASSWORD_SECRET_NAME = "spark-submission-server-keystore-password" + private val SSL_KEY_PASSWORD_SECRET_NAME = "spark-submission-server-key-password" private val DRIVER_LAUNCHER_SELECTOR_LABEL = "driver-launcher-selector" private val DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT = 7077 private val DEFAULT_DRIVER_PORT = 7078 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala index 38fa4d1d3f0b2..451dc96dd65ed 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -21,20 +21,26 @@ import java.net.URI import java.util.concurrent.CountDownLatch import javax.servlet.http.{HttpServletRequest, HttpServletResponse} +import com.google.common.base.Charsets import com.google.common.io.Files import org.apache.commons.codec.binary.Base64 import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -import org.apache.spark.{SecurityManager, SPARK_VERSION, SparkConf} +import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.rest._ 
import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} private case class KubernetesSparkRestServerArguments( - val host: Option[String] = None, - val port: Option[Int] = None, - val secretFile: Option[String] = None) { + host: Option[String] = None, + port: Option[Int] = None, + useSsl: Boolean = false, + secretFile: Option[String] = None, + keyStoreFile: Option[String] = None, + keyStorePasswordFile: Option[String] = None, + keyStoreType: Option[String] = None, + keyPasswordFile: Option[String] = None) { def validate(): KubernetesSparkRestServerArguments = { require(host.isDefined, "Hostname not set via --hostname.") require(port.isDefined, "Port not set via --port") @@ -58,6 +64,21 @@ private object KubernetesSparkRestServerArguments { case "--secret-file" :: value :: tail => args = tail resolvedArguments.copy(secretFile = Some(value)) + case "--use-ssl" :: value :: tail => + args = tail + resolvedArguments.copy(useSsl = value.toBoolean) + case "--keystore-file" :: value :: tail => + args = tail + resolvedArguments.copy(keyStoreFile = Some(value)) + case "--keystore-password-file" :: value :: tail => + args = tail + resolvedArguments.copy(keyStorePasswordFile = Some(value)) + case "--keystore-type" :: value :: tail => + args = tail + resolvedArguments.copy(keyStoreType = Some(value)) + case "--keystore-key-password-file" :: value :: tail => + args = tail + resolvedArguments.copy(keyPasswordFile = Some(value)) // TODO polish usage message case Nil => resolvedArguments case unknown => throw new IllegalStateException(s"Unknown argument(s) found: $unknown") @@ -78,8 +99,9 @@ private[spark] class KubernetesSparkRestServer( port: Int, conf: SparkConf, expectedApplicationSecret: Array[Byte], - shutdownLock: CountDownLatch) - extends RestSubmissionServer(host, port, conf) { + shutdownLock: CountDownLatch, + sslOptions: SSLOptions = new SSLOptions) + extends RestSubmissionServer(host, port, conf, sslOptions) { private val SERVLET_LOCK = new Object private val javaExecutable = s"${System.getenv("JAVA_HOME")}/bin/java" @@ -196,7 +218,7 @@ private[spark] class KubernetesSparkRestServer( response.success = true response.submissionId = null response.message = "success" - response.serverSparkVersion = SPARK_VERSION + response.serverSparkVersion = sparkVersion response } case unexpected => @@ -249,6 +271,7 @@ private[spark] class KubernetesSparkRestServer( private[spark] object KubernetesSparkRestServer { private val barrier = new CountDownLatch(1) + def main(args: Array[String]): Unit = { val parsedArguments = KubernetesSparkRestServerArguments.fromArgsArray(args) val secretFile = new File(parsedArguments.secretFile.get) @@ -256,6 +279,24 @@ private[spark] object KubernetesSparkRestServer { throw new IllegalArgumentException(s"Secret file specified by --secret-file" + " is not a file, or does not exist.") } + val sslOptions = if (parsedArguments.useSsl) { + val keyStorePassword = parsedArguments + .keyStorePasswordFile + .map(new File(_)) + .map(Files.toString(_, Charsets.UTF_8)) + val keyPassword = parsedArguments + .keyPasswordFile + .map(new File(_)) + .map(Files.toString(_, Charsets.UTF_8)) + new SSLOptions( + enabled = true, + keyStore = parsedArguments.keyStoreFile.map(new File(_)), + keyStoreType = parsedArguments.keyStoreType, + keyStorePassword = keyStorePassword, + keyPassword = keyPassword) + } else { + new SSLOptions + } val secretBytes = Files.toByteArray(secretFile) val sparkConf = new SparkConf(true) val server = new KubernetesSparkRestServer( @@ -263,7 +304,8 @@ 
private[spark] object KubernetesSparkRestServer { parsedArguments.port.get, sparkConf, secretBytes, - barrier) + barrier, + sslOptions) server.start() ShutdownHookManager.addShutdownHook(() => { try { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index f512c50a9a934..dae4b2714b4e4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -106,13 +106,10 @@ private[spark] class KubernetesClusterSchedulerBackend( protected var totalExpectedExecutors = new AtomicInteger(0) private val driverUrl = RpcEndpointAddress( - System.getenv(s"${convertToEnvMode(kubernetesDriverServiceName)}_SERVICE_HOST"), + sc.getConf.get("spark.driver.host"), sc.getConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT), CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString - private def convertToEnvMode(value: String): String = - value.toUpperCase.map { c => if (c == '-') '_' else c } - private val initialExecutors = getInitialTargetExecutorNumber(1) private def getInitialTargetExecutorNumber(defaultNumExecutors: Int = 1): Int = { diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 4d345158f356a..070008fce7410 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -19,5 +19,14 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -# This class will also require setting a secret via the SPARK_APP_SECRET environment variable -CMD exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.KubernetesSparkRestServer --hostname $HOSTNAME --port $SPARK_DRIVER_LAUNCHER_SERVER_PORT --secret-file $SPARK_SUBMISSION_SECRET_LOCATION +CMD SSL_ARGS="" && \ + if ! [ -z ${SPARK_SUBMISSION_USE_SSL+x} ]; then SSL_ARGS="$SSL_ARGS --use-ssl $SPARK_SUBMISSION_USE_SSL"; fi && \ + if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-file $SPARK_SUBMISSION_KEYSTORE_FILE"; fi && \ + if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_TYPE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-type $SPARK_SUBMISSION_KEYSTORE_TYPE"; fi && \ + if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-password-file $SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE"; fi && \ + if ! 
[ -z ${SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-key-password-file $SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE"; fi && \ + exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.KubernetesSparkRestServer \ + --hostname $HOSTNAME \ + --port $SPARK_DRIVER_LAUNCHER_SERVER_PORT \ + --secret-file $SPARK_SUBMISSION_SECRET_LOCATION \ + ${SSL_ARGS} diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 569527de8e300..f6a322f18cd75 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -106,6 +106,10 @@ + + org.bouncycastle + bcpkix-jdk15on +
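
For reference, the SSL bootstrap added in this patch is driven entirely by the spark.ssl.kubernetes.driverlaunch.* properties: parseDriverLaunchSslOptions resolves them into an SSLOptions instance, configureSsl mounts the keyStore and password material into the driver pod as Kubernetes secrets, and the Dockerfile above forwards them to KubernetesSparkRestServer through the SPARK_SUBMISSION_* environment variables. The sketch below is only an illustrative client-side configuration under assumed placeholder paths and passwords (it is not part of the patch; a later patch in this series renames the prefix to spark.ssl.kubernetes.submit). The integration test added below exercises the same keys end to end.

    // Illustrative only: client-side SparkConf wiring for the SSL-secured submission
    // server introduced in this patch. Paths and passwords are placeholders.
    import org.apache.spark.SparkConf

    object DriverLaunchSslConfExample {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf()
          .set("spark.ssl.kubernetes.driverlaunch.enabled", "true")
          // file:// keyStores are read on the client and shipped to the driver pod as a
          // Kubernetes secret; container:// keyStores are assumed to already exist in the
          // driver image at the given path.
          .set("spark.ssl.kubernetes.driverlaunch.keyStore", "file:///opt/certs/keyStore.jks")
          .set("spark.ssl.kubernetes.driverlaunch.keyStorePassword", "changeit")
          .set("spark.ssl.kubernetes.driverlaunch.keyPassword", "changeit")
          // The trustStore stays on the submitting machine and must be a local file.
          .set("spark.ssl.kubernetes.driverlaunch.trustStore", "/opt/certs/trustStore.jks")
          .set("spark.ssl.kubernetes.driverlaunch.trustStorePassword", "changeit")
        // Print the resolved configuration for inspection.
        println(conf.getAll.mkString("\n"))
      }
    }
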
    diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index c4bb389f5ada2..13edea02dce9a 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -16,6 +16,7 @@ */ package org.apache.spark.deploy.kubernetes.integrationtest +import java.io.File import java.nio.file.Paths import java.util.UUID import java.util.concurrent.TimeUnit @@ -36,7 +37,7 @@ import org.apache.spark.deploy.kubernetes.Client import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 -import org.apache.spark.internal.Logging +import org.apache.spark.deploy.kubernetes.integrationtest.sslutil.SSLUtils import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} import org.apache.spark.util.Utils @@ -68,6 +69,8 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { private val NAMESPACE = UUID.randomUUID().toString.replaceAll("-", "") private var minikubeKubernetesClient: KubernetesClient = _ private var clientConfig: Config = _ + private var keyStoreFile: File = _ + private var trustStoreFile: File = _ override def beforeAll(): Unit = { Minikube.startMinikube() @@ -79,6 +82,13 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .done() minikubeKubernetesClient = Minikube.getKubernetesClient.inNamespace(NAMESPACE) clientConfig = minikubeKubernetesClient.getConfiguration + val (keyStore, trustStore) = SSLUtils.generateKeyStoreTrustStorePair( + Minikube.getMinikubeIp, + "changeit", + "changeit", + "changeit") + keyStoreFile = keyStore + trustStoreFile = trustStore } before { @@ -296,4 +306,32 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { assert(driverPodLabels.get("label1") == "label1value", "Unexpected value for label1") assert(driverPodLabels.get("label2") == "label2value", "Unexpected value for label2") } + + test("Enable SSL on the driver submit server") { + val args = Array( + "--master", s"k8s://https://${Minikube.getMinikubeIp}:8443", + "--deploy-mode", "cluster", + "--kubernetes-namespace", NAMESPACE, + "--name", "spark-pi", + "--executor-memory", "512m", + "--executor-cores", "1", + "--num-executors", "1", + "--upload-jars", HELPER_JAR, + "--class", MAIN_CLASS, + "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", + "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", + "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", + "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", + "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", + "--conf", "spark.ssl.kubernetes.driverlaunch.enabled=true", + "--conf", "spark.ssl.kubernetes.driverlaunch.keyStore=" + + s"file://${keyStoreFile.getAbsolutePath}", + "--conf", "spark.ssl.kubernetes.driverlaunch.keyStorePassword=changeit", + "--conf", "spark.ssl.kubernetes.driverlaunch.keyPassword=changeit", + 
"--conf", "spark.ssl.kubernetes.driverlaunch.trustStore=" + + s"file://${trustStoreFile.getAbsolutePath}", + "--conf", s"spark.ssl.kubernetes.driverlaunch.trustStorePassword=changeit", + EXAMPLES_JAR) + SparkSubmit.main(args) + } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala new file mode 100644 index 0000000000000..bde7b43226660 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest.sslutil + +import java.io.{File, FileOutputStream} +import java.math.BigInteger +import java.nio.file.Files +import java.security.{KeyPairGenerator, KeyStore, SecureRandom} +import java.util.{Calendar, Random} +import javax.security.auth.x500.X500Principal + +import org.bouncycastle.asn1.x509.{Extension, GeneralName, GeneralNames} +import org.bouncycastle.cert.jcajce.{JcaX509CertificateConverter, JcaX509v3CertificateBuilder} +import org.bouncycastle.operator.jcajce.JcaContentSignerBuilder + +import org.apache.spark.util.Utils + +private[spark] object SSLUtils { + + def generateKeyStoreTrustStorePair( + ipAddress: String, + keyStorePassword: String, + keyPassword: String, + trustStorePassword: String): (File, File) = { + val keyPairGenerator = KeyPairGenerator.getInstance("RSA") + keyPairGenerator.initialize(512) + val keyPair = keyPairGenerator.generateKeyPair() + val selfPrincipal = new X500Principal(s"cn=$ipAddress") + val currentDate = Calendar.getInstance + val validForOneHundredYears = Calendar.getInstance + validForOneHundredYears.add(Calendar.YEAR, 100) + val certificateBuilder = new JcaX509v3CertificateBuilder( + selfPrincipal, + new BigInteger(4096, new Random()), + currentDate.getTime, + validForOneHundredYears.getTime, + selfPrincipal, + keyPair.getPublic) + certificateBuilder.addExtension(Extension.subjectAlternativeName, false, + new GeneralNames(new GeneralName(GeneralName.iPAddress, ipAddress))) + val signer = new JcaContentSignerBuilder("SHA1WithRSA") + .setSecureRandom(new SecureRandom()) + .build(keyPair.getPrivate) + val bcCertificate = certificateBuilder.build(signer) + val jcaCertificate = new JcaX509CertificateConverter().getCertificate(bcCertificate) + val keyStore = KeyStore.getInstance("JKS") + keyStore.load(null, null) + keyStore.setKeyEntry("key", keyPair.getPrivate, + keyPassword.toCharArray, Array(jcaCertificate)) + val tempDir = 
Files.createTempDirectory("temp-ssl-stores").toFile() + tempDir.deleteOnExit() + val keyStoreFile = new File(tempDir, "keyStore.jks") + Utils.tryWithResource(new FileOutputStream(keyStoreFile)) { + keyStore.store(_, keyStorePassword.toCharArray) + } + val trustStore = KeyStore.getInstance("JKS") + trustStore.load(null, null) + trustStore.setCertificateEntry("key", jcaCertificate) + val trustStoreFile = new File(tempDir, "trustStore.jks") + Utils.tryWithResource(new FileOutputStream(trustStoreFile)) { + trustStore.store(_, trustStorePassword.toCharArray) + } + (keyStoreFile, trustStoreFile) + } + +} From b2e687715308a953c29ed9b597568819eabf9f65 Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 31 Jan 2017 12:07:01 -0800 Subject: [PATCH 036/156] Extract constants and config into separate file. Launch => Submit. (#65) * Extract constants and config into separate file. Launch => Submit. * Address comments * A small shorthand * Refactor more ThreadUtils * Fix scalastyle, use cached thread pool * Tiny Scala style change --- docs/running-on-kubernetes.md | 16 +- .../spark/deploy/kubernetes/Client.scala | 251 +++++++++--------- .../spark/deploy/kubernetes/config.scala | 177 ++++++++++++ .../spark/deploy/kubernetes/constants.scala | 70 +++++ .../rest/KubernetesRestProtocolMessages.scala | 21 +- .../kubernetes/KubernetesSparkRestApi.scala | 3 +- .../KubernetesClusterSchedulerBackend.scala | 162 +++++------ .../src/main/docker/driver/Dockerfile | 2 +- .../integrationtest/KubernetesSuite.scala | 18 +- 9 files changed, 470 insertions(+), 250 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index e25e189aa6d74..e256535fbbc9d 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -140,12 +140,12 @@ Spark supports using SSL to encrypt the traffic in this bootstrapping process. I whenever possible. See the [security page](security.html) and [configuration](configuration.html) sections for more information on -configuring SSL; use the prefix `spark.ssl.kubernetes.driverlaunch` in configuring the SSL-related fields in the context +configuring SSL; use the prefix `spark.ssl.kubernetes.submit` in configuring the SSL-related fields in the context of submitting to Kubernetes. For example, to set the trustStore used when the local machine communicates with the driver -pod in starting the application, set `spark.ssl.kubernetes.driverlaunch.trustStore`. +pod in starting the application, set `spark.ssl.kubernetes.submit.trustStore`. One note about the keyStore is that it can be specified as either a file on the client machine or a file in the -container image's disk. Thus `spark.ssl.kubernetes.driverlaunch.keyStore` can be a URI with a scheme of either `file:` +container image's disk. Thus `spark.ssl.kubernetes.submit.keyStore` can be a URI with a scheme of either `file:` or `container:`. A scheme of `file:` corresponds to the keyStore being located on the client machine; it is mounted onto the driver container as a [secret volume](https://kubernetes.io/docs/user-guide/secrets/). When the URI has the scheme `container:`, the file is assumed to already be on the container's disk at the appropriate path. @@ -235,7 +235,15 @@ from the other deployment modes. 
See the [configuration page](configuration.html (none) Custom labels that will be added to the driver pod. This should be a comma-separated list of label key-value pairs, - where each label is in the format key=value. + where each label is in the format key=value. Note that Spark also adds its own labels to the driver pod + for bookkeeping purposes. + + + + spark.kubernetes.driverSubmitTimeout + 60s + + Time to wait for the driver pod to start running before aborting its execution. diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 07a45c7577bcd..fed9334dbbab4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -18,13 +18,13 @@ package org.apache.spark.deploy.kubernetes import java.io.{File, FileInputStream} import java.security.{KeyStore, SecureRandom} -import java.util.concurrent.{Executors, TimeoutException, TimeUnit} +import java.util.concurrent.{TimeoutException, TimeUnit} import java.util.concurrent.atomic.AtomicBoolean import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} import com.google.common.base.Charsets import com.google.common.io.Files -import com.google.common.util.concurrent.{SettableFuture, ThreadFactoryBuilder} +import com.google.common.util.concurrent.SettableFuture import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient, KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action @@ -34,11 +34,13 @@ import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.DurationInt -import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} +import org.apache.spark.{SecurityManager, SparkConf, SparkException, SSLOptions} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils +import org.apache.spark.util.{ThreadUtils, Utils} private[spark] class Client( sparkConf: SparkConf, @@ -47,25 +49,21 @@ private[spark] class Client( appArgs: Array[String]) extends Logging { import Client._ - private val namespace = sparkConf.get("spark.kubernetes.namespace", "default") + private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) private val master = resolveK8sMaster(sparkConf.get("spark.master")) private val launchTime = System.currentTimeMillis private val appName = sparkConf.getOption("spark.app.name") - .orElse(sparkConf.getOption("spark.app.id")) .getOrElse("spark") private val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") - private val secretName = s"spark-submission-server-secret-$kubernetesAppId" - private val secretDirectory = s"$SPARK_SUBMISSION_SECRET_BASE_DIR/$kubernetesAppId" - private val sslSecretsDirectory = s"$SPARK_SUBMISSION_SECRET_BASE_DIR/$kubernetesAppId-ssl" - private val sslSecretsName = s"spark-submission-server-ssl-$kubernetesAppId" - private val 
driverLauncherSelectorValue = s"driver-launcher-$launchTime" - private val driverDockerImage = sparkConf.get( - "spark.kubernetes.driver.docker.image", s"spark-driver:$sparkVersion") - private val uploadedJars = sparkConf.getOption("spark.kubernetes.driver.uploads.jars") + private val secretName = s"$SUBMISSION_APP_SECRET_PREFIX-$kubernetesAppId" + private val secretDirectory = s"$DRIVER_CONTAINER_SECRETS_BASE_DIR/$kubernetesAppId" + private val sslSecretsDirectory = s"$DRIVER_CONTAINER_SECRETS_BASE_DIR/$kubernetesAppId-ssl" + private val sslSecretsName = s"$SUBMISSION_SSL_SECRETS_PREFIX-$kubernetesAppId" + private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) + private val uploadedJars = sparkConf.get(KUBERNETES_DRIVER_UPLOAD_JARS) private val uiPort = sparkConf.getInt("spark.ui.port", DEFAULT_UI_PORT) - private val driverLaunchTimeoutSecs = sparkConf.getTimeAsSeconds( - "spark.kubernetes.driverLaunchTimeout", s"${DEFAULT_LAUNCH_TIMEOUT_SECONDS}s") + private val driverSubmitTimeoutSecs = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TIMEOUT) private val secretBase64String = { val secretBytes = new Array[Byte](128) @@ -73,32 +71,27 @@ private[spark] class Client( Base64.encodeBase64String(secretBytes) } - private val serviceAccount = sparkConf.get("spark.kubernetes.submit.serviceAccountName", - "default") - - private val customLabels = sparkConf.get("spark.kubernetes.driver.labels", "") + private val serviceAccount = sparkConf.get(KUBERNETES_SERVICE_ACCOUNT_NAME) + private val customLabels = sparkConf.get(KUBERNETES_DRIVER_LABELS) private implicit val retryableExecutionContext = ExecutionContext .fromExecutorService( - Executors.newSingleThreadExecutor(new ThreadFactoryBuilder() - .setNameFormat("kubernetes-client-retryable-futures-%d") - .setDaemon(true) - .build())) + ThreadUtils.newDaemonSingleThreadExecutor("kubernetes-client-retryable-futures")) def run(): Unit = { - val (driverLaunchSslOptions, isKeyStoreLocalFile) = parseDriverLaunchSslOptions() + val (driverSubmitSslOptions, isKeyStoreLocalFile) = parseDriverSubmitSslOptions() val parsedCustomLabels = parseCustomLabels(customLabels) var k8ConfBuilder = new ConfigBuilder() .withApiVersion("v1") .withMasterUrl(master) .withNamespace(namespace) - sparkConf.getOption("spark.kubernetes.submit.caCertFile").foreach { + sparkConf.get(KUBERNETES_CA_CERT_FILE).foreach { f => k8ConfBuilder = k8ConfBuilder.withCaCertFile(f) } - sparkConf.getOption("spark.kubernetes.submit.clientKeyFile").foreach { + sparkConf.get(KUBERNETES_CLIENT_KEY_FILE).foreach { f => k8ConfBuilder = k8ConfBuilder.withClientKeyFile(f) } - sparkConf.getOption("spark.kubernetes.submit.clientCertFile").foreach { + sparkConf.get(KUBERNETES_CLIENT_CERT_FILE).foreach { f => k8ConfBuilder = k8ConfBuilder.withClientCertFile(f) } @@ -108,15 +101,16 @@ private[spark] class Client( .withNewMetadata() .withName(secretName) .endMetadata() - .withData(Map((SUBMISSION_SERVER_SECRET_NAME, secretBase64String)).asJava) + .withData(Map((SUBMISSION_APP_SECRET_NAME, secretBase64String)).asJava) .withType("Opaque") .done() val (sslEnvs, sslVolumes, sslVolumeMounts, sslSecrets) = configureSsl(kubernetesClient, - driverLaunchSslOptions, + driverSubmitSslOptions, isKeyStoreLocalFile) try { val driverKubernetesSelectors = (Map( - DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue, + SPARK_DRIVER_LABEL -> kubernetesAppId, + SPARK_APP_ID_LABEL -> kubernetesAppId, SPARK_APP_NAME_LABEL -> appName) ++ parsedCustomLabels).asJava val containerPorts = buildContainerPorts() @@ -126,7 +120,7 
@@ private[spark] class Client( submitCompletedFuture, submitPending, kubernetesClient, - driverLaunchSslOptions, + driverSubmitSslOptions, Array(submitServerSecret) ++ sslSecrets, driverKubernetesSelectors) Utils.tryWithResource(kubernetesClient @@ -141,7 +135,7 @@ private[spark] class Client( .withNewSpec() .withRestartPolicy("OnFailure") .addNewVolume() - .withName(s"spark-submission-secret-volume") + .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) .withNewSecret() .withSecretName(submitServerSecret.getMetadata.getName) .endSecret() @@ -149,22 +143,22 @@ private[spark] class Client( .addToVolumes(sslVolumes: _*) .withServiceAccount(serviceAccount) .addNewContainer() - .withName(DRIVER_LAUNCHER_CONTAINER_NAME) + .withName(DRIVER_CONTAINER_NAME) .withImage(driverDockerImage) .withImagePullPolicy("IfNotPresent") .addNewVolumeMount() - .withName("spark-submission-secret-volume") + .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) .withMountPath(secretDirectory) .withReadOnly(true) .endVolumeMount() .addToVolumeMounts(sslVolumeMounts: _*) .addNewEnv() - .withName("SPARK_SUBMISSION_SECRET_LOCATION") - .withValue(s"$secretDirectory/$SUBMISSION_SERVER_SECRET_NAME") + .withName(ENV_SUBMISSION_SECRET_LOCATION) + .withValue(s"$secretDirectory/$SUBMISSION_APP_SECRET_NAME") .endEnv() .addNewEnv() - .withName("SPARK_DRIVER_LAUNCHER_SERVER_PORT") - .withValue(DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT.toString) + .withName(ENV_SUBMISSION_SERVER_PORT) + .withValue(SUBMISSION_SERVER_PORT.toString) .endEnv() .addToEnv(sslEnvs: _*) .withPorts(containerPorts.asJava) @@ -173,7 +167,7 @@ private[spark] class Client( .done() var submitSucceeded = false try { - submitCompletedFuture.get(driverLaunchTimeoutSecs, TimeUnit.SECONDS) + submitCompletedFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) submitSucceeded = true } catch { case e: TimeoutException => @@ -199,8 +193,8 @@ private[spark] class Client( } } - private def parseDriverLaunchSslOptions(): (SSLOptions, Boolean) = { - val maybeKeyStore = sparkConf.getOption("spark.ssl.kubernetes.driverlaunch.keyStore") + private def parseDriverSubmitSslOptions(): (SSLOptions, Boolean) = { + val maybeKeyStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_KEYSTORE) val resolvedSparkConf = sparkConf.clone() val (isLocalKeyStore, resolvedKeyStore) = maybeKeyStore.map(keyStore => { val keyStoreURI = Utils.resolveURI(keyStore) @@ -214,30 +208,29 @@ private[spark] class Client( (isProvidedKeyStoreLocal, Option.apply(keyStoreURI.getPath)) }).getOrElse((true, Option.empty[String])) resolvedKeyStore.foreach { - resolvedSparkConf.set("spark.ssl.kubernetes.driverlaunch.keyStore", _) + resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_KEYSTORE, _) } - sparkConf.getOption("spark.ssl.kubernetes.driverlaunch.trustStore").foreach { trustStore => + sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE).foreach { trustStore => val trustStoreURI = Utils.resolveURI(trustStore) trustStoreURI.getScheme match { case "file" | null => - resolvedSparkConf.set("spark.ssl.kubernetes.driverlaunch.trustStore", - trustStoreURI.getPath) + resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE, trustStoreURI.getPath) case _ => throw new SparkException(s"Invalid trustStore URI $trustStore; trustStore URI" + " for submit server must have no scheme, or scheme file://") } } val securityManager = new SecurityManager(resolvedSparkConf) - (securityManager.getSSLOptions("kubernetes.driverlaunch"), isLocalKeyStore) + (securityManager.getSSLOptions(KUBERNETES_SUBMIT_SSL_NAMESPACE), isLocalKeyStore) } - private def 
configureSsl(kubernetesClient: KubernetesClient, driverLaunchSslOptions: SSLOptions, + private def configureSsl(kubernetesClient: KubernetesClient, driverSubmitSslOptions: SSLOptions, isKeyStoreLocalFile: Boolean): (Array[EnvVar], Array[Volume], Array[VolumeMount], Array[Secret]) = { - if (driverLaunchSslOptions.enabled) { + if (driverSubmitSslOptions.enabled) { val sslSecretsMap = mutable.HashMap[String, String]() val sslEnvs = mutable.Buffer[EnvVar]() val secrets = mutable.Buffer[Secret]() - driverLaunchSslOptions.keyStore.foreach(store => { + driverSubmitSslOptions.keyStore.foreach(store => { val resolvedKeyStoreFile = if (isKeyStoreLocalFile) { if (!store.isFile) { throw new SparkException(s"KeyStore specified at $store is not a file or" + @@ -245,40 +238,40 @@ private[spark] class Client( } val keyStoreBytes = Files.toByteArray(store) val keyStoreBase64 = Base64.encodeBase64String(keyStoreBytes) - sslSecretsMap += (SSL_KEYSTORE_SECRET_NAME -> keyStoreBase64) - s"$sslSecretsDirectory/$SSL_KEYSTORE_SECRET_NAME" + sslSecretsMap += (SUBMISSION_SSL_KEYSTORE_SECRET_NAME -> keyStoreBase64) + s"$sslSecretsDirectory/$SUBMISSION_SSL_KEYSTORE_SECRET_NAME" } else { store.getAbsolutePath } sslEnvs += new EnvVarBuilder() - .withName("SPARK_SUBMISSION_KEYSTORE_FILE") + .withName(ENV_SUBMISSION_KEYSTORE_FILE) .withValue(resolvedKeyStoreFile) .build() }) - driverLaunchSslOptions.keyStorePassword.foreach(password => { + driverSubmitSslOptions.keyStorePassword.foreach(password => { val passwordBase64 = Base64.encodeBase64String(password.getBytes(Charsets.UTF_8)) - sslSecretsMap += (SSL_KEYSTORE_PASSWORD_SECRET_NAME -> passwordBase64) + sslSecretsMap += (SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME -> passwordBase64) sslEnvs += new EnvVarBuilder() - .withName("SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE") - .withValue(s"$sslSecretsDirectory/$SSL_KEYSTORE_PASSWORD_SECRET_NAME") + .withName(ENV_SUBMISSION_KEYSTORE_PASSWORD_FILE) + .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME") .build() }) - driverLaunchSslOptions.keyPassword.foreach(password => { + driverSubmitSslOptions.keyPassword.foreach(password => { val passwordBase64 = Base64.encodeBase64String(password.getBytes(Charsets.UTF_8)) - sslSecretsMap += (SSL_KEY_PASSWORD_SECRET_NAME -> passwordBase64) + sslSecretsMap += (SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME -> passwordBase64) sslEnvs += new EnvVarBuilder() - .withName("SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE") - .withValue(s"$sslSecretsDirectory/$SSL_KEY_PASSWORD_SECRET_NAME") + .withName(ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE) + .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME") .build() }) - driverLaunchSslOptions.keyStoreType.foreach(storeType => { + driverSubmitSslOptions.keyStoreType.foreach(storeType => { sslEnvs += new EnvVarBuilder() - .withName("SPARK_SUBMISSION_KEYSTORE_TYPE") + .withName(ENV_SUBMISSION_KEYSTORE_TYPE) .withValue(storeType) .build() }) sslEnvs += new EnvVarBuilder() - .withName("SPARK_SUBMISSION_USE_SSL") + .withName(ENV_SUBMISSION_USE_SSL) .withValue("true") .build() val sslSecrets = kubernetesClient.secrets().createNew() @@ -290,13 +283,13 @@ private[spark] class Client( .done() secrets += sslSecrets val sslVolume = new VolumeBuilder() - .withName("spark-submission-server-ssl-secrets") + .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) .withNewSecret() .withSecretName(sslSecrets.getMetadata.getName) .endSecret() .build() val sslVolumeMount = new VolumeMountBuilder() - .withName("spark-submission-server-ssl-secrets") 
+ .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) .withReadOnly(true) .withMountPath(sslSecretsDirectory) .build() @@ -310,7 +303,7 @@ private[spark] class Client( submitCompletedFuture: SettableFuture[Boolean], submitPending: AtomicBoolean, kubernetesClient: KubernetesClient, - driverLaunchSslOptions: SSLOptions, + driverSubmitSslOptions: SSLOptions, applicationSecrets: Array[Secret], driverKubernetesSelectors: java.util.Map[String, String]) extends Watcher[Pod] { override def eventReceived(action: Action, pod: Pod): Unit = { @@ -322,7 +315,7 @@ private[spark] class Client( .getContainerStatuses .asScala .find(status => - status.getName == DRIVER_LAUNCHER_CONTAINER_NAME && status.getReady) match { + status.getName == DRIVER_CONTAINER_NAME && status.getReady) match { case Some(_) => val ownerRefs = Seq(new OwnerReferenceBuilder() .withName(pod.getMetadata.getName) @@ -337,10 +330,10 @@ private[spark] class Client( kubernetesClient.secrets().createOrReplace(secret) }) - val driverLauncherServicePort = new ServicePortBuilder() - .withName(DRIVER_LAUNCHER_SERVICE_PORT_NAME) - .withPort(DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT) - .withNewTargetPort(DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT) + val driverSubmissionServicePort = new ServicePortBuilder() + .withName(SUBMISSION_SERVER_PORT_NAME) + .withPort(SUBMISSION_SERVER_PORT) + .withNewTargetPort(SUBMISSION_SERVER_PORT) .build() val service = kubernetesClient.services().createNew() .withNewMetadata() @@ -351,20 +344,25 @@ private[spark] class Client( .withNewSpec() .withType("NodePort") .withSelector(driverKubernetesSelectors) - .withPorts(driverLauncherServicePort) + .withPorts(driverSubmissionServicePort) .endSpec() .done() try { - sparkConf.set("spark.kubernetes.driver.service.name", - service.getMetadata.getName) - sparkConf.set("spark.kubernetes.driver.pod.name", kubernetesAppId) + sparkConf.getOption("spark.app.id").foreach { id => + logWarning(s"Warning: Provided app id in spark.app.id as $id will be" + + s" overridden as $kubernetesAppId") + } + sparkConf.set(KUBERNETES_DRIVER_POD_NAME, kubernetesAppId) + sparkConf.set(KUBERNETES_DRIVER_SERVICE_NAME, service.getMetadata.getName) + sparkConf.set("spark.app.id", kubernetesAppId) + sparkConf.setIfMissing("spark.app.name", appName) sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) sparkConf.setIfMissing("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT.toString) - val driverLauncher = buildDriverLauncherClient(kubernetesClient, service, - driverLaunchSslOptions) + val driverSubmitter = buildDriverSubmissionClient(kubernetesClient, service, + driverSubmitSslOptions) val ping = Retry.retry(5, 5.seconds) { - driverLauncher.ping() + driverSubmitter.ping() } ping onFailure { case t: Throwable => @@ -375,7 +373,7 @@ private[spark] class Client( Future { sparkConf.set("spark.driver.host", pod.getStatus.getPodIP) val submitRequest = buildSubmissionRequest() - driverLauncher.create(submitRequest) + driverSubmitter.submitApplication(submitRequest) } } submitComplete onFailure { @@ -436,17 +434,17 @@ private[spark] class Client( kubernetesClient.pods().withName(kubernetesAppId).get() } catch { case throwable: Throwable => - logError(s"Timed out while waiting $driverLaunchTimeoutSecs seconds for the" + + logError(s"Timed out while waiting $driverSubmitTimeoutSecs seconds for the" + " driver pod to start, but an error occurred while fetching the driver" + " pod's details.", throwable) - throw new SparkException(s"Timed out while waiting $driverLaunchTimeoutSecs" + + throw new 
SparkException(s"Timed out while waiting $driverSubmitTimeoutSecs" + " seconds for the driver pod to start. Unfortunately, in attempting to fetch" + " the latest state of the pod, another error was thrown. Check the logs for" + " the error that was thrown in looking up the driver pod.", e) } val topLevelMessage = s"The driver pod with name ${driverPod.getMetadata.getName}" + s" in namespace ${driverPod.getMetadata.getNamespace} was not ready in" + - s" $driverLaunchTimeoutSecs seconds." + s" $driverSubmitTimeoutSecs seconds." val podStatusPhase = if (driverPod.getStatus.getPhase != null) { s"Latest phase from the pod is: ${driverPod.getStatus.getPhase}" } else { @@ -460,7 +458,7 @@ private[spark] class Client( val failedDriverContainerStatusString = driverPod.getStatus .getContainerStatuses .asScala - .find(_.getName == DRIVER_LAUNCHER_CONTAINER_NAME) + .find(_.getName == DRIVER_CONTAINER_NAME) .map(status => { val lastState = status.getState if (lastState.getRunning != null) { @@ -481,17 +479,21 @@ private[spark] class Client( "Driver container last state: Unknown" } }).getOrElse("The driver container wasn't found in the pod; expected to find" + - s" container with name $DRIVER_LAUNCHER_CONTAINER_NAME") + s" container with name $DRIVER_CONTAINER_NAME") s"$topLevelMessage\n" + s"$podStatusPhase\n" + s"$podStatusMessage\n\n$failedDriverContainerStatusString" } private def buildContainerPorts(): Seq[ContainerPort] = { - Seq(sparkConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT), - sparkConf.getInt("spark.blockManager.port", DEFAULT_BLOCKMANAGER_PORT), - DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT, - uiPort).map(new ContainerPortBuilder().withContainerPort(_).build()) + Seq((DRIVER_PORT_NAME, sparkConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT)), + (BLOCK_MANAGER_PORT_NAME, + sparkConf.getInt("spark.blockManager.port", DEFAULT_BLOCKMANAGER_PORT)), + (SUBMISSION_SERVER_PORT_NAME, SUBMISSION_SERVER_PORT), + (UI_PORT_NAME, uiPort)).map(port => new ContainerPortBuilder() + .withName(port._1) + .withContainerPort(port._2) + .build()) } private def buildSubmissionRequest(): KubernetesCreateSubmissionRequest = { @@ -526,22 +528,22 @@ private[spark] class Client( .map(CompressionUtils.createTarGzip(_)) } - private def buildDriverLauncherClient( + private def buildDriverSubmissionClient( kubernetesClient: KubernetesClient, service: Service, - driverLaunchSslOptions: SSLOptions): KubernetesSparkRestApi = { + driverSubmitSslOptions: SSLOptions): KubernetesSparkRestApi = { val servicePort = service .getSpec .getPorts .asScala - .filter(_.getName == DRIVER_LAUNCHER_SERVICE_PORT_NAME) + .filter(_.getName == SUBMISSION_SERVER_PORT_NAME) .head .getNodePort // NodePort is exposed on every node, so just pick one of them. 
// TODO be resilient to node failures and try all of them val node = kubernetesClient.nodes.list.getItems.asScala.head val nodeAddress = node.getStatus.getAddresses.asScala.head.getAddress - val urlScheme = if (driverLaunchSslOptions.enabled) { + val urlScheme = if (driverSubmitSslOptions.enabled) { "https" } else { logWarning("Submitting application details, application secret, and local" + @@ -550,8 +552,8 @@ private[spark] class Client( "http" } val (trustManager, sslContext): (X509TrustManager, SSLContext) = - if (driverLaunchSslOptions.enabled) { - buildSslConnectionConfiguration(driverLaunchSslOptions) + if (driverSubmitSslOptions.enabled) { + buildSslConnectionConfiguration(driverSubmitSslOptions) } else { (null, SSLContext.getDefault) } @@ -562,18 +564,18 @@ private[spark] class Client( trustContext = trustManager) } - private def buildSslConnectionConfiguration(driverLaunchSslOptions: SSLOptions) = { - driverLaunchSslOptions.trustStore.map(trustStoreFile => { + private def buildSslConnectionConfiguration(driverSubmitSslOptions: SSLOptions) = { + driverSubmitSslOptions.trustStore.map(trustStoreFile => { val trustManagerFactory = TrustManagerFactory.getInstance( TrustManagerFactory.getDefaultAlgorithm) val trustStore = KeyStore.getInstance( - driverLaunchSslOptions.trustStoreType.getOrElse(KeyStore.getDefaultType)) + driverSubmitSslOptions.trustStoreType.getOrElse(KeyStore.getDefaultType)) if (!trustStoreFile.isFile) { throw new SparkException(s"TrustStore file at ${trustStoreFile.getAbsolutePath}" + s" does not exist or is not a file.") } Utils.tryWithResource(new FileInputStream(trustStoreFile)) { trustStoreStream => - driverLaunchSslOptions.trustStorePassword match { + driverSubmitSslOptions.trustStorePassword match { case Some(password) => trustStore.load(trustStoreStream, password.toCharArray) case None => trustStore.load(trustStoreStream, null) @@ -587,44 +589,29 @@ private[spark] class Client( }).getOrElse((null, SSLContext.getDefault)) } - private def parseCustomLabels(labels: String): Map[String, String] = { - labels.split(",").map(_.trim).filterNot(_.isEmpty).map(label => { - label.split("=", 2).toSeq match { - case Seq(k, v) => - require(k != DRIVER_LAUNCHER_SELECTOR_LABEL, "Label with key" + - s" $DRIVER_LAUNCHER_SELECTOR_LABEL cannot be used in" + - " spark.kubernetes.driver.labels, as it is reserved for Spark's" + - " internal configuration.") - (k, v) - case _ => - throw new SparkException("Custom labels set by spark.kubernetes.driver.labels" + - " must be a comma-separated list of key-value pairs, with format =." + - s" Got label: $label. All labels: $labels") - } - }).toMap + private def parseCustomLabels(maybeLabels: Option[String]): Map[String, String] = { + maybeLabels.map(labels => { + labels.split(",").map(_.trim).filterNot(_.isEmpty).map(label => { + label.split("=", 2).toSeq match { + case Seq(k, v) => + require(k != SPARK_APP_ID_LABEL, "Label with key" + + s" $SPARK_APP_ID_LABEL cannot be used in" + + " spark.kubernetes.driver.labels, as it is reserved for Spark's" + + " internal configuration.") + (k, v) + case _ => + throw new SparkException("Custom labels set by spark.kubernetes.driver.labels" + + " must be a comma-separated list of key-value pairs, with format =." + + s" Got label: $label. 
All labels: $labels") + } + }).toMap + }).getOrElse(Map.empty[String, String]) } } private[spark] object Client extends Logging { - private val SUBMISSION_SERVER_SECRET_NAME = "spark-submission-server-secret" - private val SSL_KEYSTORE_SECRET_NAME = "spark-submission-server-keystore" - private val SSL_KEYSTORE_PASSWORD_SECRET_NAME = "spark-submission-server-keystore-password" - private val SSL_KEY_PASSWORD_SECRET_NAME = "spark-submission-server-key-password" - private val DRIVER_LAUNCHER_SELECTOR_LABEL = "driver-launcher-selector" - private val DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT = 7077 - private val DEFAULT_DRIVER_PORT = 7078 - private val DEFAULT_BLOCKMANAGER_PORT = 7079 - private val DEFAULT_UI_PORT = 4040 - private val UI_PORT_NAME = "spark-ui-port" - private val DRIVER_LAUNCHER_SERVICE_PORT_NAME = "driver-launcher-port" - private val DRIVER_PORT_NAME = "driver-port" - private val BLOCKMANAGER_PORT_NAME = "block-manager-port" - private val DRIVER_LAUNCHER_CONTAINER_NAME = "spark-kubernetes-driver-launcher" - private val SECURE_RANDOM = new SecureRandom() - private val SPARK_SUBMISSION_SECRET_BASE_DIR = "/var/run/secrets/spark-submission" - private val DEFAULT_LAUNCH_TIMEOUT_SECONDS = 60 - private val SPARK_APP_NAME_LABEL = "spark-app-name" + private[spark] val SECURE_RANDOM = new SecureRandom() def main(args: Array[String]): Unit = { require(args.length >= 2, s"Too few arguments. Usage: ${getClass.getName} " + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala new file mode 100644 index 0000000000000..9b145370f87d6 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import java.util.concurrent.TimeUnit + +import org.apache.spark.{SPARK_VERSION => sparkVersion} +import org.apache.spark.internal.config.ConfigBuilder + +package object config { + + private[spark] val KUBERNETES_NAMESPACE = + ConfigBuilder("spark.kubernetes.namespace") + .doc(""" + | The namespace that will be used for running the driver and + | executor pods. When using spark-submit in cluster mode, + | this can also be passed to spark-submit via the + | --kubernetes-namespace command line argument. + """.stripMargin) + .stringConf + .createWithDefault("default") + + private[spark] val DRIVER_DOCKER_IMAGE = + ConfigBuilder("spark.kubernetes.driver.docker.image") + .doc(""" + | Docker image to use for the driver. Specify this using the + | standard Docker tag format. 
+ """.stripMargin) + .stringConf + .createWithDefault(s"spark-driver:$sparkVersion") + + private[spark] val EXECUTOR_DOCKER_IMAGE = + ConfigBuilder("spark.kubernetes.executor.docker.image") + .doc(""" + | Docker image to use for the executors. Specify this using + | the standard Docker tag format. + """.stripMargin) + .stringConf + .createWithDefault(s"spark-executor:$sparkVersion") + + private[spark] val KUBERNETES_CA_CERT_FILE = + ConfigBuilder("spark.kubernetes.submit.caCertFile") + .doc(""" + | CA cert file for connecting to Kubernetes over SSL. This + | file should be located on the submitting machine's disk. + """.stripMargin) + .stringConf + .createOptional + + private[spark] val KUBERNETES_CLIENT_KEY_FILE = + ConfigBuilder("spark.kubernetes.submit.clientKeyFile") + .doc(""" + | Client key file for authenticating against the Kubernetes + | API server. This file should be located on the submitting + | machine's disk. + """.stripMargin) + .stringConf + .createOptional + + private[spark] val KUBERNETES_CLIENT_CERT_FILE = + ConfigBuilder("spark.kubernetes.submit.clientCertFile") + .doc(""" + | Client cert file for authenticating against the + | Kubernetes API server. This file should be located on + | the submitting machine's disk. + """.stripMargin) + .stringConf + .createOptional + + private[spark] val KUBERNETES_SERVICE_ACCOUNT_NAME = + ConfigBuilder("spark.kubernetes.submit.serviceAccountName") + .doc(""" + | Service account that is used when running the driver pod. + | The driver pod uses this service account when requesting + | executor pods from the API server. + """.stripMargin) + .stringConf + .createWithDefault("default") + + private[spark] val KUBERNETES_DRIVER_UPLOAD_JARS = + ConfigBuilder("spark.kubernetes.driver.uploads.jars") + .doc(""" + | Comma-separated list of jars to send to the driver and + | all executors when submitting the application in cluster + | mode. + """.stripMargin) + .stringConf + .createOptional + + // Note that while we set a default for this when we start up the + // scheduler, the specific default value is dynamically determined + // based on the executor memory. + private[spark] val KUBERNETES_EXECUTOR_MEMORY_OVERHEAD = + ConfigBuilder("spark.kubernetes.executor.memoryOverhead") + .doc(""" + | The amount of off-heap memory (in megabytes) to be + | allocated per executor. This is memory that accounts for + | things like VM overheads, interned strings, other native + | overheads, etc. This tends to grow with the executor size + | (typically 6-10%). + """.stripMargin) + .stringConf + .createOptional + + private[spark] val KUBERNETES_DRIVER_LABELS = + ConfigBuilder("spark.kubernetes.driver.labels") + .doc(""" + | Custom labels that will be added to the driver pod. + | This should be a comma-separated list of label key-value + | pairs, where each label is in the format key=value. Note + | that Spark also adds its own labels to the driver pod + | for bookkeeping purposes. + """.stripMargin) + .stringConf + .createOptional + + private[spark] val KUBERNETES_DRIVER_SUBMIT_TIMEOUT = + ConfigBuilder("spark.kubernetes.driverSubmitTimeout") + .doc(""" + | Time to wait for the driver process to start running + | before aborting its execution. + """.stripMargin) + .timeConf(TimeUnit.SECONDS) + .createWithDefault(60L) + + private[spark] val KUBERNETES_DRIVER_SUBMIT_KEYSTORE = + ConfigBuilder("spark.ssl.kubernetes.submit.keyStore") + .doc(""" + | KeyStore file for the driver submission server listening + | on SSL. Can be pre-mounted on the driver container + | or uploaded from the submitting client. + """.stripMargin) + .stringConf + .createOptional + + private[spark] val KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE = + ConfigBuilder("spark.ssl.kubernetes.submit.trustStore") + .doc(""" + | TrustStore containing certificates for communicating + | with the driver submission server over SSL. + """.stripMargin) + .stringConf + .createOptional + + private[spark] val KUBERNETES_DRIVER_SERVICE_NAME = + ConfigBuilder("spark.kubernetes.driver.service.name") + .doc(""" + | Kubernetes service that exposes the driver pod + | for external access. + """.stripMargin) + .internal() + .stringConf + .createOptional + + private[spark] val KUBERNETES_DRIVER_POD_NAME = + ConfigBuilder("spark.kubernetes.driver.pod.name") + .doc(""" + | Name of the driver pod. + """.stripMargin) + .internal() + .stringConf + .createOptional +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala new file mode 100644 index 0000000000000..027cc3c022b4e --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.spark.deploy.kubernetes + +package object constants { + // Labels + private[spark] val SPARK_DRIVER_LABEL = "spark-driver" + private[spark] val SPARK_APP_ID_LABEL = "spark-app-id" + private[spark] val SPARK_APP_NAME_LABEL = "spark-app-name" + private[spark] val SPARK_EXECUTOR_ID_LABEL = "spark-exec-id" + + // Secrets + private[spark] val DRIVER_CONTAINER_SECRETS_BASE_DIR = "/var/run/secrets/spark-submission" + private[spark] val SUBMISSION_APP_SECRET_NAME = "spark-submission-server-secret" + private[spark] val SUBMISSION_APP_SECRET_PREFIX = "spark-submission-server-secret" + private[spark] val SUBMISSION_APP_SECRET_VOLUME_NAME = "spark-submission-secret-volume" + private[spark] val SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME = + "spark-submission-server-key-password" + private[spark] val SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME = + "spark-submission-server-keystore-password" + private[spark] val SUBMISSION_SSL_KEYSTORE_SECRET_NAME = "spark-submission-server-keystore" + private[spark] val SUBMISSION_SSL_SECRETS_PREFIX = "spark-submission-server-ssl" + private[spark] val SUBMISSION_SSL_SECRETS_VOLUME_NAME = "spark-submission-server-ssl-secrets" + + // Default and fixed ports + private[spark] val SUBMISSION_SERVER_PORT = 7077 + private[spark] val DEFAULT_DRIVER_PORT = 7078 + private[spark] val DEFAULT_BLOCKMANAGER_PORT = 7079 + private[spark] val DEFAULT_UI_PORT = 4040 + private[spark] val UI_PORT_NAME = "spark-ui-port" + private[spark] val SUBMISSION_SERVER_PORT_NAME = "submit-server" + private[spark] val BLOCK_MANAGER_PORT_NAME = "blockmanager" + private[spark] val DRIVER_PORT_NAME = "driver" + private[spark] val EXECUTOR_PORT_NAME = "executor" + + // Environment Variables + private[spark] val ENV_SUBMISSION_SECRET_LOCATION = "SPARK_SUBMISSION_SECRET_LOCATION" + private[spark] val ENV_SUBMISSION_SERVER_PORT = "SPARK_SUBMISSION_SERVER_PORT" + private[spark] val ENV_SUBMISSION_KEYSTORE_FILE = "SPARK_SUBMISSION_KEYSTORE_FILE" + private[spark] val ENV_SUBMISSION_KEYSTORE_PASSWORD_FILE = + "SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE" + private[spark] val ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE = + "SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE" + private[spark] val ENV_SUBMISSION_KEYSTORE_TYPE = "SPARK_SUBMISSION_KEYSTORE_TYPE" + private[spark] val ENV_SUBMISSION_USE_SSL = "SPARK_SUBMISSION_USE_SSL" + private[spark] val ENV_EXECUTOR_PORT = "SPARK_EXECUTOR_PORT" + private[spark] val ENV_DRIVER_URL = "SPARK_DRIVER_URL" + private[spark] val ENV_EXECUTOR_CORES = "SPARK_EXECUTOR_CORES" + private[spark] val ENV_EXECUTOR_MEMORY = "SPARK_EXECUTOR_MEMORY" + private[spark] val ENV_APPLICATION_ID = "SPARK_APPLICATION_ID" + private[spark] val ENV_EXECUTOR_ID = "SPARK_EXECUTOR_ID" + + // Miscellaneous + private[spark] val DRIVER_CONTAINER_NAME = "spark-kubernetes-driver" + private[spark] val KUBERNETES_SUBMIT_SSL_NAMESPACE = "kubernetes.submit" +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala index 813d070e0f876..8beba23bc8e11 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala @@ -20,23 +20,22 @@ import com.fasterxml.jackson.annotation.{JsonSubTypes, JsonTypeInfo} import 
org.apache.spark.SPARK_VERSION -// TODO: jars should probably be compressed. Shipping tarballs would be optimal. case class KubernetesCreateSubmissionRequest( - val appResource: AppResource, - val mainClass: String, - val appArgs: Array[String], - val sparkProperties: Map[String, String], - val secret: String, - val uploadedJarsBase64Contents: Option[TarGzippedData]) extends SubmitRestProtocolRequest { + appResource: AppResource, + mainClass: String, + appArgs: Array[String], + sparkProperties: Map[String, String], + secret: String, + uploadedJarsBase64Contents: Option[TarGzippedData]) extends SubmitRestProtocolRequest { message = "create" clientSparkVersion = SPARK_VERSION } case class TarGzippedData( - val dataBase64: String, - val blockSize: Int = 10240, - val recordSize: Int = 512, - val encoding: String + dataBase64: String, + blockSize: Int = 10240, + recordSize: Int = 512, + encoding: String ) @JsonTypeInfo( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala index 3cbcb16293b1d..18eb9b7a12ca6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala @@ -28,12 +28,11 @@ trait KubernetesSparkRestApi { @Consumes(Array(MediaType.APPLICATION_JSON)) @Produces(Array(MediaType.APPLICATION_JSON)) @Path("/create") - def create(request: KubernetesCreateSubmissionRequest): CreateSubmissionResponse + def submitApplication(request: KubernetesCreateSubmissionRequest): CreateSubmissionResponse @GET @Consumes(Array(MediaType.APPLICATION_JSON)) @Produces(Array(MediaType.APPLICATION_JSON)) @Path("/ping") def ping(): PingResponse - } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index dae4b2714b4e4..550ddd113fa42 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -21,17 +21,18 @@ import java.util.concurrent.Executors import java.util.concurrent.atomic.{AtomicInteger, AtomicLong} import com.google.common.util.concurrent.ThreadFactoryBuilder -import io.fabric8.kubernetes.api.model.{ContainerPort, ContainerPortBuilder, EnvVar, EnvVarBuilder, Pod, QuantityBuilder} +import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, Pod, QuantityBuilder} import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future} import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.deploy.kubernetes.{Client, KubernetesClientBuilder} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler.TaskSchedulerImpl import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend -import org.apache.spark.util.Utils +import 
org.apache.spark.util.{ThreadUtils, Utils} private[spark] class KubernetesClusterSchedulerBackend( scheduler: TaskSchedulerImpl, @@ -44,24 +45,19 @@ private[spark] class KubernetesClusterSchedulerBackend( private val runningExecutorPods = new scala.collection.mutable.HashMap[String, Pod] private val kubernetesMaster = Client.resolveK8sMaster(sc.master) - - private val executorDockerImage = conf - .get("spark.kubernetes.executor.docker.image", s"spark-executor:${sc.version}") - - private val kubernetesNamespace = conf.get("spark.kubernetes.namespace", "default") - + private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) + private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) private val executorPort = conf.getInt("spark.executor.port", DEFAULT_STATIC_PORT) - private val blockmanagerPort = conf .getInt("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT) private val kubernetesDriverServiceName = conf - .getOption("spark.kubernetes.driver.service.name") + .get(KUBERNETES_DRIVER_SERVICE_NAME) .getOrElse( throw new SparkException("Must specify the service name the driver is running with")) private val kubernetesDriverPodName = conf - .getOption("spark.kubernetes.driver.pod.name") + .get(KUBERNETES_DRIVER_POD_NAME) .getOrElse( throw new SparkException("Must specify the driver pod name")) @@ -69,7 +65,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private val executorMemoryBytes = Utils.byteStringAsBytes(executorMemory) private val memoryOverheadBytes = conf - .getOption("spark.kubernetes.executor.memoryOverhead") + .get(KUBERNETES_EXECUTOR_MEMORY_OVERHEAD) .map(overhead => Utils.byteStringAsBytes(overhead)) .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * executorMemoryBytes).toInt, MEMORY_OVERHEAD_MIN)) @@ -78,16 +74,12 @@ private[spark] class KubernetesClusterSchedulerBackend( private val executorCores = conf.getOption("spark.executor.cores").getOrElse("1") private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( - Executors.newCachedThreadPool( - new ThreadFactoryBuilder() - .setDaemon(true) - .setNameFormat("kubernetes-executor-requests-%d") - .build)) + ThreadUtils.newDaemonCachedThreadPool("kubernetes-executor-requests")) private val kubernetesClient = KubernetesClientBuilder .buildFromWithinPod(kubernetesMaster, kubernetesNamespace) - val driverPod = try { + private val driverPod = try { kubernetesClient.pods().inNamespace(kubernetesNamespace). 
withName(kubernetesDriverPodName).get() } catch { @@ -127,6 +119,8 @@ private[spark] class KubernetesClusterSchedulerBackend( } } + override def applicationId(): String = conf.get("spark.app.id", super.applicationId()) + override def sufficientResourcesRegistered(): Boolean = { totalRegisteredExecutors.get() >= initialExecutors * minRegisteredRatio } @@ -163,9 +157,9 @@ private[spark] class KubernetesClusterSchedulerBackend( private def allocateNewExecutorPod(): (String, Pod) = { val executorKubernetesId = UUID.randomUUID().toString.replaceAll("-", "") val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString - val name = s"$kubernetesDriverServiceName-exec-$executorKubernetesId" - val selectors = Map(SPARK_EXECUTOR_SELECTOR -> executorId, - SPARK_APP_SELECTOR -> applicationId()).asJava + val name = s"${applicationId()}-exec-$executorKubernetesId" + val selectors = Map(SPARK_EXECUTOR_ID_LABEL -> executorId, + SPARK_APP_ID_LABEL -> applicationId()).asJava val executorMemoryQuantity = new QuantityBuilder(false) .withAmount(executorMemoryBytes.toString) .build() @@ -175,69 +169,61 @@ private[spark] class KubernetesClusterSchedulerBackend( val executorCpuQuantity = new QuantityBuilder(false) .withAmount(executorCores) .build() - val requiredEnv = new ArrayBuffer[EnvVar] - requiredEnv += new EnvVarBuilder() - .withName("SPARK_EXECUTOR_PORT") - .withValue(executorPort.toString) - .build() - requiredEnv += new EnvVarBuilder() - .withName("SPARK_DRIVER_URL") - .withValue(driverUrl) - .build() - requiredEnv += new EnvVarBuilder() - .withName("SPARK_EXECUTOR_CORES") - .withValue(executorCores) - .build() - requiredEnv += new EnvVarBuilder() - .withName("SPARK_EXECUTOR_MEMORY") - .withValue(executorMemory) - .build() - requiredEnv += new EnvVarBuilder() - .withName("SPARK_APPLICATION_ID") - .withValue(applicationId()) - .build() - requiredEnv += new EnvVarBuilder() - .withName("SPARK_EXECUTOR_ID") - .withValue(executorId) - .build() - val requiredPorts = new ArrayBuffer[ContainerPort] - requiredPorts += new ContainerPortBuilder() - .withName(EXECUTOR_PORT_NAME) - .withContainerPort(executorPort) - .build() - requiredPorts += new ContainerPortBuilder() - .withName(BLOCK_MANAGER_PORT_NAME) - .withContainerPort(blockmanagerPort) - .build() - (executorKubernetesId, kubernetesClient.pods().createNew() - .withNewMetadata() - .withName(name) - .withLabels(selectors) - .withOwnerReferences() - .addNewOwnerReference() - .withController(true) - .withApiVersion(driverPod.getApiVersion) - .withKind(driverPod.getKind) - .withName(driverPod.getMetadata.getName) - .withUid(driverPod.getMetadata.getUid) - .endOwnerReference() - .endMetadata() - .withNewSpec() - .addNewContainer() - .withName(s"exec-${applicationId()}-container") - .withImage(executorDockerImage) - .withImagePullPolicy("IfNotPresent") - .withNewResources() - .addToRequests("memory", executorMemoryQuantity) - .addToLimits("memory", executorMemoryLimitQuantity) - .addToRequests("cpu", executorCpuQuantity) - .addToLimits("cpu", executorCpuQuantity) - .endResources() - .withEnv(requiredEnv.asJava) - .withPorts(requiredPorts.asJava) - .endContainer() - .endSpec() - .done()) + val requiredEnv = Seq( + (ENV_EXECUTOR_PORT, executorPort.toString), + (ENV_DRIVER_URL, driverUrl), + (ENV_EXECUTOR_CORES, executorCores), + (ENV_EXECUTOR_MEMORY, executorMemory), + (ENV_APPLICATION_ID, applicationId()), + (ENV_EXECUTOR_ID, executorId) + ).map(env => new EnvVarBuilder() + .withName(env._1) + .withValue(env._2) + .build()) + val requiredPorts = Seq( + 
(EXECUTOR_PORT_NAME, executorPort), + (BLOCK_MANAGER_PORT_NAME, blockmanagerPort)) + .map(port => { + new ContainerPortBuilder() + .withName(port._1) + .withContainerPort(port._2) + .build() + }) + try { + (executorKubernetesId, kubernetesClient.pods().createNew() + .withNewMetadata() + .withName(name) + .withLabels(selectors) + .withOwnerReferences() + .addNewOwnerReference() + .withController(true) + .withApiVersion(driverPod.getApiVersion) + .withKind(driverPod.getKind) + .withName(driverPod.getMetadata.getName) + .withUid(driverPod.getMetadata.getUid) + .endOwnerReference() + .endMetadata() + .withNewSpec() + .addNewContainer() + .withName(s"executor") + .withImage(executorDockerImage) + .withImagePullPolicy("IfNotPresent") + .withNewResources() + .addToRequests("memory", executorMemoryQuantity) + .addToLimits("memory", executorMemoryLimitQuantity) + .addToRequests("cpu", executorCpuQuantity) + .addToLimits("cpu", executorCpuQuantity) + .endResources() + .withEnv(requiredEnv.asJava) + .withPorts(requiredPorts.asJava) + .endContainer() + .endSpec() + .done()) + } catch { + case throwable: Throwable => + logError("Failed to allocate executor pod.", throwable) + throw throwable + } } override def doRequestTotalExecutors(requestedTotal: Int): Future[Boolean] = Future[Boolean] { @@ -269,13 +255,7 @@ private[spark] class KubernetesClusterSchedulerBackend( } private object KubernetesClusterSchedulerBackend { - private val SPARK_EXECUTOR_SELECTOR = "spark-exec" - private val SPARK_APP_SELECTOR = "spark-app" private val DEFAULT_STATIC_PORT = 10000 - private val DEFAULT_BLOCKMANAGER_PORT = 7079 - private val DEFAULT_DRIVER_PORT = 7078 - private val BLOCK_MANAGER_PORT_NAME = "blockmanager" - private val EXECUTOR_PORT_NAME = "executor" private val MEMORY_OVERHEAD_FACTOR = 0.10 private val MEMORY_OVERHEAD_MIN = 384L private val EXECUTOR_ID_COUNTER = new AtomicLong(0L) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 070008fce7410..92fdfb8ac5f41 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -27,6 +27,6 @@ CMD SSL_ARGS="" && \ if ! 
[ -z ${SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-key-password-file $SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE"; fi && \ exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.KubernetesSparkRestServer \ --hostname $HOSTNAME \ - --port $SPARK_DRIVER_LAUNCHER_SERVER_PORT \ + --port $SPARK_SUBMISSION_SERVER_PORT \ --secret-file $SPARK_SUBMISSION_SECRET_LOCATION \ ${SSL_ARGS} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 13edea02dce9a..16de71118dec4 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -172,7 +172,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .set("spark.executor.memory", "500m") .set("spark.executor.cores", "1") .set("spark.executors.instances", "1") - .set("spark.app.id", "spark-pi") + .set("spark.app.name", "spark-pi") .set("spark.ui.enabled", "true") .set("spark.testing", "false") val mainAppResource = s"file://$EXAMPLES_JAR" @@ -298,11 +298,11 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .getLabels // We can't match all of the selectors directly since one of the selectors is based on the // launch time. - assert(driverPodLabels.size == 4, "Unexpected number of pod labels.") - assert(driverPodLabels.containsKey("driver-launcher-selector"), "Expected driver launcher" + - " selector label to be present.") + assert(driverPodLabels.size == 5, "Unexpected number of pod labels.") assert(driverPodLabels.get("spark-app-name") == "spark-pi", "Unexpected value for" + " spark-app-name label.") + assert(driverPodLabels.get("spark-app-id").startsWith("spark-pi"), "Unexpected value for" + + " spark-app-id label (should be prefixed with the app name).") assert(driverPodLabels.get("label1") == "label1value", "Unexpected value for label1") assert(driverPodLabels.get("label2") == "label2value", "Unexpected value for label2") } @@ -323,12 +323,12 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", - "--conf", "spark.ssl.kubernetes.driverlaunch.enabled=true", - "--conf", "spark.ssl.kubernetes.driverlaunch.keyStore=" + + "--conf", "spark.ssl.kubernetes.submit.enabled=true", + "--conf", "spark.ssl.kubernetes.submit.keyStore=" + s"file://${keyStoreFile.getAbsolutePath}", - "--conf", "spark.ssl.kubernetes.driverlaunch.keyStorePassword=changeit", - "--conf", "spark.ssl.kubernetes.driverlaunch.keyPassword=changeit", - "--conf", "spark.ssl.kubernetes.driverlaunch.trustStore=" + + "--conf", "spark.ssl.kubernetes.submit.keyStorePassword=changeit", + "--conf", "spark.ssl.kubernetes.submit.keyPassword=changeit", + "--conf", "spark.ssl.kubernetes.submit.trustStore=" + s"file://${trustStoreFile.getAbsolutePath}", "--conf", s"spark.ssl.kubernetes.driverlaunch.trustStorePassword=changeit", EXAMPLES_JAR) From 
6ee3be5d0497131374f1ee4edab1f5071414892a Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 2 Feb 2017 10:58:15 -0800 Subject: [PATCH 037/156] Retry the submit-application request to multiple nodes (#69) * Retry the submit-application request to multiple nodes. * Fix doc style comment * Check node unschedulable, log retry failures --- .../spark/deploy/kubernetes/Client.scala | 27 ++++---- .../spark/deploy/kubernetes/Retry.scala | 28 +++++--- .../rest/kubernetes/HttpClientUtil.scala | 21 ++++-- .../kubernetes/MultiServerFeignTarget.scala | 67 +++++++++++++++++++ .../integrationtest/minikube/Minikube.scala | 2 +- 5 files changed, 117 insertions(+), 28 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index fed9334dbbab4..715df54e573c3 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -361,11 +361,13 @@ private[spark] class Client( DEFAULT_BLOCKMANAGER_PORT.toString) val driverSubmitter = buildDriverSubmissionClient(kubernetesClient, service, driverSubmitSslOptions) - val ping = Retry.retry(5, 5.seconds) { + val ping = Retry.retry(5, 5.seconds, + Some("Failed to contact the driver server")) { driverSubmitter.ping() } ping onFailure { case t: Throwable => + logError("Ping failed to the driver server", t) submitCompletedFuture.setException(t) kubernetesClient.services().delete(service) } @@ -532,17 +534,6 @@ private[spark] class Client( kubernetesClient: KubernetesClient, service: Service, driverSubmitSslOptions: SSLOptions): KubernetesSparkRestApi = { - val servicePort = service - .getSpec - .getPorts - .asScala - .filter(_.getName == SUBMISSION_SERVER_PORT_NAME) - .head - .getNodePort - // NodePort is exposed on every node, so just pick one of them. 
- // TODO be resilient to node failures and try all of them - val node = kubernetesClient.nodes.list.getItems.asScala.head - val nodeAddress = node.getStatus.getAddresses.asScala.head.getAddress val urlScheme = if (driverSubmitSslOptions.enabled) { "https" } else { @@ -551,15 +542,23 @@ private[spark] class Client( " to secure this step.") "http" } + val servicePort = service.getSpec.getPorts.asScala + .filter(_.getName == SUBMISSION_SERVER_PORT_NAME) + .head.getNodePort + val nodeUrls = kubernetesClient.nodes.list.getItems.asScala + .filterNot(_.getSpec.getUnschedulable) + .flatMap(_.getStatus.getAddresses.asScala.map(address => { + s"$urlScheme://${address.getAddress}:$servicePort" + })).toArray + require(nodeUrls.nonEmpty, "No nodes found to contact the driver!") val (trustManager, sslContext): (X509TrustManager, SSLContext) = if (driverSubmitSslOptions.enabled) { buildSslConnectionConfiguration(driverSubmitSslOptions) } else { (null, SSLContext.getDefault) } - val url = s"$urlScheme://$nodeAddress:$servicePort" HttpClientUtil.createClient[KubernetesSparkRestApi]( - url, + uris = nodeUrls, sslSocketFactory = sslContext.getSocketFactory, trustContext = trustManager) } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala index e5ce0bcd606b2..378583b29c547 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala @@ -19,24 +19,36 @@ package org.apache.spark.deploy.kubernetes import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.Duration -private[spark] object Retry { +import org.apache.spark.SparkException +import org.apache.spark.internal.Logging + +private[spark] object Retry extends Logging { private def retryableFuture[T] - (times: Int, interval: Duration) + (attempt: Int, maxAttempts: Int, interval: Duration, retryMessage: Option[String]) (f: => Future[T]) (implicit executionContext: ExecutionContext): Future[T] = { f recoverWith { - case _ if times > 0 => { - Thread.sleep(interval.toMillis) - retryableFuture(times - 1, interval)(f) - } + case error: Throwable => + if (attempt <= maxAttempts) { + retryMessage.foreach { message => + logWarning(s"$message - attempt $attempt of $maxAttempts", error) + } + Thread.sleep(interval.toMillis) + retryableFuture(attempt + 1, maxAttempts, interval, retryMessage)(f) + } else { + Future.failed(retryMessage.map(message => + new SparkException(s"$message - reached $maxAttempts attempts," + + s" and aborting task.", error) + ).getOrElse(error)) + } } } def retry[T] - (times: Int, interval: Duration) + (times: Int, interval: Duration, retryMessage: Option[String] = None) (f: => T) (implicit executionContext: ExecutionContext): Future[T] = { - retryableFuture(times, interval)(Future[T] { f }) + retryableFuture(1, times, interval, retryMessage)(Future[T] { f }) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala index eb7d411700829..1cabfbad656eb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala +++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala @@ -20,7 +20,7 @@ import javax.net.ssl.{SSLContext, SSLSocketFactory, X509TrustManager} import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import com.fasterxml.jackson.module.scala.DefaultScalaModule -import feign.Feign +import feign.{Client, Feign, Request, Response} import feign.Request.Options import feign.jackson.{JacksonDecoder, JacksonEncoder} import feign.jaxrs.JAXRSContract @@ -32,7 +32,7 @@ import org.apache.spark.status.api.v1.JacksonMessageWriter private[spark] object HttpClientUtil { def createClient[T: ClassTag]( - uri: String, + uris: Array[String], sslSocketFactory: SSLSocketFactory = SSLContext.getDefault.getSocketFactory, trustContext: X509TrustManager = null, readTimeoutMillis: Int = 20000, @@ -45,13 +45,24 @@ private[spark] object HttpClientUtil { .registerModule(new DefaultScalaModule) .setDateFormat(JacksonMessageWriter.makeISODateFormat) objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) - val clazz = implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] + val target = new MultiServerFeignTarget[T](uris) + val baseHttpClient = new feign.okhttp.OkHttpClient(httpClientBuilder.build()) + val resetTargetHttpClient = new Client { + override def execute(request: Request, options: Options): Response = { + val response = baseHttpClient.execute(request, options) + if (response.status() >= 200 && response.status() < 300) { + target.reset() + } + response + } + } Feign.builder() - .client(new feign.okhttp.OkHttpClient(httpClientBuilder.build())) + .client(resetTargetHttpClient) .contract(new JAXRSContract) .encoder(new JacksonEncoder(objectMapper)) .decoder(new JacksonDecoder(objectMapper)) .options(new Options(connectTimeoutMillis, readTimeoutMillis)) - .target(clazz, uri) + .retryer(target) + .target(target) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala new file mode 100644 index 0000000000000..fea7f057cfa1b --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes + +import feign.{Request, RequestTemplate, RetryableException, Retryer, Target} +import scala.reflect.ClassTag +import scala.util.Random + +private[kubernetes] class MultiServerFeignTarget[T : ClassTag]( + private val servers: Seq[String]) extends Target[T] with Retryer { + require(servers.nonEmpty, "Must provide at least one server URI.") + + private val threadLocalShuffledServers = new ThreadLocal[Seq[String]] { + override def initialValue(): Seq[String] = Random.shuffle(servers) + } + + override def `type`(): Class[T] = { + implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] + } + + override def url(): String = threadLocalShuffledServers.get.head + + /** + * Cloning the target is done on every request, for use on the current + * thread - thus it's important that clone returns a "fresh" target. + */ + override def clone(): Retryer = { + reset() + this + } + + override def name(): String = { + s"${getClass.getSimpleName} with servers [${servers.mkString(",")}]" + } + + override def apply(requestTemplate: RequestTemplate): Request = { + if (!requestTemplate.url().startsWith("http")) { + requestTemplate.insert(0, url()) + } + requestTemplate.request() + } + + override def continueOrPropagate(e: RetryableException): Unit = { + threadLocalShuffledServers.set(threadLocalShuffledServers.get.drop(1)) + if (threadLocalShuffledServers.get.isEmpty) { + throw e + } + } + + def reset(): Unit = { + threadLocalShuffledServers.set(Random.shuffle(servers)) + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala index 60c6564579a6e..b42f97952394e 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala @@ -123,7 +123,7 @@ private[spark] object Minikube extends Logging { .build() val sslContext = SSLUtils.sslContext(kubernetesConf) val trustManager = SSLUtils.trustManagers(kubernetesConf)(0).asInstanceOf[X509TrustManager] - HttpClientUtil.createClient[T](url, sslContext.getSocketFactory, trustManager) + HttpClientUtil.createClient[T](Array(url), sslContext.getSocketFactory, trustManager) } def executeMinikubeSsh(command: String): Unit = { From d0f95dbec40fb686b42b8371ce532f12d69662c0 Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 2 Feb 2017 12:22:54 -0800 Subject: [PATCH 038/156] Allow adding arbitrary files (#71) * Allow adding arbitrary files * Address comments and add documentation --- .../org/apache/spark/deploy/SparkSubmit.scala | 2 + .../spark/deploy/SparkSubmitArguments.scala | 7 ++ docs/running-on-kubernetes.md | 12 ++- .../launcher/SparkSubmitOptionParser.java | 4 +- .../spark/deploy/kubernetes/Client.scala | 34 ++++++-- .../spark/deploy/kubernetes/config.scala | 16 +++- .../rest/KubernetesRestProtocolMessages.scala | 3 +- .../rest/kubernetes/CompressionUtils.scala | 4 +- .../KubernetesSparkRestServer.scala | 53 ++++++++---- .../jobs/FileExistenceTest.scala | 54 ++++++++++++ .../integrationtest/KubernetesSuite.scala | 85 +++++++++++++++++-- .../integration-tests/test-data/input.txt | 1 + 12 files changed, 243 insertions(+), 32 deletions(-) create mode 100644 
resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala create mode 100644 resource-managers/kubernetes/integration-tests/test-data/input.txt diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index db6ec22ca919f..51eb23560defe 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -474,6 +474,8 @@ object SparkSubmit { sysProp = "spark.kubernetes.namespace"), OptionAssigner(args.kubernetesUploadJars, KUBERNETES, CLUSTER, sysProp = "spark.kubernetes.driver.uploads.jars"), + OptionAssigner(args.kubernetesUploadFiles, KUBERNETES, CLUSTER, + sysProp = "spark.kubernetes.driver.uploads.files"), // Other options OptionAssigner(args.executorCores, STANDALONE | YARN, ALL_DEPLOY_MODES, diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index ae1bee7ee4d14..f771755244f31 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -74,6 +74,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S // Kubernetes only var kubernetesNamespace: String = null var kubernetesUploadJars: String = null + var kubernetesUploadFiles: String = null // Standalone cluster mode only var supervise: Boolean = false @@ -196,6 +197,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S kubernetesUploadJars = Option(kubernetesUploadJars) .orElse(sparkProperties.get("spark.kubernetes.driver.uploads.jars")) .orNull + kubernetesUploadFiles = Option(kubernetesUploadFiles) + .orElse(sparkProperties.get("spark.kubernetes.driver.uploads.files")) + .orNull // Try to set main class from JAR if no --class argument is given if (mainClass == null && !isPython && !isR && primaryResource != null) { @@ -442,6 +446,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S case KUBERNETES_UPLOAD_JARS => kubernetesUploadJars = value + case KUBERNETES_UPLOAD_FILES => + kubernetesUploadFiles = value + case HELP => printUsageAndExit(0) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index e256535fbbc9d..5a48bb254a6df 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -217,10 +217,20 @@ from the other deployment modes. See the [configuration page](configuration.html spark.kubernetes.driver.uploads.jars (none) - Comma-separated list of jars to sent to the driver and all executors when submitting the application in cluster + Comma-separated list of jars to send to the driver and all executors when submitting the application in cluster mode. Refer to adding other jars for more information. + + spark.kubernetes.driver.uploads.files + (none) + + Comma-separated list of files to send to the driver and all executors when submitting the application in cluster + mode. The files are added in a flat hierarchy to the current working directory of the driver, having the same + names as the names of the original files. Note that two files with the same name cannot be added, even if they + were in different source directories on the client disk. 
+ + spark.kubernetes.executor.memoryOverhead executorMemory * 0.10, with minimum of 384 diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java index f1dac20f52f0d..3369b5d8301be 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java @@ -80,6 +80,7 @@ class SparkSubmitOptionParser { protected final String KUBERNETES_MASTER = "--kubernetes-master"; protected final String KUBERNETES_NAMESPACE = "--kubernetes-namespace"; protected final String KUBERNETES_UPLOAD_JARS = "--upload-jars"; + protected final String KUBERNETES_UPLOAD_FILES = "--upload-files"; /** * This is the canonical list of spark-submit options. Each entry in the array contains the @@ -122,7 +123,8 @@ class SparkSubmitOptionParser { { TOTAL_EXECUTOR_CORES }, { KUBERNETES_MASTER }, { KUBERNETES_NAMESPACE }, - { KUBERNETES_UPLOAD_JARS } + { KUBERNETES_UPLOAD_JARS }, + { KUBERNETES_UPLOAD_FILES } }; /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 715df54e573c3..c350c4817664d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -61,7 +61,9 @@ private[spark] class Client( private val sslSecretsDirectory = s"$DRIVER_CONTAINER_SECRETS_BASE_DIR/$kubernetesAppId-ssl" private val sslSecretsName = s"$SUBMISSION_SSL_SECRETS_PREFIX-$kubernetesAppId" private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) - private val uploadedJars = sparkConf.get(KUBERNETES_DRIVER_UPLOAD_JARS) + private val uploadedJars = sparkConf.get(KUBERNETES_DRIVER_UPLOAD_JARS).filter(_.nonEmpty) + private val uploadedFiles = sparkConf.get(KUBERNETES_DRIVER_UPLOAD_FILES).filter(_.nonEmpty) + uploadedFiles.foreach(validateNoDuplicateUploadFileNames) private val uiPort = sparkConf.getInt("spark.ui.port", DEFAULT_UI_PORT) private val driverSubmitTimeoutSecs = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TIMEOUT) @@ -513,18 +515,40 @@ private[spark] class Client( case "container" => ContainerAppResource(appResourceUri.getPath) case other => RemoteAppResource(other) } - - val uploadJarsBase64Contents = compressJars(uploadedJars) + val uploadJarsBase64Contents = compressFiles(uploadedJars) + val uploadFilesBase64Contents = compressFiles(uploadedFiles) KubernetesCreateSubmissionRequest( appResource = resolvedAppResource, mainClass = mainClass, appArgs = appArgs, secret = secretBase64String, sparkProperties = sparkConf.getAll.toMap, - uploadedJarsBase64Contents = uploadJarsBase64Contents) + uploadedJarsBase64Contents = uploadJarsBase64Contents, + uploadedFilesBase64Contents = uploadFilesBase64Contents) + } + + // Because uploaded files should be added to the working directory of the driver, they + // need to not have duplicate file names. They are added to the working directory so the + // user can reliably locate them in their application. This is similar in principle to how + // YARN handles its `spark.files` setting. 
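Because uploaded files are placed flat into the driver's working directory (as the documentation above and the comment here describe), application code can locate them by bare file name. A minimal, hedged sketch of that pattern follows; the object name and file name are illustrative and not part of the patch:

import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}

object UploadedFileReader {
  def main(args: Array[String]): Unit = {
    // Files sent via the upload mechanism land in the driver's current working
    // directory, so a relative path consisting of just the base name suffices.
    val path = Paths.get("application-settings.properties")
    require(Files.exists(path), s"Expected uploaded file at ${path.toAbsolutePath}")
    val contents = new String(Files.readAllBytes(path), StandardCharsets.UTF_8)
    println(s"Read ${contents.length} characters from ${path.toAbsolutePath}")
  }
}

This single shared directory is also why the duplicate-name validation below is needed: two uploads with the same base name would collide once flattened.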
+ private def validateNoDuplicateUploadFileNames(uploadedFilesCommaSeparated: String): Unit = { + val pathsWithDuplicateNames = uploadedFilesCommaSeparated + .split(",") + .groupBy(new File(_).getName) + .filter(_._2.length > 1) + if (pathsWithDuplicateNames.nonEmpty) { + val pathsWithDuplicateNamesSorted = pathsWithDuplicateNames + .values + .flatten + .toList + .sortBy(new File(_).getName) + throw new SparkException("Cannot upload files with duplicate names via" + + s" ${KUBERNETES_DRIVER_UPLOAD_FILES.key}. The following paths have a duplicated" + + s" file name: ${pathsWithDuplicateNamesSorted.mkString(",")}") + } } - private def compressJars(maybeFilePaths: Option[String]): Option[TarGzippedData] = { + private def compressFiles(maybeFilePaths: Option[String]): Option[TarGzippedData] = { maybeFilePaths .map(_.split(",")) .map(CompressionUtils.createTarGzip(_)) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 9b145370f87d6..3e0c400febca1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -94,13 +94,27 @@ package object config { private[spark] val KUBERNETES_DRIVER_UPLOAD_JARS = ConfigBuilder("spark.kubernetes.driver.uploads.jars") .doc(""" - | Comma-separated list of jars to sent to the driver and + | Comma-separated list of jars to send to the driver and | all executors when submitting the application in cluster | mode. """.stripMargin) .stringConf .createOptional + private[spark] val KUBERNETES_DRIVER_UPLOAD_FILES = + ConfigBuilder("spark.kubernetes.driver.uploads.files") + .doc(""" + | Comma-separated list of files to send to the driver and + | all executors when submitting the application in cluster + | mode. The files are added in a flat hierarchy to the + | current working directory of the driver, having the same + | names as the names of the original files. Note that two + | files with the same name cannot be added, even if they + | were in different source directories on the client disk. + """.stripMargin) + .stringConf + .createOptional + // Note that while we set a default for this when we start up the // scheduler, the specific default value is dynamically determined // based on the executor memory. 
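To make the new setting concrete, here is a small, hedged sketch of supplying spark.kubernetes.driver.uploads.files through SparkConf and splitting it back into individual paths, mirroring the .filter(_.nonEmpty) guard used by the submission client; the master URL and file paths are illustrative only:

import org.apache.spark.SparkConf

object UploadFilesConfExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf(loadDefaults = false)
      .setMaster("k8s://https://192.168.99.100:8443")
      .setAppName("upload-files-example")
      .set("spark.kubernetes.driver.uploads.files", "/tmp/lookup.csv,/tmp/settings.json")

    // An empty string behaves like an unset value, matching the client's guard.
    val uploadedFiles: Seq[String] = conf
      .getOption("spark.kubernetes.driver.uploads.files")
      .filter(_.nonEmpty)
      .map(_.split(",").toSeq)
      .getOrElse(Seq.empty)

    uploadedFiles.foreach(path => println(s"Would upload: $path"))
  }
}

The equivalent spark-submit flag added in this patch is --upload-files.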
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala index 8beba23bc8e11..6aeb851a16bf4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala @@ -26,7 +26,8 @@ case class KubernetesCreateSubmissionRequest( appArgs: Array[String], sparkProperties: Map[String, String], secret: String, - uploadedJarsBase64Contents: Option[TarGzippedData]) extends SubmitRestProtocolRequest { + uploadedJarsBase64Contents: Option[TarGzippedData], + uploadedFilesBase64Contents: Option[TarGzippedData]) extends SubmitRestProtocolRequest { message = "create" clientSparkVersion = SPARK_VERSION } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala index 1c95dacc7eb01..7204cb874aaec 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala @@ -68,8 +68,8 @@ private[spark] object CompressionUtils extends Logging { while (usedFileNames.contains(resolvedFileName)) { val oldResolvedFileName = resolvedFileName resolvedFileName = s"$nameWithoutExtension-$deduplicationCounter.$extension" - logWarning(s"File with name $oldResolvedFileName already exists. Trying to add with" + - s" file name $resolvedFileName instead.") + logWarning(s"File with name $oldResolvedFileName already exists. 
Trying to add" + + s" with file name $resolvedFileName instead.") deduplicationCounter += 1 } usedFileNames += resolvedFileName diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala index 451dc96dd65ed..c5a7e27b15927 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -18,6 +18,7 @@ package org.apache.spark.deploy.rest.kubernetes import java.io.File import java.net.URI +import java.nio.file.Paths import java.util.concurrent.CountDownLatch import javax.servlet.http.{HttpServletRequest, HttpServletResponse} @@ -27,7 +28,7 @@ import org.apache.commons.codec.binary.Base64 import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} +import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.rest._ import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} @@ -149,7 +150,8 @@ private[spark] class KubernetesSparkRestServer( appArgs, sparkProperties, secret, - uploadedJars) => + uploadedJars, + uploadedFiles) => val decodedSecret = Base64.decodeBase64(secret) if (!expectedApplicationSecret.sameElements(decodedSecret)) { responseServlet.setStatus(HttpServletResponse.SC_UNAUTHORIZED) @@ -157,29 +159,33 @@ private[spark] class KubernetesSparkRestServer( } else { val tempDir = Utils.createTempDir() val appResourcePath = resolvedAppResource(appResource, tempDir) - val jarsDirectory = new File(tempDir, "jars") - if (!jarsDirectory.mkdir) { - throw new IllegalStateException("Failed to create jars dir at" + - s"${jarsDirectory.getAbsolutePath}") - } - val writtenJars = writeBase64ContentsToFiles(uploadedJars, jarsDirectory) - val driverExtraClasspath = sparkProperties - .get("spark.driver.extraClassPath") - .map(_.split(",")) - .getOrElse(Array.empty[String]) + val writtenJars = writeUploadedJars(uploadedJars, tempDir) + val writtenFiles = writeUploadedFiles(uploadedFiles) + val resolvedSparkProperties = new mutable.HashMap[String, String] + resolvedSparkProperties ++= sparkProperties + + // Resolve driver classpath and jars val originalJars = sparkProperties.get("spark.jars") .map(_.split(",")) .getOrElse(Array.empty[String]) val resolvedJars = writtenJars ++ originalJars ++ Array(appResourcePath) val sparkJars = new File(sparkHome, "jars").listFiles().map(_.getAbsolutePath) + val driverExtraClasspath = sparkProperties + .get("spark.driver.extraClassPath") + .map(_.split(",")) + .getOrElse(Array.empty[String]) val driverClasspath = driverExtraClasspath ++ resolvedJars ++ - sparkJars ++ - Array(appResourcePath) - val resolvedSparkProperties = new mutable.HashMap[String, String] - resolvedSparkProperties ++= sparkProperties + sparkJars resolvedSparkProperties("spark.jars") = resolvedJars.mkString(",") + // Resolve spark.files + val originalFiles = sparkProperties.get("spark.files") + .map(_.split(",")) + .getOrElse(Array.empty[String]) + val resolvedFiles = originalFiles ++ writtenFiles + resolvedSparkProperties("spark.files") = 
resolvedFiles.mkString(",") + val command = new ArrayBuffer[String] command += javaExecutable command += "-cp" @@ -229,6 +235,21 @@ private[spark] class KubernetesSparkRestServer( } } + private def writeUploadedJars(files: Option[TarGzippedData], rootTempDir: File): + Seq[String] = { + val resolvedDirectory = new File(rootTempDir, "jars") + if (!resolvedDirectory.mkdir()) { + throw new IllegalStateException(s"Failed to create jars dir at " + + resolvedDirectory.getAbsolutePath) + } + writeBase64ContentsToFiles(files, resolvedDirectory) + } + + private def writeUploadedFiles(files: Option[TarGzippedData]): Seq[String] = { + val workingDir = Paths.get("").toFile.getAbsoluteFile + writeBase64ContentsToFiles(files, workingDir) + } + def resolvedAppResource(appResource: AppResource, tempDir: File): String = { val appResourcePath = appResource match { case UploadedAppResource(resourceContentsBase64, resourceName) => diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala new file mode 100644 index 0000000000000..8b8d5e05f6479 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest.jobs + +import java.nio.file.Paths + +import com.google.common.base.Charsets +import com.google.common.io.Files + +import org.apache.spark.SparkException +import org.apache.spark.sql.SparkSession + +private[spark] object FileExistenceTest { + + def main(args: Array[String]): Unit = { + if (args.length < 2) { + throw new IllegalArgumentException("Usage: WordCount ") + } + // Can't use SparkContext.textFile since the file is local to the driver + val file = Paths.get(args(0)).toFile + if (!file.exists()) { + throw new SparkException(s"Failed to find file at ${file.getAbsolutePath}") + } else { + // scalastyle:off println + val contents = Files.toString(file, Charsets.UTF_8) + if (args(1) != contents) { + throw new SparkException(s"Contents do not match. 
Expected: ${args(1)}," + + s" actual, $contents") + } else { + println(s"File found at ${file.getAbsolutePath} with correct contents.") + } + // scalastyle:on println + } + val spark = SparkSession.builder() + .appName("Test") + .getOrCreate() + spark.stop() + } + +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 16de71118dec4..40867c40d4474 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -21,7 +21,9 @@ import java.nio.file.Paths import java.util.UUID import java.util.concurrent.TimeUnit +import com.google.common.base.Charsets import com.google.common.collect.ImmutableList +import com.google.common.io.Files import com.google.common.util.concurrent.SettableFuture import io.fabric8.kubernetes.api.model.Pod import io.fabric8.kubernetes.client.{Config, KubernetesClient, KubernetesClientException, Watcher} @@ -62,10 +64,14 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .getOrElse(throw new IllegalStateException("Expected to find spark-examples jar; was the" + " pre-integration-test phase run?")) + private val TEST_EXISTENCE_FILE = Paths.get("test-data", "input.txt").toFile + private val TEST_EXISTENCE_FILE_CONTENTS = Files.toString(TEST_EXISTENCE_FILE, Charsets.UTF_8) private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) - private val MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + + private val SPARK_PI_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + ".integrationtest.jobs.SparkPiWithInfiniteWait" + private val FILE_EXISTENCE_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + + ".integrationtest.jobs.FileExistenceTest" private val NAMESPACE = UUID.randomUUID().toString.replaceAll("-", "") private var minikubeKubernetesClient: KubernetesClient = _ private var clientConfig: Config = _ @@ -179,7 +185,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { new Client( sparkConf = sparkConf, - mainClass = MAIN_CLASS, + mainClass = SPARK_PI_MAIN_CLASS, mainAppResource = mainAppResource, appArgs = Array.empty[String]).run() val sparkMetricsService = getSparkMetricsService("spark-pi") @@ -196,7 +202,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--executor-cores", "1", "--num-executors", "1", "--upload-jars", HELPER_JAR, - "--class", MAIN_CLASS, + "--class", SPARK_PI_MAIN_CLASS, "--conf", "spark.ui.enabled=true", "--conf", "spark.testing=false", "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", @@ -279,7 +285,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--executor-cores", "1", "--num-executors", "1", "--upload-jars", HELPER_JAR, - "--class", MAIN_CLASS, + "--class", SPARK_PI_MAIN_CLASS, "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", @@ -317,7 +323,7 @@ 
private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--executor-cores", "1", "--num-executors", "1", "--upload-jars", HELPER_JAR, - "--class", MAIN_CLASS, + "--class", SPARK_PI_MAIN_CLASS, "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", @@ -334,4 +340,73 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { EXAMPLES_JAR) SparkSubmit.main(args) } + + test("Added files should exist on the driver.") { + val args = Array( + "--master", s"k8s://https://${Minikube.getMinikubeIp}:8443", + "--deploy-mode", "cluster", + "--kubernetes-namespace", NAMESPACE, + "--name", "spark-file-existence-test", + "--executor-memory", "512m", + "--executor-cores", "1", + "--num-executors", "1", + "--upload-jars", HELPER_JAR, + "--upload-files", TEST_EXISTENCE_FILE.getAbsolutePath, + "--class", FILE_EXISTENCE_MAIN_CLASS, + "--conf", "spark.ui.enabled=false", + "--conf", "spark.testing=true", + "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", + "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", + "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", + "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", + "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", + EXAMPLES_JAR, + TEST_EXISTENCE_FILE.getName, + TEST_EXISTENCE_FILE_CONTENTS) + val podCompletedFuture = SettableFuture.create[Boolean] + val watch = new Watcher[Pod] { + override def eventReceived(action: Action, pod: Pod): Unit = { + val containerStatuses = pod.getStatus.getContainerStatuses.asScala + val allSuccessful = containerStatuses.nonEmpty && containerStatuses + .forall(status => { + status.getState.getTerminated != null && status.getState.getTerminated.getExitCode == 0 + }) + if (allSuccessful) { + podCompletedFuture.set(true) + } else { + val failedContainers = containerStatuses.filter(container => { + container.getState.getTerminated != null && + container.getState.getTerminated.getExitCode != 0 + }) + if (failedContainers.nonEmpty) { + podCompletedFuture.setException(new SparkException( + "One or more containers in the driver failed with a nonzero exit code.")) + } + } + } + + override def onClose(e: KubernetesClientException): Unit = { + logWarning("Watch closed", e) + } + } + Utils.tryWithResource(minikubeKubernetesClient + .pods + .withLabel("spark-app-name", "spark-file-existence-test") + .watch(watch)) { _ => + SparkSubmit.main(args) + assert(podCompletedFuture.get, "Failed to run driver pod") + val driverPod = minikubeKubernetesClient + .pods + .withLabel("spark-app-name", "spark-file-existence-test") + .list() + .getItems + .get(0) + val podLog = minikubeKubernetesClient + .pods + .withName(driverPod.getMetadata.getName) + .getLog + assert(podLog.contains(s"File found at /opt/spark/${TEST_EXISTENCE_FILE.getName}" + + s" with correct contents."), "Job did not find the file as expected.") + } + } } diff --git a/resource-managers/kubernetes/integration-tests/test-data/input.txt b/resource-managers/kubernetes/integration-tests/test-data/input.txt new file mode 100644 index 0000000000000..dfe437bdebebc --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/test-data/input.txt @@ -0,0 +1 @@ +Contents From de9a82e95687c753450afd535a5365f7c53b5a72 
Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Thu, 2 Feb 2017 15:13:39 -0800 Subject: [PATCH 039/156] Fix NPE around unschedulable pod specs (#79) --- .../main/scala/org/apache/spark/deploy/kubernetes/Client.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index c350c4817664d..bef5a605f173b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -570,7 +570,8 @@ private[spark] class Client( .filter(_.getName == SUBMISSION_SERVER_PORT_NAME) .head.getNodePort val nodeUrls = kubernetesClient.nodes.list.getItems.asScala - .filterNot(_.getSpec.getUnschedulable) + .filterNot(node => node.getSpec.getUnschedulable != null && + node.getSpec.getUnschedulable) .flatMap(_.getStatus.getAddresses.asScala.map(address => { s"$urlScheme://${address.getAddress}:$servicePort" })).toArray From fae76a07b9357bed889d5fe66bd80d6cca2c2e87 Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Thu, 2 Feb 2017 17:34:15 -0800 Subject: [PATCH 040/156] Introduce blocking submit to kubernetes by default (#53) * Introduce blocking submit to kubernetes by default Two new configuration settings: - spark.kubernetes.submit.waitAppCompletion - spark.kubernetes.report.interval * Minor touchups * More succinct logging for pod state * Fix import order * Switch to watch-based logging * Spaces in comma-joined volumes, labels, and containers * Use CountDownLatch instead of SettableFuture * Match parallel ConfigBuilder style * Disable logging in fire-and-forget mode Which is enabled with spark.kubernetes.submit.waitAppCompletion=false (default: true) * Additional log line for when application is launched * Minor wording changes * More logging * Drop log to DEBUG --- .../spark/deploy/kubernetes/Client.scala | 154 +++++++++++------- .../kubernetes/LoggingPodStatusWatcher.scala | 114 +++++++++++++ .../spark/deploy/kubernetes/config.scala | 19 +++ 3 files changed, 225 insertions(+), 62 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index bef5a605f173b..433c45d51fd6b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.kubernetes import java.io.{File, FileInputStream} import java.security.{KeyStore, SecureRandom} -import java.util.concurrent.{TimeoutException, TimeUnit} +import java.util.concurrent.{CountDownLatch, TimeoutException, TimeUnit} import java.util.concurrent.atomic.AtomicBoolean import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} @@ -26,7 +26,7 @@ import com.google.common.base.Charsets import com.google.common.io.Files import com.google.common.util.concurrent.SettableFuture import io.fabric8.kubernetes.api.model._ -import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient, 
KubernetesClientException, Watcher} +import io.fabric8.kubernetes.client.{ConfigBuilder => K8SConfigBuilder, DefaultKubernetesClient, KubernetesClient, KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import org.apache.commons.codec.binary.Base64 import scala.collection.JavaConverters._ @@ -67,6 +67,8 @@ private[spark] class Client( private val uiPort = sparkConf.getInt("spark.ui.port", DEFAULT_UI_PORT) private val driverSubmitTimeoutSecs = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TIMEOUT) + private val waitForAppCompletion: Boolean = sparkConf.get(WAIT_FOR_APP_COMPLETION) + private val secretBase64String = { val secretBytes = new Array[Byte](128) SECURE_RANDOM.nextBytes(secretBytes) @@ -81,9 +83,11 @@ private[spark] class Client( ThreadUtils.newDaemonSingleThreadExecutor("kubernetes-client-retryable-futures")) def run(): Unit = { + logInfo(s"Starting application $kubernetesAppId in Kubernetes...") val (driverSubmitSslOptions, isKeyStoreLocalFile) = parseDriverSubmitSslOptions() + val parsedCustomLabels = parseCustomLabels(customLabels) - var k8ConfBuilder = new ConfigBuilder() + var k8ConfBuilder = new K8SConfigBuilder() .withApiVersion("v1") .withMasterUrl(master) .withNamespace(namespace) @@ -116,73 +120,97 @@ private[spark] class Client( SPARK_APP_NAME_LABEL -> appName) ++ parsedCustomLabels).asJava val containerPorts = buildContainerPorts() - val submitCompletedFuture = SettableFuture.create[Boolean] - val submitPending = new AtomicBoolean(false) - val podWatcher = new DriverPodWatcher( - submitCompletedFuture, - submitPending, - kubernetesClient, - driverSubmitSslOptions, - Array(submitServerSecret) ++ sslSecrets, - driverKubernetesSelectors) + + // start outer watch for status logging of driver pod + val driverPodCompletedLatch = new CountDownLatch(1) + // only enable interval logging if in waitForAppCompletion mode + val loggingInterval = if (waitForAppCompletion) sparkConf.get(REPORT_INTERVAL) else 0 + val loggingWatch = new LoggingPodStatusWatcher(driverPodCompletedLatch, kubernetesAppId, + loggingInterval) Utils.tryWithResource(kubernetesClient .pods() .withLabels(driverKubernetesSelectors) - .watch(podWatcher)) { _ => - kubernetesClient.pods().createNew() - .withNewMetadata() - .withName(kubernetesAppId) + .watch(loggingWatch)) { _ => + + // launch driver pod with inner watch to upload jars when it's ready + val submitCompletedFuture = SettableFuture.create[Boolean] + val submitPending = new AtomicBoolean(false) + val podWatcher = new DriverPodWatcher( + submitCompletedFuture, + submitPending, + kubernetesClient, + driverSubmitSslOptions, + Array(submitServerSecret) ++ sslSecrets, + driverKubernetesSelectors) + Utils.tryWithResource(kubernetesClient + .pods() .withLabels(driverKubernetesSelectors) - .endMetadata() - .withNewSpec() - .withRestartPolicy("OnFailure") - .addNewVolume() - .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) - .withNewSecret() - .withSecretName(submitServerSecret.getMetadata.getName) - .endSecret() - .endVolume - .addToVolumes(sslVolumes: _*) - .withServiceAccount(serviceAccount) - .addNewContainer() - .withName(DRIVER_CONTAINER_NAME) - .withImage(driverDockerImage) - .withImagePullPolicy("IfNotPresent") - .addNewVolumeMount() + .watch(podWatcher)) { _ => + kubernetesClient.pods().createNew() + .withNewMetadata() + .withName(kubernetesAppId) + .withLabels(driverKubernetesSelectors) + .endMetadata() + .withNewSpec() + .withRestartPolicy("OnFailure") + .addNewVolume() .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) - 
.withMountPath(secretDirectory) - .withReadOnly(true) - .endVolumeMount() - .addToVolumeMounts(sslVolumeMounts: _*) - .addNewEnv() - .withName(ENV_SUBMISSION_SECRET_LOCATION) - .withValue(s"$secretDirectory/$SUBMISSION_APP_SECRET_NAME") - .endEnv() - .addNewEnv() - .withName(ENV_SUBMISSION_SERVER_PORT) - .withValue(SUBMISSION_SERVER_PORT.toString) - .endEnv() - .addToEnv(sslEnvs: _*) - .withPorts(containerPorts.asJava) - .endContainer() - .endSpec() - .done() - var submitSucceeded = false - try { - submitCompletedFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) - submitSucceeded = true - } catch { - case e: TimeoutException => - val finalErrorMessage: String = buildSubmitFailedErrorMessage(kubernetesClient, e) - logError(finalErrorMessage, e) - throw new SparkException(finalErrorMessage, e) - } finally { - if (!submitSucceeded) { - Utils.tryLogNonFatalError { - kubernetesClient.pods.withName(kubernetesAppId).delete() + .withNewSecret() + .withSecretName(submitServerSecret.getMetadata.getName) + .endSecret() + .endVolume + .addToVolumes(sslVolumes: _*) + .withServiceAccount(serviceAccount) + .addNewContainer() + .withName(DRIVER_CONTAINER_NAME) + .withImage(driverDockerImage) + .withImagePullPolicy("IfNotPresent") + .addNewVolumeMount() + .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) + .withMountPath(secretDirectory) + .withReadOnly(true) + .endVolumeMount() + .addToVolumeMounts(sslVolumeMounts: _*) + .addNewEnv() + .withName(ENV_SUBMISSION_SECRET_LOCATION) + .withValue(s"$secretDirectory/$SUBMISSION_APP_SECRET_NAME") + .endEnv() + .addNewEnv() + .withName(ENV_SUBMISSION_SERVER_PORT) + .withValue(SUBMISSION_SERVER_PORT.toString) + .endEnv() + .addToEnv(sslEnvs: _*) + .withPorts(containerPorts.asJava) + .endContainer() + .endSpec() + .done() + var submitSucceeded = false + try { + submitCompletedFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) + submitSucceeded = true + logInfo(s"Finished launching local resources to application $kubernetesAppId") + } catch { + case e: TimeoutException => + val finalErrorMessage: String = buildSubmitFailedErrorMessage(kubernetesClient, e) + logError(finalErrorMessage, e) + throw new SparkException(finalErrorMessage, e) + } finally { + if (!submitSucceeded) { + Utils.tryLogNonFatalError { + kubernetesClient.pods.withName(kubernetesAppId).delete() + } } } } + + // wait if configured to do so + if (waitForAppCompletion) { + logInfo(s"Waiting for application $kubernetesAppId to finish...") + driverPodCompletedLatch.await() + logInfo(s"Application $kubernetesAppId finished.") + } else { + logInfo(s"Application $kubernetesAppId successfully launched.") + } } } finally { Utils.tryLogNonFatalError { @@ -377,6 +405,8 @@ private[spark] class Client( Future { sparkConf.set("spark.driver.host", pod.getStatus.getPodIP) val submitRequest = buildSubmissionRequest() + logInfo(s"Submitting local resources to driver pod for application " + + s"$kubernetesAppId ...") driverSubmitter.submitApplication(submitRequest) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala new file mode 100644 index 0000000000000..cbacaf6bda854 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import java.util.concurrent.{CountDownLatch, Executors, TimeUnit} + +import scala.collection.JavaConverters._ + +import io.fabric8.kubernetes.api.model.Pod +import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action + +import org.apache.spark.internal.Logging + +/** + * A monitor for the running Kubernetes pod of a Spark application. Status logging occurs on + * every state change and also at an interval for liveness. + * + * @param podCompletedFuture a CountDownLatch that is set to true when the watched pod finishes + * @param appId + * @param interval ms between each state request. If set to 0 or a negative number, the periodic + * logging will be disabled. + */ +private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownLatch, + appId: String, + interval: Long) + extends Watcher[Pod] with Logging { + + // start timer for periodic logging + private val scheduler = Executors.newScheduledThreadPool(1) + private val logRunnable: Runnable = new Runnable { + override def run() = logShortStatus() + } + if (interval > 0) { + scheduler.scheduleWithFixedDelay(logRunnable, 0, interval, TimeUnit.MILLISECONDS) + } + + private var pod: Option[Pod] = Option.empty + private var prevPhase: String = null + private def phase: String = pod.map(_.getStatus().getPhase()).getOrElse("unknown") + + override def eventReceived(action: Action, pod: Pod): Unit = { + this.pod = Option(pod) + + logShortStatus() + if (prevPhase != phase) { + logLongStatus() + } + prevPhase = phase + + if (phase == "Succeeded" || phase == "Failed") { + podCompletedFuture.countDown() + } + } + + override def onClose(e: KubernetesClientException): Unit = { + scheduler.shutdown() + logDebug(s"Stopped watching application $appId with last-observed phase $phase") + } + + private def logShortStatus() = { + logInfo(s"Application status for $appId (phase: $phase)") + } + + private def logLongStatus() = { + logInfo("Phase changed, new state: " + pod.map(formatPodState(_)).getOrElse("unknown")) + } + + private def formatPodState(pod: Pod): String = { + + val details = Seq[(String, String)]( + // pod metadata + ("pod name", pod.getMetadata.getName()), + ("namespace", pod.getMetadata.getNamespace()), + ("labels", pod.getMetadata.getLabels().asScala.mkString(", ")), + ("pod uid", pod.getMetadata.getUid), + ("creation time", pod.getMetadata.getCreationTimestamp()), + + // spec details + ("service account name", pod.getSpec.getServiceAccountName()), + ("volumes", pod.getSpec.getVolumes().asScala.map(_.getName).mkString(", ")), + ("node name", pod.getSpec.getNodeName()), + + // status + ("start time", pod.getStatus.getStartTime), + ("container images", + pod.getStatus.getContainerStatuses() + .asScala + .map(_.getImage) + 
.mkString(", ")), + ("phase", pod.getStatus.getPhase()) + ) + + // Use more loggable format if value is null or empty + details.map { case (k, v) => + val newValue = Option(v).filter(_.nonEmpty).getOrElse("N/A") + s"\n\t $k: $newValue" + }.mkString("") + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 3e0c400febca1..cb4cd42142ca4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -188,4 +188,23 @@ package object config { .internal() .stringConf .createOptional + + private[spark] val WAIT_FOR_APP_COMPLETION = + ConfigBuilder("spark.kubernetes.submit.waitAppCompletion") + .doc( + """ + | In cluster mode, whether to wait for the application to finish before exiting the + | launcher process. + """.stripMargin) + .booleanConf + .createWithDefault(true) + + private[spark] val REPORT_INTERVAL = + ConfigBuilder("spark.kubernetes.report.interval") + .doc( + """ + | Interval between reports of the current app status in cluster mode. + """.stripMargin) + .timeConf(TimeUnit.MILLISECONDS) + .createWithDefaultString("1s") } From 4bc7c523d5c102ae4fbb55c624cb56efbf4dd3da Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Fri, 3 Feb 2017 19:40:32 +0000 Subject: [PATCH 041/156] Do not wait for pod finishing in integration tests. (#84) Since the example job are patched to never finish. --- .../deploy/kubernetes/integrationtest/KubernetesSuite.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 40867c40d4474..c5458eccf830d 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -181,6 +181,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .set("spark.app.name", "spark-pi") .set("spark.ui.enabled", "true") .set("spark.testing", "false") + .set("spark.kubernetes.submit.waitAppCompletion", "false") val mainAppResource = s"file://$EXAMPLES_JAR" new Client( @@ -210,6 +211,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", + "--conf", "spark.kubernetes.submit.waitAppCompletion=false", EXAMPLES_JAR) SparkSubmit.main(args) val sparkMetricsService = getSparkMetricsService("spark-pi") @@ -231,6 +233,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", + "--conf", "spark.kubernetes.submit.waitAppCompletion=false", 
s"container:///opt/spark/examples/jars/$EXAMPLES_JAR_FILE_NAME") val allContainersSucceeded = SettableFuture.create[Boolean] val watcher = new Watcher[Pod] { @@ -292,6 +295,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", "--conf", "spark.kubernetes.driver.labels=label1=label1value,label2=label2value", + "--conf", "spark.kubernetes.submit.waitAppCompletion=false", EXAMPLES_JAR) SparkSubmit.main(args) val driverPodLabels = minikubeKubernetesClient @@ -337,6 +341,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--conf", "spark.ssl.kubernetes.submit.trustStore=" + s"file://${trustStoreFile.getAbsolutePath}", "--conf", s"spark.ssl.kubernetes.driverlaunch.trustStorePassword=changeit", + "--conf", "spark.kubernetes.submit.waitAppCompletion=false", EXAMPLES_JAR) SparkSubmit.main(args) } @@ -360,6 +365,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", + "--conf", "spark.kubernetes.submit.waitAppCompletion=false", EXAMPLES_JAR, TEST_EXISTENCE_FILE.getName, TEST_EXISTENCE_FILE_CONTENTS) From 52a7ab2c8b8071ed78f4d5d4bd32ecaa1a7051db Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Wed, 8 Feb 2017 20:47:41 +0000 Subject: [PATCH 042/156] Check for user jars/files existence before creating the driver pod. (#86) * Check for user jars/files existence before creating the driver pod. Close apache-spark-on-k8s/spark#85 * CR --- .../spark/deploy/kubernetes/Client.scala | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 433c45d51fd6b..b9b275c190fee 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -84,8 +84,10 @@ private[spark] class Client( def run(): Unit = { logInfo(s"Starting application $kubernetesAppId in Kubernetes...") - val (driverSubmitSslOptions, isKeyStoreLocalFile) = parseDriverSubmitSslOptions() + Seq(uploadedFiles, uploadedJars, Some(mainAppResource)).foreach(checkForFilesExistence) + + val (driverSubmitSslOptions, isKeyStoreLocalFile) = parseDriverSubmitSslOptions() val parsedCustomLabels = parseCustomLabels(customLabels) var k8ConfBuilder = new K8SConfigBuilder() .withApiVersion("v1") @@ -661,6 +663,22 @@ private[spark] class Client( }).toMap }).getOrElse(Map.empty[String, String]) } + + private def checkForFilesExistence(maybePaths: Option[String]): Unit = { + maybePaths.foreach { paths => + paths.split(",").foreach { path => + val uri = Utils.resolveURI(path) + uri.getScheme match { + case "file" | null => + val file = new File(uri.getPath) + if (!file.isFile) { + throw new SparkException(s"""file "${uri}" does not exist!""") + } + case _ => + } + } + } + } } private[spark] object Client extends Logging { From 487d1e160e8f953cef0e59f0a8a06e6ad50bcf99 Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 8 Feb 2017 17:59:50 
-0800 Subject: [PATCH 043/156] Use readiness probe instead of client-side ping. (#75) * Use readiness probe instead of client-side ping. Keep one ping() just as a sanity check, but otherwise set up the readiness probe to report the container as ready only when the ping endpoint can be reached. Also add a liveliness probe for convenience and symmetry. * Extract common HTTP get action * Remove some code * Add delay to liveliness check * Fix merge conflicts. * Fix more merge conflicts * Fix more merge conflicts * Revamp readiness check logic * Add addresses ready condition to endpoints watch * Rearrange the logic some more. * Remove liveness probe, retry against servers * Fix compiler error * Fix another compiler error * Delay between retries. Remove unintended test modification * FIx another compiler error * Extract method * Address comments * Deduplicate node addresses, use lower initial connect timeout * Drop maxRetriesPerServer from 10 to 3 --- .../spark/deploy/kubernetes/Client.scala | 643 +++++++++++------- .../spark/deploy/kubernetes/Retry.scala | 54 -- .../rest/kubernetes/HttpClientUtil.scala | 7 +- .../kubernetes/MultiServerFeignTarget.scala | 34 +- .../integrationtest/minikube/Minikube.scala | 2 +- 5 files changed, 426 insertions(+), 314 deletions(-) delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index b9b275c190fee..9eed9bfd2cd79 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -18,8 +18,8 @@ package org.apache.spark.deploy.kubernetes import java.io.{File, FileInputStream} import java.security.{KeyStore, SecureRandom} -import java.util.concurrent.{CountDownLatch, TimeoutException, TimeUnit} -import java.util.concurrent.atomic.AtomicBoolean +import java.util +import java.util.concurrent.{CountDownLatch, TimeUnit} import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} import com.google.common.base.Charsets @@ -31,8 +31,6 @@ import io.fabric8.kubernetes.client.Watcher.Action import org.apache.commons.codec.binary.Base64 import scala.collection.JavaConverters._ import scala.collection.mutable -import scala.concurrent.{ExecutionContext, Future} -import scala.concurrent.duration.DurationInt import org.apache.spark.{SecurityManager, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.kubernetes.config._ @@ -40,7 +38,7 @@ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging -import org.apache.spark.util.{ThreadUtils, Utils} +import org.apache.spark.util.Utils private[spark] class Client( sparkConf: SparkConf, @@ -78,10 +76,6 @@ private[spark] class Client( private val serviceAccount = sparkConf.get(KUBERNETES_SERVICE_ACCOUNT_NAME) private val customLabels = sparkConf.get(KUBERNETES_DRIVER_LABELS) - private implicit val retryableExecutionContext = ExecutionContext - .fromExecutorService( - ThreadUtils.newDaemonSingleThreadExecutor("kubernetes-client-retryable-futures")) 
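As a rough, standalone illustration of the retry behaviour this commit message describes (try each candidate server, cap the attempts per server, and pause between failed attempts), consider the hedged sketch below. The helper name, retry count, and delay are illustrative; the patch itself implements this with Feign's Retryer in MultiServerFeignTarget rather than with this code:

import scala.util.{Failure, Success, Try}

object RetryAcrossServers {
  /** Attempts `request` against each server up to `maxRetriesPerServer` times,
    * sleeping `delayMillis` between failed attempts, and returns the first success. */
  def firstSuccessful[T](
      servers: Seq[String],
      maxRetriesPerServer: Int,
      delayMillis: Long)(request: String => T): T = {
    require(servers.nonEmpty, "Must provide at least one server URI.")
    // Expand to server1 x N, server2 x N, ... and walk the attempts lazily.
    val attempts = (for (server <- servers; _ <- 1 to maxRetriesPerServer) yield server).iterator
    var result: Option[T] = None
    var lastError: Throwable = null
    while (result.isEmpty && attempts.hasNext) {
      val server = attempts.next()
      Try(request(server)) match {
        case Success(value) => result = Some(value)
        case Failure(error) =>
          lastError = error
          if (attempts.hasNext) Thread.sleep(delayMillis)
      }
    }
    result.getOrElse(throw new RuntimeException("All servers failed.", lastError))
  }
}

For example, firstSuccessful(Seq("https://node1:31000", "https://node2:31000"), maxRetriesPerServer = 3, delayMillis = 1000)(uri => ...) would walk both node addresses before giving up, where the body is whatever request function the caller supplies.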
- def run(): Unit = { logInfo(s"Starting application $kubernetesAppId in Kubernetes...") @@ -112,119 +106,398 @@ private[spark] class Client( .withData(Map((SUBMISSION_APP_SECRET_NAME, secretBase64String)).asJava) .withType("Opaque") .done() - val (sslEnvs, sslVolumes, sslVolumeMounts, sslSecrets) = configureSsl(kubernetesClient, - driverSubmitSslOptions, - isKeyStoreLocalFile) try { - val driverKubernetesSelectors = (Map( - SPARK_DRIVER_LABEL -> kubernetesAppId, - SPARK_APP_ID_LABEL -> kubernetesAppId, - SPARK_APP_NAME_LABEL -> appName) - ++ parsedCustomLabels).asJava - val containerPorts = buildContainerPorts() - - // start outer watch for status logging of driver pod - val driverPodCompletedLatch = new CountDownLatch(1) - // only enable interval logging if in waitForAppCompletion mode - val loggingInterval = if (waitForAppCompletion) sparkConf.get(REPORT_INTERVAL) else 0 - val loggingWatch = new LoggingPodStatusWatcher(driverPodCompletedLatch, kubernetesAppId, - loggingInterval) - Utils.tryWithResource(kubernetesClient - .pods() - .withLabels(driverKubernetesSelectors) - .watch(loggingWatch)) { _ => - - // launch driver pod with inner watch to upload jars when it's ready - val submitCompletedFuture = SettableFuture.create[Boolean] - val submitPending = new AtomicBoolean(false) - val podWatcher = new DriverPodWatcher( - submitCompletedFuture, - submitPending, - kubernetesClient, - driverSubmitSslOptions, - Array(submitServerSecret) ++ sslSecrets, - driverKubernetesSelectors) + val (sslEnvs, sslVolumes, sslVolumeMounts, sslSecrets) = configureSsl(kubernetesClient, + driverSubmitSslOptions, + isKeyStoreLocalFile) + try { + // start outer watch for status logging of driver pod + val driverPodCompletedLatch = new CountDownLatch(1) + // only enable interval logging if in waitForAppCompletion mode + val loggingInterval = if (waitForAppCompletion) sparkConf.get(REPORT_INTERVAL) else 0 + val loggingWatch = new LoggingPodStatusWatcher(driverPodCompletedLatch, kubernetesAppId, + loggingInterval) Utils.tryWithResource(kubernetesClient .pods() - .withLabels(driverKubernetesSelectors) - .watch(podWatcher)) { _ => - kubernetesClient.pods().createNew() - .withNewMetadata() - .withName(kubernetesAppId) - .withLabels(driverKubernetesSelectors) - .endMetadata() - .withNewSpec() - .withRestartPolicy("OnFailure") - .addNewVolume() - .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) - .withNewSecret() - .withSecretName(submitServerSecret.getMetadata.getName) - .endSecret() - .endVolume - .addToVolumes(sslVolumes: _*) - .withServiceAccount(serviceAccount) - .addNewContainer() - .withName(DRIVER_CONTAINER_NAME) - .withImage(driverDockerImage) - .withImagePullPolicy("IfNotPresent") - .addNewVolumeMount() - .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) - .withMountPath(secretDirectory) - .withReadOnly(true) - .endVolumeMount() - .addToVolumeMounts(sslVolumeMounts: _*) - .addNewEnv() - .withName(ENV_SUBMISSION_SECRET_LOCATION) - .withValue(s"$secretDirectory/$SUBMISSION_APP_SECRET_NAME") - .endEnv() - .addNewEnv() - .withName(ENV_SUBMISSION_SERVER_PORT) - .withValue(SUBMISSION_SERVER_PORT.toString) - .endEnv() - .addToEnv(sslEnvs: _*) - .withPorts(containerPorts.asJava) - .endContainer() - .endSpec() - .done() - var submitSucceeded = false - try { - submitCompletedFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) - submitSucceeded = true - logInfo(s"Finished launching local resources to application $kubernetesAppId") + .withName(kubernetesAppId) + .watch(loggingWatch)) { _ => + val (driverPod, driverService) = 
launchDriverKubernetesComponents( + kubernetesClient, + parsedCustomLabels, + submitServerSecret, + driverSubmitSslOptions, + sslSecrets, + sslVolumes, + sslVolumeMounts, + sslEnvs, + isKeyStoreLocalFile) + val ownerReferenceConfiguredDriverService = try { + configureOwnerReferences( + kubernetesClient, + submitServerSecret, + sslSecrets, + driverPod, + driverService) } catch { - case e: TimeoutException => - val finalErrorMessage: String = buildSubmitFailedErrorMessage(kubernetesClient, e) - logError(finalErrorMessage, e) - throw new SparkException(finalErrorMessage, e) - } finally { - if (!submitSucceeded) { - Utils.tryLogNonFatalError { - kubernetesClient.pods.withName(kubernetesAppId).delete() - } + case e: Throwable => + cleanupPodAndService(kubernetesClient, driverPod, driverService) + throw new SparkException("Failed to set owner references to the driver pod.", e) + } + try { + submitApplicationToDriverServer(kubernetesClient, driverSubmitSslOptions, + ownerReferenceConfiguredDriverService) + // wait if configured to do so + if (waitForAppCompletion) { + logInfo(s"Waiting for application $kubernetesAppId to finish...") + driverPodCompletedLatch.await() + logInfo(s"Application $kubernetesAppId finished.") + } else { + logInfo(s"Application $kubernetesAppId successfully launched.") } + } catch { + case e: Throwable => + cleanupPodAndService(kubernetesClient, driverPod, + ownerReferenceConfiguredDriverService) + throw new SparkException("Failed to submit the application to the driver pod.", e) } } - - // wait if configured to do so - if (waitForAppCompletion) { - logInfo(s"Waiting for application $kubernetesAppId to finish...") - driverPodCompletedLatch.await() - logInfo(s"Application $kubernetesAppId finished.") - } else { - logInfo(s"Application $kubernetesAppId successfully launched.") + } finally { + Utils.tryLogNonFatalError { + // Secrets may have been mutated so delete by name to avoid problems with not having + // the latest version. 
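The restructured submission flow wraps every Kubernetes watch in Utils.tryWithResource so that watches are always closed, even when pod or service creation fails part-way through. For readers unfamiliar with that helper, here is a minimal hedged sketch of the loan pattern it implements (not Spark's exact code):

import java.io.Closeable

object LoanPattern {
  // Open a Closeable resource (e.g. the Watch returned by fabric8's watch(...)),
  // run the body, and close the resource regardless of how the body exits.
  def withResource[R <: Closeable, T](createResource: => R)(f: R => T): T = {
    val resource = createResource
    try {
      f(resource)
    } finally {
      resource.close()
    }
  }
}

In the surrounding code, the resource is a pod, service, or endpoints watch, and the body either creates the driver Kubernetes components or submits the application to the driver server.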
+ sslSecrets.foreach { secret => + kubernetesClient.secrets().withName(secret.getMetadata.getName).delete() + } } } } finally { Utils.tryLogNonFatalError { - kubernetesClient.secrets().delete(submitServerSecret) + kubernetesClient.secrets().withName(submitServerSecret.getMetadata.getName).delete() } - Utils.tryLogNonFatalError { - kubernetesClient.secrets().delete(sslSecrets: _*) + } + } + } + + private def cleanupPodAndService( + kubernetesClient: KubernetesClient, + driverPod: Pod, + driverService: Service): Unit = { + Utils.tryLogNonFatalError { + kubernetesClient.services().delete(driverService) + } + Utils.tryLogNonFatalError { + kubernetesClient.pods().delete(driverPod) + } + } + + private def submitApplicationToDriverServer( + kubernetesClient: KubernetesClient, + driverSubmitSslOptions: SSLOptions, + driverService: Service) = { + sparkConf.getOption("spark.app.id").foreach { id => + logWarning(s"Warning: Provided app id in spark.app.id as $id will be" + + s" overridden as $kubernetesAppId") + } + sparkConf.set(KUBERNETES_DRIVER_POD_NAME, kubernetesAppId) + sparkConf.set(KUBERNETES_DRIVER_SERVICE_NAME, driverService.getMetadata.getName) + sparkConf.set("spark.app.id", kubernetesAppId) + sparkConf.setIfMissing("spark.app.name", appName) + sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) + sparkConf.setIfMissing("spark.blockmanager.port", + DEFAULT_BLOCKMANAGER_PORT.toString) + val driverSubmitter = buildDriverSubmissionClient(kubernetesClient, driverService, + driverSubmitSslOptions) + // Sanity check to see if the driver submitter is even reachable. + driverSubmitter.ping() + logInfo(s"Submitting local resources to driver pod for application " + + s"$kubernetesAppId ...") + val submitRequest = buildSubmissionRequest() + driverSubmitter.submitApplication(submitRequest) + logInfo("Successfully submitted local resources and driver configuration to" + + " driver pod.") + // After submitting, adjust the service to only expose the Spark UI + val uiServicePort = new ServicePortBuilder() + .withName(UI_PORT_NAME) + .withPort(uiPort) + .withNewTargetPort(uiPort) + .build() + kubernetesClient.services().withName(kubernetesAppId).edit().editSpec() + .withType("ClusterIP") + .withPorts(uiServicePort) + .endSpec() + .done() + logInfo("Finished submitting application to Kubernetes.") + } + + private def launchDriverKubernetesComponents( + kubernetesClient: KubernetesClient, + parsedCustomLabels: Map[String, String], + submitServerSecret: Secret, + driverSubmitSslOptions: SSLOptions, + sslSecrets: Array[Secret], + sslVolumes: Array[Volume], + sslVolumeMounts: Array[VolumeMount], + sslEnvs: Array[EnvVar], + isKeyStoreLocalFile: Boolean): (Pod, Service) = { + val endpointsReadyFuture = SettableFuture.create[Endpoints] + val endpointsReadyWatcher = new DriverEndpointsReadyWatcher(endpointsReadyFuture) + val serviceReadyFuture = SettableFuture.create[Service] + val driverKubernetesSelectors = (Map( + SPARK_DRIVER_LABEL -> kubernetesAppId, + SPARK_APP_ID_LABEL -> kubernetesAppId, + SPARK_APP_NAME_LABEL -> appName) + ++ parsedCustomLabels).asJava + val serviceReadyWatcher = new DriverServiceReadyWatcher(serviceReadyFuture) + val podReadyFuture = SettableFuture.create[Pod] + val podWatcher = new DriverPodReadyWatcher(podReadyFuture) + Utils.tryWithResource(kubernetesClient + .pods() + .withName(kubernetesAppId) + .watch(podWatcher)) { _ => + Utils.tryWithResource(kubernetesClient + .services() + .withName(kubernetesAppId) + .watch(serviceReadyWatcher)) { _ => + 
Utils.tryWithResource(kubernetesClient + .endpoints() + .withName(kubernetesAppId) + .watch(endpointsReadyWatcher)) { _ => + val driverService = createDriverService( + kubernetesClient, + driverKubernetesSelectors, + submitServerSecret) + val driverPod = try { + createDriverPod( + kubernetesClient, + driverKubernetesSelectors, + submitServerSecret, + driverSubmitSslOptions, + sslVolumes, + sslVolumeMounts, + sslEnvs) + } catch { + case e: Throwable => + Utils.tryLogNonFatalError { + kubernetesClient.services().delete(driverService) + } + throw new SparkException("Failed to create the driver pod.", e) + } + try { + waitForReadyKubernetesComponents(kubernetesClient, endpointsReadyFuture, + serviceReadyFuture, podReadyFuture) + (driverPod, driverService) + } catch { + case e: Throwable => + Utils.tryLogNonFatalError { + kubernetesClient.services().delete(driverService) + } + Utils.tryLogNonFatalError { + kubernetesClient.pods().delete(driverPod) + } + throw new SparkException("Timed out while waiting for a Kubernetes component to be" + + " ready.", e) + } } } } } + /** + * Sets the owner reference for all the kubernetes components to link to the driver pod. + * + * @return The driver service after it has been adjusted to reflect the new owner + * reference. + */ + private def configureOwnerReferences( + kubernetesClient: KubernetesClient, + submitServerSecret: Secret, + sslSecrets: Array[Secret], + driverPod: Pod, + driverService: Service): Service = { + val driverPodOwnerRef = new OwnerReferenceBuilder() + .withName(driverPod.getMetadata.getName) + .withUid(driverPod.getMetadata.getUid) + .withApiVersion(driverPod.getApiVersion) + .withKind(driverPod.getKind) + .withController(true) + .build() + sslSecrets.foreach(secret => { + kubernetesClient.secrets().withName(secret.getMetadata.getName).edit() + .editMetadata() + .addToOwnerReferences(driverPodOwnerRef) + .endMetadata() + .done() + }) + kubernetesClient.secrets().withName(submitServerSecret.getMetadata.getName).edit() + .editMetadata() + .addToOwnerReferences(driverPodOwnerRef) + .endMetadata() + .done() + kubernetesClient.services().withName(driverService.getMetadata.getName).edit() + .editMetadata() + .addToOwnerReferences(driverPodOwnerRef) + .endMetadata() + .done() + } + + private def waitForReadyKubernetesComponents( + kubernetesClient: KubernetesClient, + endpointsReadyFuture: SettableFuture[Endpoints], + serviceReadyFuture: SettableFuture[Service], + podReadyFuture: SettableFuture[Pod]) = { + try { + podReadyFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) + logInfo("Driver pod successfully created in Kubernetes cluster.") + } catch { + case e: Throwable => + val finalErrorMessage: String = buildSubmitFailedErrorMessage(kubernetesClient, e) + logError(finalErrorMessage, e) + throw new SparkException(finalErrorMessage, e) + } + try { + serviceReadyFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) + logInfo("Driver service created successfully in Kubernetes.") + } catch { + case e: Throwable => + throw new SparkException(s"The driver service was not ready" + + s" in $driverSubmitTimeoutSecs seconds.", e) + } + try { + endpointsReadyFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) + logInfo("Driver endpoints ready to receive application submission") + } catch { + case e: Throwable => + throw new SparkException(s"The driver service endpoint was not ready" + + s" in $driverSubmitTimeoutSecs seconds.", e) + } + } + + private def createDriverService( + kubernetesClient: KubernetesClient, + driverKubernetesSelectors: 
java.util.Map[String, String], + submitServerSecret: Secret): Service = { + val driverSubmissionServicePort = new ServicePortBuilder() + .withName(SUBMISSION_SERVER_PORT_NAME) + .withPort(SUBMISSION_SERVER_PORT) + .withNewTargetPort(SUBMISSION_SERVER_PORT) + .build() + kubernetesClient.services().createNew() + .withNewMetadata() + .withName(kubernetesAppId) + .withLabels(driverKubernetesSelectors) + .endMetadata() + .withNewSpec() + .withType("NodePort") + .withSelector(driverKubernetesSelectors) + .withPorts(driverSubmissionServicePort) + .endSpec() + .done() + } + + private def createDriverPod( + kubernetesClient: KubernetesClient, + driverKubernetesSelectors: util.Map[String, String], + submitServerSecret: Secret, + driverSubmitSslOptions: SSLOptions, + sslVolumes: Array[Volume], + sslVolumeMounts: Array[VolumeMount], + sslEnvs: Array[EnvVar]) = { + val containerPorts = buildContainerPorts() + val probePingHttpGet = new HTTPGetActionBuilder() + .withScheme(if (driverSubmitSslOptions.enabled) "HTTPS" else "HTTP") + .withPath("/v1/submissions/ping") + .withNewPort(SUBMISSION_SERVER_PORT_NAME) + .build() + kubernetesClient.pods().createNew() + .withNewMetadata() + .withName(kubernetesAppId) + .withLabels(driverKubernetesSelectors) + .endMetadata() + .withNewSpec() + .withRestartPolicy("OnFailure") + .addNewVolume() + .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) + .withNewSecret() + .withSecretName(submitServerSecret.getMetadata.getName) + .endSecret() + .endVolume() + .addToVolumes(sslVolumes: _*) + .withServiceAccount(serviceAccount) + .addNewContainer() + .withName(DRIVER_CONTAINER_NAME) + .withImage(driverDockerImage) + .withImagePullPolicy("IfNotPresent") + .addNewVolumeMount() + .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) + .withMountPath(secretDirectory) + .withReadOnly(true) + .endVolumeMount() + .addToVolumeMounts(sslVolumeMounts: _*) + .addNewEnv() + .withName(ENV_SUBMISSION_SECRET_LOCATION) + .withValue(s"$secretDirectory/$SUBMISSION_APP_SECRET_NAME") + .endEnv() + .addNewEnv() + .withName(ENV_SUBMISSION_SERVER_PORT) + .withValue(SUBMISSION_SERVER_PORT.toString) + .endEnv() + .addToEnv(sslEnvs: _*) + .withPorts(containerPorts.asJava) + .withNewReadinessProbe().withHttpGet(probePingHttpGet).endReadinessProbe() + .endContainer() + .endSpec() + .done() + } + + private class DriverPodReadyWatcher(resolvedDriverPod: SettableFuture[Pod]) extends Watcher[Pod] { + override def eventReceived(action: Action, pod: Pod): Unit = { + if ((action == Action.ADDED || action == Action.MODIFIED) + && pod.getStatus.getPhase == "Running" + && !resolvedDriverPod.isDone) { + pod.getStatus + .getContainerStatuses + .asScala + .find(status => + status.getName == DRIVER_CONTAINER_NAME && status.getReady) + .foreach { _ => resolvedDriverPod.set(pod) } + } + } + + override def onClose(cause: KubernetesClientException): Unit = { + logDebug("Driver pod readiness watch closed.", cause) + } + } + + private class DriverEndpointsReadyWatcher(resolvedDriverEndpoints: SettableFuture[Endpoints]) + extends Watcher[Endpoints] { + override def eventReceived(action: Action, endpoints: Endpoints): Unit = { + if ((action == Action.ADDED) || (action == Action.MODIFIED) + && endpoints.getSubsets.asScala.nonEmpty + && endpoints.getSubsets.asScala.exists(_.getAddresses.asScala.nonEmpty) + && !resolvedDriverEndpoints.isDone) { + resolvedDriverEndpoints.set(endpoints) + } + } + + override def onClose(cause: KubernetesClientException): Unit = { + logDebug("Driver endpoints readiness watch closed.", cause) + } + } + + private 
class DriverServiceReadyWatcher(resolvedDriverService: SettableFuture[Service]) + extends Watcher[Service] { + override def eventReceived(action: Action, service: Service): Unit = { + if ((action == Action.ADDED) || (action == Action.MODIFIED) + && !resolvedDriverService.isDone) { + resolvedDriverService.set(service) + } + } + + override def onClose(cause: KubernetesClientException): Unit = { + logDebug("Driver service readiness watch closed.", cause) + } + } + private def parseDriverSubmitSslOptions(): (SSLOptions, Boolean) = { val maybeKeyStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_KEYSTORE) val resolvedSparkConf = sparkConf.clone() @@ -306,18 +579,10 @@ private[spark] class Client( .withName(ENV_SUBMISSION_USE_SSL) .withValue("true") .build() - val sslSecrets = kubernetesClient.secrets().createNew() - .withNewMetadata() - .withName(sslSecretsName) - .endMetadata() - .withData(sslSecretsMap.asJava) - .withType("Opaque") - .done() - secrets += sslSecrets val sslVolume = new VolumeBuilder() .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) .withNewSecret() - .withSecretName(sslSecrets.getMetadata.getName) + .withSecretName(sslSecretsName) .endSecret() .build() val sslVolumeMount = new VolumeMountBuilder() @@ -325,147 +590,23 @@ private[spark] class Client( .withReadOnly(true) .withMountPath(sslSecretsDirectory) .build() + val sslSecrets = kubernetesClient.secrets().createNew() + .withNewMetadata() + .withName(sslSecretsName) + .endMetadata() + .withData(sslSecretsMap.asJava) + .withType("Opaque") + .done() + secrets += sslSecrets (sslEnvs.toArray, Array(sslVolume), Array(sslVolumeMount), secrets.toArray) } else { (Array[EnvVar](), Array[Volume](), Array[VolumeMount](), Array[Secret]()) } } - private class DriverPodWatcher( - submitCompletedFuture: SettableFuture[Boolean], - submitPending: AtomicBoolean, - kubernetesClient: KubernetesClient, - driverSubmitSslOptions: SSLOptions, - applicationSecrets: Array[Secret], - driverKubernetesSelectors: java.util.Map[String, String]) extends Watcher[Pod] { - override def eventReceived(action: Action, pod: Pod): Unit = { - if ((action == Action.ADDED || action == Action.MODIFIED) - && pod.getStatus.getPhase == "Running" - && !submitCompletedFuture.isDone) { - if (!submitPending.getAndSet(true)) { - pod.getStatus - .getContainerStatuses - .asScala - .find(status => - status.getName == DRIVER_CONTAINER_NAME && status.getReady) match { - case Some(_) => - val ownerRefs = Seq(new OwnerReferenceBuilder() - .withName(pod.getMetadata.getName) - .withUid(pod.getMetadata.getUid) - .withApiVersion(pod.getApiVersion) - .withKind(pod.getKind) - .withController(true) - .build()) - - applicationSecrets.foreach(secret => { - secret.getMetadata.setOwnerReferences(ownerRefs.asJava) - kubernetesClient.secrets().createOrReplace(secret) - }) - - val driverSubmissionServicePort = new ServicePortBuilder() - .withName(SUBMISSION_SERVER_PORT_NAME) - .withPort(SUBMISSION_SERVER_PORT) - .withNewTargetPort(SUBMISSION_SERVER_PORT) - .build() - val service = kubernetesClient.services().createNew() - .withNewMetadata() - .withName(kubernetesAppId) - .withLabels(driverKubernetesSelectors) - .withOwnerReferences(ownerRefs.asJava) - .endMetadata() - .withNewSpec() - .withType("NodePort") - .withSelector(driverKubernetesSelectors) - .withPorts(driverSubmissionServicePort) - .endSpec() - .done() - try { - sparkConf.getOption("spark.app.id").foreach { id => - logWarning(s"Warning: Provided app id in spark.app.id as $id will be" + - s" overridden as $kubernetesAppId") - } - 
sparkConf.set(KUBERNETES_DRIVER_POD_NAME, kubernetesAppId) - sparkConf.set(KUBERNETES_DRIVER_SERVICE_NAME, service.getMetadata.getName) - sparkConf.set("spark.app.id", kubernetesAppId) - sparkConf.setIfMissing("spark.app.name", appName) - sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) - sparkConf.setIfMissing("spark.blockmanager.port", - DEFAULT_BLOCKMANAGER_PORT.toString) - val driverSubmitter = buildDriverSubmissionClient(kubernetesClient, service, - driverSubmitSslOptions) - val ping = Retry.retry(5, 5.seconds, - Some("Failed to contact the driver server")) { - driverSubmitter.ping() - } - ping onFailure { - case t: Throwable => - logError("Ping failed to the driver server", t) - submitCompletedFuture.setException(t) - kubernetesClient.services().delete(service) - } - val submitComplete = ping.flatMap { _ => - Future { - sparkConf.set("spark.driver.host", pod.getStatus.getPodIP) - val submitRequest = buildSubmissionRequest() - logInfo(s"Submitting local resources to driver pod for application " + - s"$kubernetesAppId ...") - driverSubmitter.submitApplication(submitRequest) - } - } - submitComplete onFailure { - case t: Throwable => - submitCompletedFuture.setException(t) - kubernetesClient.services().delete(service) - } - val adjustServicePort = submitComplete.flatMap { _ => - Future { - // After submitting, adjust the service to only expose the Spark UI - val uiServicePort = new ServicePortBuilder() - .withName(UI_PORT_NAME) - .withPort(uiPort) - .withNewTargetPort(uiPort) - .build() - kubernetesClient.services().withName(kubernetesAppId).edit() - .editSpec() - .withType("ClusterIP") - .withPorts(uiServicePort) - .endSpec() - .done - } - } - adjustServicePort onSuccess { - case _ => - submitCompletedFuture.set(true) - } - adjustServicePort onFailure { - case throwable: Throwable => - submitCompletedFuture.setException(throwable) - kubernetesClient.services().delete(service) - } - } catch { - case e: Throwable => - submitCompletedFuture.setException(e) - Utils.tryLogNonFatalError({ - kubernetesClient.services().delete(service) - }) - throw e - } - case None => - } - } - } - } - - override def onClose(e: KubernetesClientException): Unit = { - if (!submitCompletedFuture.isDone) { - submitCompletedFuture.setException(e) - } - } - } - private def buildSubmitFailedErrorMessage( - kubernetesClient: DefaultKubernetesClient, - e: TimeoutException): String = { + kubernetesClient: KubernetesClient, + e: Throwable): String = { val driverPod = try { kubernetesClient.pods().withName(kubernetesAppId).get() } catch { @@ -606,7 +747,7 @@ private[spark] class Client( node.getSpec.getUnschedulable) .flatMap(_.getStatus.getAddresses.asScala.map(address => { s"$urlScheme://${address.getAddress}:$servicePort" - })).toArray + })).toSet require(nodeUrls.nonEmpty, "No nodes found to contact the driver!") val (trustManager, sslContext): (X509TrustManager, SSLContext) = if (driverSubmitSslOptions.enabled) { @@ -616,8 +757,10 @@ private[spark] class Client( } HttpClientUtil.createClient[KubernetesSparkRestApi]( uris = nodeUrls, + maxRetriesPerServer = 3, sslSocketFactory = sslContext.getSocketFactory, - trustContext = trustManager) + trustContext = trustManager, + connectTimeoutMillis = 5000) } private def buildSslConnectionConfiguration(driverSubmitSslOptions: SSLOptions) = { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala 
deleted file mode 100644 index 378583b29c547..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes - -import scala.concurrent.{ExecutionContext, Future} -import scala.concurrent.duration.Duration - -import org.apache.spark.SparkException -import org.apache.spark.internal.Logging - -private[spark] object Retry extends Logging { - - private def retryableFuture[T] - (attempt: Int, maxAttempts: Int, interval: Duration, retryMessage: Option[String]) - (f: => Future[T]) - (implicit executionContext: ExecutionContext): Future[T] = { - f recoverWith { - case error: Throwable => - if (attempt <= maxAttempts) { - retryMessage.foreach { message => - logWarning(s"$message - attempt $attempt of $maxAttempts", error) - } - Thread.sleep(interval.toMillis) - retryableFuture(attempt + 1, maxAttempts, interval, retryMessage)(f) - } else { - Future.failed(retryMessage.map(message => - new SparkException(s"$message - reached $maxAttempts attempts," + - s" and aborting task.", error) - ).getOrElse(error)) - } - } - } - - def retry[T] - (times: Int, interval: Duration, retryMessage: Option[String] = None) - (f: => T) - (implicit executionContext: ExecutionContext): Future[T] = { - retryableFuture(1, times, interval, retryMessage)(Future[T] { f }) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala index 1cabfbad656eb..576f7058f20ee 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala @@ -32,7 +32,8 @@ import org.apache.spark.status.api.v1.JacksonMessageWriter private[spark] object HttpClientUtil { def createClient[T: ClassTag]( - uris: Array[String], + uris: Set[String], + maxRetriesPerServer: Int = 1, sslSocketFactory: SSLSocketFactory = SSLContext.getDefault.getSocketFactory, trustContext: X509TrustManager = null, readTimeoutMillis: Int = 20000, @@ -45,12 +46,12 @@ private[spark] object HttpClientUtil { .registerModule(new DefaultScalaModule) .setDateFormat(JacksonMessageWriter.makeISODateFormat) objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) - val target = new MultiServerFeignTarget[T](uris) + val target = new MultiServerFeignTarget[T](uris.toSeq, maxRetriesPerServer) val baseHttpClient = new feign.okhttp.OkHttpClient(httpClientBuilder.build()) val resetTargetHttpClient = 
new Client { override def execute(request: Request, options: Options): Response = { val response = baseHttpClient.execute(request, options) - if (response.status() >= 200 && response.status() < 300) { + if (response.status() / 100 == 2) { target.reset() } response diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala index fea7f057cfa1b..51313e00ce2da 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala @@ -20,20 +20,25 @@ import feign.{Request, RequestTemplate, RetryableException, Retryer, Target} import scala.reflect.ClassTag import scala.util.Random +import org.apache.spark.internal.Logging + private[kubernetes] class MultiServerFeignTarget[T : ClassTag]( - private val servers: Seq[String]) extends Target[T] with Retryer { + private val servers: Seq[String], + private val maxRetriesPerServer: Int = 1, + private val delayBetweenRetriesMillis: Int = 1000) extends Target[T] with Retryer with Logging { require(servers.nonEmpty, "Must provide at least one server URI.") private val threadLocalShuffledServers = new ThreadLocal[Seq[String]] { override def initialValue(): Seq[String] = Random.shuffle(servers) } + private val threadLocalCurrentAttempt = new ThreadLocal[Int] { + override def initialValue(): Int = 0 + } override def `type`(): Class[T] = { implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] } - override def url(): String = threadLocalShuffledServers.get.head - /** * Cloning the target is done on every request, for use on the current * thread - thus it's important that clone returns a "fresh" target. @@ -54,14 +59,31 @@ private[kubernetes] class MultiServerFeignTarget[T : ClassTag]( requestTemplate.request() } + override def url(): String = threadLocalShuffledServers.get.head + override def continueOrPropagate(e: RetryableException): Unit = { - threadLocalShuffledServers.set(threadLocalShuffledServers.get.drop(1)) - if (threadLocalShuffledServers.get.isEmpty) { - throw e + threadLocalCurrentAttempt.set(threadLocalCurrentAttempt.get + 1) + val currentAttempt = threadLocalCurrentAttempt.get + if (threadLocalCurrentAttempt.get < maxRetriesPerServer) { + logWarning(s"Attempt $currentAttempt of $maxRetriesPerServer failed for" + + s" server ${url()}. Retrying request...", e) + Thread.sleep(delayBetweenRetriesMillis) + } else { + val previousUrl = url() + threadLocalShuffledServers.set(threadLocalShuffledServers.get.drop(1)) + if (threadLocalShuffledServers.get.isEmpty) { + logError(s"Failed request to all servers $maxRetriesPerServer times.", e) + throw e + } else { + logWarning(s"Failed request to $previousUrl $maxRetriesPerServer times." 
+ + s" Trying to access ${url()} instead.", e) + threadLocalCurrentAttempt.set(0) + } } } def reset(): Unit = { threadLocalShuffledServers.set(Random.shuffle(servers)) + threadLocalCurrentAttempt.set(0) } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala index b42f97952394e..736b92cc2d628 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala @@ -123,7 +123,7 @@ private[spark] object Minikube extends Logging { .build() val sslContext = SSLUtils.sslContext(kubernetesConf) val trustManager = SSLUtils.trustManagers(kubernetesConf)(0).asInstanceOf[X509TrustManager] - HttpClientUtil.createClient[T](Array(url), sslContext.getSocketFactory, trustManager) + HttpClientUtil.createClient[T](Set(url), 5, sslContext.getSocketFactory, trustManager) } def executeMinikubeSsh(command: String): Unit = { From bdfc4e122cfcf2782ac41028a7fa398bd2ae4e4b Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Thu, 9 Feb 2017 17:54:39 -0800 Subject: [PATCH 044/156] Note integration tests require Java 8 (#99) --- resource-managers/kubernetes/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md index 5e4ffaa54cb55..92ec305513f42 100644 --- a/resource-managers/kubernetes/README.md +++ b/resource-managers/kubernetes/README.md @@ -41,6 +41,8 @@ Below is a list of the submodules for this cluster manager and what they do. Note that the integration test framework is currently being heavily revised and is subject to change. +Note that currently the integration tests only run with Java 8. + Running any of the integration tests requires including `kubernetes-integration-tests` profile in the build command. In order to prepare the environment for running the integration tests, the `pre-integration-test` step must be run in Maven on the `resource-managers/kubernetes/integration-tests` module: From fe8b45c3543c8ae2af5f774c47a5f80142533307 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Fri, 10 Feb 2017 14:50:38 -0800 Subject: [PATCH 045/156] Bumping up kubernetes-client version to fix GKE and local proxy (#105) * Bumping up kubernetes-client version to add fixes * Modify wording * Addressed comments --- docs/running-on-kubernetes.md | 30 ++++++++++++++++++- resource-managers/kubernetes/core/pom.xml | 2 +- .../KubernetesClusterSchedulerBackend.scala | 2 +- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 5a48bb254a6df..19f406039e261 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -51,7 +51,7 @@ connect without SSL on a different port, the master would be set to `k8s://http: Note that applications can currently only be executed in cluster mode, where the driver and its executors are running on the cluster. - + ### Adding Other JARs Spark allows users to provide dependencies that are bundled into the driver's Docker image, or that are on the local @@ -150,6 +150,34 @@ or `container:`. 
A scheme of `file:` corresponds to the keyStore being located on the client machine; it is mounted onto the driver container as a [secret volume](https://kubernetes.io/docs/user-guide/secrets/). When the URI has the scheme `container:`, the file is assumed to already be on the container's disk at the appropriate path. +### Kubernetes Clusters and the authenticated proxy endpoint + +Spark-submit also supports submission through the +[local kubectl proxy](https://kubernetes.io/docs/user-guide/connecting-to-applications-proxy/). One can use the +authenticating proxy to communicate with the API server directly without passing credentials to spark-submit. + +The local proxy can be started by running: + + kubectl proxy + +If the local proxy is listening on port 8001, the submission would look like the following: + + bin/spark-submit \ + --deploy-mode cluster \ + --class org.apache.spark.examples.SparkPi \ + --master k8s://http://127.0.0.1:8001 \ + --kubernetes-namespace default \ + --conf spark.executor.instances=5 \ + --conf spark.app.name=spark-pi \ + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ + examples/jars/spark_examples_2.11-2.2.0.jar + +Communication between Spark and Kubernetes clusters is performed using the fabric8 kubernetes-client library. +The above mechanism using `kubectl proxy` can be used when the cluster uses authentication providers that the fabric8 +kubernetes-client library does not support. Authentication using X509 client certificates and OAuth tokens +is currently supported. + ### Spark Properties Below are some other common properties that are specific to Kubernetes. Most of the other configurations are the same diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 86d7dec2c076f..a7eba625cd56c 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -29,7 +29,7 @@ Spark Project Kubernetes kubernetes - 1.4.34 + 2.0.3 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 550ddd113fa42..83225098bc651 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -44,7 +44,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private val EXECUTOR_MODIFICATION_LOCK = new Object private val runningExecutorPods = new scala.collection.mutable.HashMap[String, Pod] - private val kubernetesMaster = Client.resolveK8sMaster(sc.master) + private val kubernetesMaster = "https://kubernetes" private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) private val executorPort = conf.getInt("spark.executor.port", DEFAULT_STATIC_PORT) From 7a4075f3a2f063ed26240d66841deeffcf3b5980 Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Fri, 10 Feb 2017 18:38:29 -0800 Subject: [PATCH 046/156] Truncate k8s hostnames to be no longer than 63 characters (#102) * Truncate k8s hostnames to be no longer than 63 characters * Use only executorId not executorKubernetesId ---
.../KubernetesClusterSchedulerBackend.scala | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 83225098bc651..d4e7da464be4a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -155,9 +155,14 @@ private[spark] class KubernetesClusterSchedulerBackend( } private def allocateNewExecutorPod(): (String, Pod) = { - val executorKubernetesId = UUID.randomUUID().toString.replaceAll("-", "") val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString - val name = s"${applicationId()}-exec-$executorKubernetesId" + val name = s"${applicationId()}-exec-$executorId" + + // hostname must be no longer than 63 characters, so take the last 63 characters of the pod + // name as the hostname. This preserves uniqueness since the end of name contains + // executorId and applicationId + val hostname = name.substring(Math.max(0, name.length - 63)) + val selectors = Map(SPARK_EXECUTOR_ID_LABEL -> executorId, SPARK_APP_ID_LABEL -> applicationId()).asJava val executorMemoryQuantity = new QuantityBuilder(false) @@ -190,7 +195,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .build() }) try { - (executorKubernetesId, kubernetesClient.pods().createNew() + (executorId, kubernetesClient.pods().createNew() .withNewMetadata() .withName(name) .withLabels(selectors) @@ -204,6 +209,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .endOwnerReference() .endMetadata() .withNewSpec() + .withHostname(hostname) .addNewContainer() .withName(s"executor") .withImage(executorDockerImage) From 3d80fffea60cb0e6ed14644adfec52d4e6185701 Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Mon, 13 Feb 2017 15:43:30 +0000 Subject: [PATCH 047/156] Fixed loading the executors page through the kubectl proxy. (#95) Fix apache-spark-on-k8s/spark#87 --- .../apache/spark/ui/static/executorspage.js | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js index fe5db6aa26b65..fa0282678d1f4 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js @@ -54,7 +54,28 @@ $(document).ajaxStart(function () { $.blockUI({message: '
    <h3>Loading Executors Page...</h3>
    '}); }); +function findKubernetesServiceBaseURI() { + var k8sProxyPattern = '/api/v1/proxy/namespaces/'; + var k8sProxyPatternPos = document.baseURI.indexOf(k8sProxyPattern); + if (k8sProxyPatternPos > 0) { + // Spark is running in a kubernetes cluster, and the web ui is served + // through the kubectl proxy. + var remaining = document.baseURI.substr(k8sProxyPatternPos + k8sProxyPattern.length); + var urlSlashesCount = remaining.split('/').length - 3; + var words = document.baseURI.split('/'); + var baseURI = words.slice(0, words.length - urlSlashesCount).join('/'); + return baseURI; + } + + return null; +} + function createTemplateURI(appId) { + var kubernetesBaseURI = findKubernetesServiceBaseURI(); + if (kubernetesBaseURI) { + return kubernetesBaseURI + '/static/executorspage-template.html'; + } + var words = document.baseURI.split('/'); var ind = words.indexOf("proxy"); if (ind > 0) { @@ -70,6 +91,14 @@ function createTemplateURI(appId) { } function getStandAloneppId(cb) { + var kubernetesBaseURI = findKubernetesServiceBaseURI(); + if (kubernetesBaseURI) { + var appIdAndPort = kubernetesBaseURI.split('/').slice(-1)[0]; + var appId = appIdAndPort.split(':')[0]; + cb(appId); + return; + } + var words = document.baseURI.split('/'); var ind = words.indexOf("proxy"); if (ind > 0) { @@ -95,6 +124,11 @@ function getStandAloneppId(cb) { } function createRESTEndPoint(appId) { + var kubernetesBaseURI = findKubernetesServiceBaseURI(); + if (kubernetesBaseURI) { + return kubernetesBaseURI + "/api/v1/applications/" + appId + "/allexecutors"; + } + var words = document.baseURI.split('/'); var ind = words.indexOf("proxy"); if (ind > 0) { From a34a11416d5891bdc4a102c97aed4760d70b3ecd Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 13 Feb 2017 12:49:16 -0800 Subject: [PATCH 048/156] Filter nodes to only try and send files to external IPs (#106) * Filter node addresses * Added comment --- .../org/apache/spark/deploy/kubernetes/Client.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 9eed9bfd2cd79..d3aa515484f78 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -745,9 +745,14 @@ private[spark] class Client( val nodeUrls = kubernetesClient.nodes.list.getItems.asScala .filterNot(node => node.getSpec.getUnschedulable != null && node.getSpec.getUnschedulable) - .flatMap(_.getStatus.getAddresses.asScala.map(address => { + .flatMap(_.getStatus.getAddresses.asScala) + // The list contains hostnames, internal and external IP addresses. 
+ // we want only external IP addresses in our list + // (https://kubernetes.io/docs/admin/node/#addresses) + .filter(_.getType == "ExternalIP") + .map(address => { s"$urlScheme://${address.getAddress}:$servicePort" - })).toSet + }).toSet require(nodeUrls.nonEmpty, "No nodes found to contact the driver!") val (trustManager, sslContext): (X509TrustManager, SSLContext) = if (driverSubmitSslOptions.enabled) { From ac4dd917326dad358568549f3d60fa0c91c86d85 Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Mon, 13 Feb 2017 23:18:01 +0000 Subject: [PATCH 049/156] Parse results of minikube status more rigorously (#97) * Parse results of minikube status more rigorously Prior code assumes the minikubeVM status line is always the first row output from minikube status, and it is not when the version upgrade notifier prints an upgrade suggestion message. * Also filter ip response to expected rows --- .../kubernetes/integrationtest/minikube/Minikube.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala index 736b92cc2d628..e7eea679adf79 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala @@ -19,6 +19,7 @@ package org.apache.spark.deploy.kubernetes.integrationtest.minikube import java.io.{BufferedReader, InputStreamReader} import java.nio.file.Paths import java.util.concurrent.TimeUnit +import java.util.regex.Pattern import javax.net.ssl.X509TrustManager import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} @@ -58,13 +59,17 @@ private[spark] object Minikube extends Logging { def getMinikubeIp: String = synchronized { assert(MINIKUBE_EXECUTABLE_DEST.exists(), EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) val outputs = executeMinikube("ip") + .filter(_.matches("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$")) assert(outputs.size == 1, "Unexpected amount of output from minikube ip") outputs.head } def getMinikubeStatus: MinikubeStatus.Value = synchronized { assert(MINIKUBE_EXECUTABLE_DEST.exists(), EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) - val statusString = executeMinikube("status").head.replaceFirst("minikubeVM: ", "") + val statusString = executeMinikube("status") + .filter(_.contains("minikubeVM: ")) + .head + .replaceFirst("minikubeVM: ", "") MinikubeStatus.unapply(statusString) .getOrElse(throw new IllegalStateException(s"Unknown status $statusString")) } From 2112c4a4e76ec594bec2e5e47e7ab5f3d03b7a64 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Tue, 14 Feb 2017 14:32:54 -0800 Subject: [PATCH 050/156] Adding legacyHostIP to the list of IPs we look at (#114) --- .../scala/org/apache/spark/deploy/kubernetes/Client.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index d3aa515484f78..279ee505de609 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -747,9 +747,11 @@ private[spark] class Client( node.getSpec.getUnschedulable) .flatMap(_.getStatus.getAddresses.asScala) // The list contains hostnames, internal and external IP addresses. - // we want only external IP addresses in our list // (https://kubernetes.io/docs/admin/node/#addresses) - .filter(_.getType == "ExternalIP") + // we want only external IP addresses and legacyHostIP addresses in our list + // legacyHostIPs are deprecated and will be removed in the future. + // (https://github.com/kubernetes/kubernetes/issues/9267) + .filter(address => address.getType == "ExternalIP" || address.getType == "LegacyHostIP") .map(address => { s"$urlScheme://${address.getAddress}:$servicePort" }).toSet From 043cdd9677e4c7a88e415029e4c988a58375429f Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Wed, 15 Feb 2017 01:29:31 +0000 Subject: [PATCH 051/156] Add -DskipTests to dev docs (#115) * Add -DskipTests to dev docs * Remove extraneous skipTests --- resource-managers/kubernetes/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md index 92ec305513f42..25b62ba35a193 100644 --- a/resource-managers/kubernetes/README.md +++ b/resource-managers/kubernetes/README.md @@ -17,7 +17,7 @@ important matters to keep in mind when developing this feature. To build Spark with Kubernetes support, use the `kubernetes` profile when invoking Maven. For example, to simply compile the Kubernetes core implementation module along with its dependencies: - build/mvn compile -Pkubernetes -pl resource-managers/kubernetes/core -am + build/mvn compile -Pkubernetes -pl resource-managers/kubernetes/core -am -DskipTests To build a distribution of Spark with Kubernetes support, use the `dev/make-distribution.sh` script, and add the `kubernetes` profile as part of the build arguments. Any other build arguments can be specified as one would expect when @@ -47,7 +47,7 @@ Running any of the integration tests requires including `kubernetes-integration- order to prepare the environment for running the integration tests, the `pre-integration-test` step must be run in Maven on the `resource-managers/kubernetes/integration-tests` module: - build/mvn pre-integration-test -Pkubernetes -Pkubernetes-integration-tests -pl resource-managers/kubernetes/integration-tests -am + build/mvn pre-integration-test -Pkubernetes -Pkubernetes-integration-tests -pl resource-managers/kubernetes/integration-tests -am -DskipTests Afterwards, the integration tests can be executed with Maven or your IDE. Note that when running tests from an IDE, the `pre-integration-test` phase must be run every time the Spark main code changes. 
When running tests from the From 0e6df1145d9ede8c18612d9d9096e2fdd4274879 Mon Sep 17 00:00:00 2001 From: Varun Date: Wed, 15 Feb 2017 16:38:46 -0800 Subject: [PATCH 052/156] Shutdown the thread scheduler in LoggingPodStatusWatcher on receiving job finish event notifications (#121) --- .../apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala index cbacaf6bda854..b7a29fedcbd2d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala @@ -64,6 +64,7 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL if (phase == "Succeeded" || phase == "Failed") { podCompletedFuture.countDown() + scheduler.shutdown() } } From a800e20950c47fe03a8d0d06c983501debb72f8c Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Wed, 15 Feb 2017 17:22:36 -0800 Subject: [PATCH 053/156] Trigger scalatest plugin in the integration-test phase (#93) * Trigger scalatest plugin in the integration-test phase * Clean up unnecessary config section --- .../kubernetes/integration-tests/pom.xml | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index f6a322f18cd75..3de10f94c4aca 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -216,6 +216,33 @@ + + + org.scalatest + scalatest-maven-plugin + + + test + + test + + + + (?<!Suite) + + + + integration-test + integration-test + + test + + + +
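The LoggingPodStatusWatcher change above boils down to a common pattern: a scheduled task periodically reports the driver pod's status, and once a terminal phase is observed it both releases the latch that the submission client is waiting on and shuts down its own scheduler so no further runs are queued. Below is a minimal, self-contained sketch of that pattern; `PodPhasePoller`, `podPhase`, and `intervalSeconds` are illustrative names, and this is not the actual LoggingPodStatusWatcher implementation.

    import java.util.concurrent.{CountDownLatch, Executors, TimeUnit}

    // Illustrative sketch only: poll a pod phase on a fixed interval, and once a
    // terminal phase ("Succeeded" or "Failed") is seen, release the latch and shut
    // the scheduler down so no further polling runs are scheduled.
    class PodPhasePoller(podPhase: () => String, intervalSeconds: Long) {
      private val podCompletedLatch = new CountDownLatch(1)
      private val scheduler = Executors.newSingleThreadScheduledExecutor()

      def start(): Unit = {
        scheduler.scheduleAtFixedRate(new Runnable {
          override def run(): Unit = {
            val phase = podPhase()
            println(s"Driver pod phase: $phase")
            if (phase == "Succeeded" || phase == "Failed") {
              podCompletedLatch.countDown()
              scheduler.shutdown() // the key addition: stop polling after completion
            }
          }
        }, 0L, intervalSeconds, TimeUnit.SECONDS)
      }

      def awaitCompletion(): Unit = podCompletedLatch.await()
    }

Without the `shutdown()` call, the scheduled task would keep firing after the application has already finished, which is the behavior the patch above removes.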
    From 2773b778b55f08f6279eae645a919ab140de585f Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Wed, 15 Feb 2017 17:56:35 -0800 Subject: [PATCH 054/156] Fix issue with DNS resolution (#118) * Fix issue with DNS resolution * Address comments --- .../spark/deploy/kubernetes/KubernetesClientBuilder.scala | 5 +++-- .../scala/org/apache/spark/deploy/kubernetes/constants.scala | 1 + .../kubernetes/KubernetesClusterSchedulerBackend.scala | 3 +-- .../kubernetes/integrationtest/minikube/Minikube.scala | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala index 61d3ac17ac34a..89369b30694ee 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala @@ -22,6 +22,8 @@ import com.google.common.base.Charsets import com.google.common.io.Files import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient} +import org.apache.spark.deploy.kubernetes.constants._ + private[spark] object KubernetesClientBuilder { private val API_SERVER_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) private val CA_CERT_FILE = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) @@ -33,11 +35,10 @@ private[spark] object KubernetesClientBuilder { * into the pod's disk space. */ def buildFromWithinPod( - kubernetesMaster: String, kubernetesNamespace: String): DefaultKubernetesClient = { var clientConfigBuilder = new ConfigBuilder() .withApiVersion("v1") - .withMasterUrl(kubernetesMaster) + .withMasterUrl(KUBERNETES_MASTER_INTERNAL_URL) .withNamespace(kubernetesNamespace) if (CA_CERT_FILE.isFile) { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 027cc3c022b4e..688cd858e79ff 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -67,4 +67,5 @@ package object constants { // Miscellaneous private[spark] val DRIVER_CONTAINER_NAME = "spark-kubernetes-driver" private[spark] val KUBERNETES_SUBMIT_SSL_NAMESPACE = "kubernetes.submit" + private[spark] val KUBERNETES_MASTER_INTERNAL_URL = "https://kubernetes.default.svc" } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index d4e7da464be4a..898b215b92d04 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -44,7 +44,6 @@ private[spark] class KubernetesClusterSchedulerBackend( private val EXECUTOR_MODIFICATION_LOCK = new Object private val runningExecutorPods = new scala.collection.mutable.HashMap[String, Pod] - private 
val kubernetesMaster = "https://kubernetes" private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) private val executorPort = conf.getInt("spark.executor.port", DEFAULT_STATIC_PORT) @@ -77,7 +76,7 @@ private[spark] class KubernetesClusterSchedulerBackend( ThreadUtils.newDaemonCachedThreadPool("kubernetes-executor-requests")) private val kubernetesClient = KubernetesClientBuilder - .buildFromWithinPod(kubernetesMaster, kubernetesNamespace) + .buildFromWithinPod(kubernetesNamespace) private val driverPod = try { kubernetesClient.pods().inNamespace(kubernetesNamespace). diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala index e7eea679adf79..07274bf962dde 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala @@ -76,7 +76,7 @@ private[spark] object Minikube extends Logging { def getDockerEnv: Map[String, String] = synchronized { assert(MINIKUBE_EXECUTABLE_DEST.exists(), EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) - executeMinikube("docker-env") + executeMinikube("docker-env", "--shell", "bash") .filter(_.startsWith("export")) .map(_.replaceFirst("export ", "").split('=')) .map(arr => (arr(0), arr(1).replaceAllLiterally("\"", ""))) From 6a999cab598edc5fd87974a74f460c131885d012 Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 16 Feb 2017 15:28:42 -0800 Subject: [PATCH 055/156] Change the API contract for uploading local files (#107) * Change the API contract for uploading local jars. This mirrors similarly to what YARN and Mesos expects. 
* Address comments * Fix test --- .../org/apache/spark/deploy/SparkSubmit.scala | 9 +- .../spark/deploy/SparkSubmitArguments.scala | 14 -- docs/running-on-kubernetes.md | 108 ++-------------- .../launcher/SparkSubmitOptionParser.java | 8 +- .../spark/deploy/kubernetes/Client.scala | 121 ++++++------------ .../spark/deploy/kubernetes/config.scala | 24 ---- .../rest/KubernetesRestProtocolMessages.scala | 4 +- .../rest/kubernetes/KubernetesFileUtils.scala | 44 +++++++ .../KubernetesSparkRestServer.scala | 115 +++++++++++------ .../kubernetes/docker-minimal-bundle/pom.xml | 6 - .../src/main/assembly/driver-assembly.xml | 11 -- .../src/main/assembly/executor-assembly.xml | 11 -- .../kubernetes/integration-tests/pom.xml | 50 ++++++++ .../integrationtest/KubernetesSuite.scala | 87 +++---------- 14 files changed, 244 insertions(+), 368 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesFileUtils.scala diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 51eb23560defe..002b29d5564e1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -472,10 +472,6 @@ object SparkSubmit { OptionAssigner(args.kubernetesNamespace, KUBERNETES, ALL_DEPLOY_MODES, sysProp = "spark.kubernetes.namespace"), - OptionAssigner(args.kubernetesUploadJars, KUBERNETES, CLUSTER, - sysProp = "spark.kubernetes.driver.uploads.jars"), - OptionAssigner(args.kubernetesUploadFiles, KUBERNETES, CLUSTER, - sysProp = "spark.kubernetes.driver.uploads.files"), // Other options OptionAssigner(args.executorCores, STANDALONE | YARN, ALL_DEPLOY_MODES, @@ -484,10 +480,11 @@ object SparkSubmit { sysProp = "spark.executor.memory"), OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES, sysProp = "spark.cores.max"), - OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, ALL_DEPLOY_MODES, + OptionAssigner(args.files, LOCAL | STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, sysProp = "spark.files"), OptionAssigner(args.jars, LOCAL, CLIENT, sysProp = "spark.jars"), - OptionAssigner(args.jars, STANDALONE | MESOS, ALL_DEPLOY_MODES, sysProp = "spark.jars"), + OptionAssigner(args.jars, STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, + sysProp = "spark.jars"), OptionAssigner(args.driverMemory, STANDALONE | MESOS | YARN, CLUSTER, sysProp = "spark.driver.memory"), OptionAssigner(args.driverCores, STANDALONE | MESOS | YARN, CLUSTER, diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index f771755244f31..4e297fe3b0e3b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -73,8 +73,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S // Kubernetes only var kubernetesNamespace: String = null - var kubernetesUploadJars: String = null - var kubernetesUploadFiles: String = null // Standalone cluster mode only var supervise: Boolean = false @@ -194,12 +192,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S kubernetesNamespace = Option(kubernetesNamespace) .orElse(sparkProperties.get("spark.kubernetes.namespace")) .orNull - kubernetesUploadJars = Option(kubernetesUploadJars) - 
.orElse(sparkProperties.get("spark.kubernetes.driver.uploads.jars")) - .orNull - kubernetesUploadFiles = Option(kubernetesUploadFiles) - .orElse(sparkProperties.get("spark.kubernetes.driver.uploads.files")) - .orNull // Try to set main class from JAR if no --class argument is given if (mainClass == null && !isPython && !isR && primaryResource != null) { @@ -443,12 +435,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S case KUBERNETES_NAMESPACE => kubernetesNamespace = value - case KUBERNETES_UPLOAD_JARS => - kubernetesUploadJars = value - - case KUBERNETES_UPLOAD_FILES => - kubernetesUploadFiles = value - case HELP => printUsageAndExit(0) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 19f406039e261..e5c7e9bb69448 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -51,87 +51,15 @@ connect without SSL on a different port, the master would be set to `k8s://http: Note that applications can currently only be executed in cluster mode, where the driver and its executors are running on the cluster. - -### Adding Other JARs - -Spark allows users to provide dependencies that are bundled into the driver's Docker image, or that are on the local -disk of the submitter's machine. These two types of dependencies are specified via different configuration options to -`spark-submit`: -* Local jars provided by specifying the `--jars` command line argument to `spark-submit`, or by setting `spark.jars` in - the application's configuration, will be treated as jars that are located on the *disk of the driver container*. This - only applies to jar paths that do not specify a scheme or that have the scheme `file://`. Paths with other schemes are - fetched from their appropriate locations. -* Local jars provided by specifying the `--upload-jars` command line argument to `spark-submit`, or by setting - `spark.kubernetes.driver.uploads.jars` in the application's configuration, will be treated as jars that are located on - the *disk of the submitting machine*. These jars are uploaded to the driver docker container before executing the - application. -* A main application resource path that does not have a scheme or that has the scheme `file://` is assumed to be on the - *disk of the submitting machine*. This resource is uploaded to the driver docker container before executing the - application. A remote path can still be specified and the resource will be fetched from the appropriate location. -* A main application resource path that has the scheme `container://` is assumed to be on the *disk of the driver - container*. - -In all of these cases, the jars are placed on the driver's classpath, and are also sent to the executors. Below are some -examples of providing application dependencies. 
- -To submit an application with both the main resource and two other jars living on the submitting user's machine: - - bin/spark-submit \ - --deploy-mode cluster \ - --class com.example.applications.SampleApplication \ - --master k8s://192.168.99.100 \ - --upload-jars /home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar \ - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ - /home/exampleuser/exampleapplication/main.jar - -Note that since passing the jars through the `--upload-jars` command line argument is equivalent to setting the -`spark.kubernetes.driver.uploads.jars` Spark property, the above will behave identically to this command: - - bin/spark-submit \ - --deploy-mode cluster \ - --class com.example.applications.SampleApplication \ - --master k8s://192.168.99.100 \ - --conf spark.kubernetes.driver.uploads.jars=/home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar \ - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ - /home/exampleuser/exampleapplication/main.jar - -To specify a main application resource that can be downloaded from an HTTP service, and if a plugin for that application -is located in the jar `/opt/spark-plugins/app-plugin.jar` on the docker image's disk: - - bin/spark-submit \ - --deploy-mode cluster \ - --class com.example.applications.PluggableApplication \ - --master k8s://192.168.99.100 \ - --jars /opt/spark-plugins/app-plugin.jar \ - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ - http://example.com:8080/applications/sparkpluggable/app.jar - -Note that since passing the jars through the `--jars` command line argument is equivalent to setting the `spark.jars` -Spark property, the above will behave identically to this command: - - bin/spark-submit \ - --deploy-mode cluster \ - --class com.example.applications.PluggableApplication \ - --master k8s://192.168.99.100 \ - --conf spark.jars=file:///opt/spark-plugins/app-plugin.jar \ - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ - http://example.com:8080/applications/sparkpluggable/app.jar - -To specify a main application resource that is in the Docker image, and if it has no other dependencies: - - bin/spark-submit \ - --deploy-mode cluster \ - --class com.example.applications.PluggableApplication \ - --master k8s://192.168.99.100:8443 \ - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ - container:///home/applications/examples/example.jar +### Dependency Management and Docker Containers +Spark supports specifying JAR paths that are either on the submitting host's disk, or are located on the disk of the +driver and executors. Refer to the [application submission](submitting-applications.html#advanced-dependency-management) +section for details. Note that files specified with the `local` scheme should be added to the container image of both +the driver and the executors. 
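To make the new contract concrete, the sketch below shows one way the scheme rules could be applied when sifting `spark.jars` and `spark.files` entries into submitter-local and container-local groups. It is only an illustration of the documented rules; `FileSchemes` and its methods are hypothetical and are not the `KubernetesFileUtils` helper added by this patch.

    import java.net.URI

    // Rough sketch, not the actual KubernetesFileUtils implementation: entries with
    // no scheme or the "file" scheme live on the submitting machine and must be
    // uploaded; entries with the "local" scheme are already on the container's disk;
    // anything else (http, hdfs, ...) is fetched from its remote location.
    object FileSchemes {
      private def scheme(rawUri: String): String =
        Option(URI.create(rawUri).getScheme).getOrElse("file")

      def isSubmitterLocal(rawUri: String): Boolean = scheme(rawUri) == "file"

      def isContainerLocal(rawUri: String): Boolean = scheme(rawUri) == "local"

      def onlySubmitterLocalFiles(commaSeparated: Option[String]): Seq[String] =
        commaSeparated.map(_.split(",").toSeq).getOrElse(Seq.empty).filter(isSubmitterLocal)
    }

For example, `FileSchemes.onlySubmitterLocalFiles(Some("/opt/job/dep1.jar,local:///opt/spark/lib/dep2.jar"))` returns only `/opt/job/dep1.jar`, i.e. the set of files the submission client would need to ship to the driver pod.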
Files without a scheme or with the scheme `file://` are treated as being on the disk of +the submitting machine, and are uploaded to the driver running in Kubernetes before launching the application. + ### Setting Up SSL For Submitting the Driver When submitting to Kubernetes, a pod is started for the driver, and the pod starts an HTTP server. This HTTP server @@ -146,9 +74,9 @@ pod in starting the application, set `spark.ssl.kubernetes.submit.trustStore`. One note about the keyStore is that it can be specified as either a file on the client machine or a file in the container image's disk. Thus `spark.ssl.kubernetes.submit.keyStore` can be a URI with a scheme of either `file:` -or `container:`. A scheme of `file:` corresponds to the keyStore being located on the client machine; it is mounted onto +or `local:`. A scheme of `file:` corresponds to the keyStore being located on the client machine; it is mounted onto the driver container as a [secret volume](https://kubernetes.io/docs/user-guide/secrets/). When the URI has the scheme -`container:`, the file is assumed to already be on the container's disk at the appropriate path. +`local:`, the file is assumed to already be on the container's disk at the appropriate path. ### Kubernetes Clusters and the authenticated proxy endpoint @@ -241,24 +169,6 @@ from the other deployment modes. See the [configuration page](configuration.html executor pods from the API server. - - spark.kubernetes.driver.uploads.jars - (none) - - Comma-separated list of jars to send to the driver and all executors when submitting the application in cluster - mode. Refer to adding other jars for more information. - - - - spark.kubernetes.driver.uploads.files - (none) - - Comma-separated list of files to send to the driver and all executors when submitting the application in cluster - mode. The files are added in a flat hierarchy to the current working directory of the driver, having the same - names as the names of the original files. Note that two files with the same name cannot be added, even if they - were in different source directories on the client disk. - - spark.kubernetes.executor.memoryOverhead executorMemory * 0.10, with minimum of 384 diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java index 3369b5d8301be..a4d43c0795abc 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java @@ -77,10 +77,7 @@ class SparkSubmitOptionParser { protected final String QUEUE = "--queue"; // Kubernetes-only options. - protected final String KUBERNETES_MASTER = "--kubernetes-master"; protected final String KUBERNETES_NAMESPACE = "--kubernetes-namespace"; - protected final String KUBERNETES_UPLOAD_JARS = "--upload-jars"; - protected final String KUBERNETES_UPLOAD_FILES = "--upload-files"; /** * This is the canonical list of spark-submit options. 
Each entry in the array contains the @@ -121,10 +118,7 @@ class SparkSubmitOptionParser { { REPOSITORIES }, { STATUS }, { TOTAL_EXECUTOR_CORES }, - { KUBERNETES_MASTER }, - { KUBERNETES_NAMESPACE }, - { KUBERNETES_UPLOAD_JARS }, - { KUBERNETES_UPLOAD_FILES } + { KUBERNETES_NAMESPACE } }; /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 279ee505de609..aa273a024f6f9 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -35,7 +35,7 @@ import scala.collection.mutable import org.apache.spark.{SecurityManager, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} +import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging import org.apache.spark.util.Utils @@ -59,11 +59,10 @@ private[spark] class Client( private val sslSecretsDirectory = s"$DRIVER_CONTAINER_SECRETS_BASE_DIR/$kubernetesAppId-ssl" private val sslSecretsName = s"$SUBMISSION_SSL_SECRETS_PREFIX-$kubernetesAppId" private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) - private val uploadedJars = sparkConf.get(KUBERNETES_DRIVER_UPLOAD_JARS).filter(_.nonEmpty) - private val uploadedFiles = sparkConf.get(KUBERNETES_DRIVER_UPLOAD_FILES).filter(_.nonEmpty) - uploadedFiles.foreach(validateNoDuplicateUploadFileNames) private val uiPort = sparkConf.getInt("spark.ui.port", DEFAULT_UI_PORT) private val driverSubmitTimeoutSecs = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TIMEOUT) + private val sparkFiles = sparkConf.getOption("spark.files") + private val sparkJars = sparkConf.getOption("spark.jars") private val waitForAppCompletion: Boolean = sparkConf.get(WAIT_FOR_APP_COMPLETION) @@ -78,9 +77,18 @@ private[spark] class Client( def run(): Unit = { logInfo(s"Starting application $kubernetesAppId in Kubernetes...") - - Seq(uploadedFiles, uploadedJars, Some(mainAppResource)).foreach(checkForFilesExistence) - + val submitterLocalFiles = KubernetesFileUtils.getOnlySubmitterLocalFiles(sparkFiles) + val submitterLocalJars = KubernetesFileUtils.getOnlySubmitterLocalFiles(sparkJars) + (submitterLocalFiles ++ submitterLocalJars).foreach { file => + if (!new File(Utils.resolveURI(file).getPath).isFile) { + throw new SparkException(s"File $file does not exist or is a directory.") + } + } + if (KubernetesFileUtils.isUriLocalFile(mainAppResource) && + !new File(Utils.resolveURI(mainAppResource).getPath).isFile) { + throw new SparkException(s"Main app resource file $mainAppResource is not a file or" + + s" is a directory.") + } val (driverSubmitSslOptions, isKeyStoreLocalFile) = parseDriverSubmitSslOptions() val parsedCustomLabels = parseCustomLabels(customLabels) var k8ConfBuilder = new K8SConfigBuilder() @@ -145,7 +153,7 @@ private[spark] class Client( } try { submitApplicationToDriverServer(kubernetesClient, driverSubmitSslOptions, - ownerReferenceConfiguredDriverService) + 
ownerReferenceConfiguredDriverService, submitterLocalFiles, submitterLocalJars) // wait if configured to do so if (waitForAppCompletion) { logInfo(s"Waiting for application $kubernetesAppId to finish...") @@ -193,7 +201,9 @@ private[spark] class Client( private def submitApplicationToDriverServer( kubernetesClient: KubernetesClient, driverSubmitSslOptions: SSLOptions, - driverService: Service) = { + driverService: Service, + submitterLocalFiles: Iterable[String], + submitterLocalJars: Iterable[String]): Unit = { sparkConf.getOption("spark.app.id").foreach { id => logWarning(s"Warning: Provided app id in spark.app.id as $id will be" + s" overridden as $kubernetesAppId") @@ -211,7 +221,7 @@ private[spark] class Client( driverSubmitter.ping() logInfo(s"Submitting local resources to driver pod for application " + s"$kubernetesAppId ...") - val submitRequest = buildSubmissionRequest() + val submitRequest = buildSubmissionRequest(submitterLocalFiles, submitterLocalJars) driverSubmitter.submitApplication(submitRequest) logInfo("Successfully submitted local resources and driver configuration to" + " driver pod.") @@ -502,25 +512,18 @@ private[spark] class Client( val maybeKeyStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_KEYSTORE) val resolvedSparkConf = sparkConf.clone() val (isLocalKeyStore, resolvedKeyStore) = maybeKeyStore.map(keyStore => { - val keyStoreURI = Utils.resolveURI(keyStore) - val isProvidedKeyStoreLocal = keyStoreURI.getScheme match { - case "file" | null => true - case "container" => false - case _ => throw new SparkException(s"Invalid KeyStore URI $keyStore; keyStore URI" + - " for submit server must have scheme file:// or container:// (no scheme defaults" + - " to file://)") - } - (isProvidedKeyStoreLocal, Option.apply(keyStoreURI.getPath)) - }).getOrElse((true, Option.empty[String])) + (KubernetesFileUtils.isUriLocalFile(keyStore), + Option.apply(Utils.resolveURI(keyStore).getPath)) + }).getOrElse((false, Option.empty[String])) resolvedKeyStore.foreach { resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_KEYSTORE, _) } sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE).foreach { trustStore => - val trustStoreURI = Utils.resolveURI(trustStore) - trustStoreURI.getScheme match { - case "file" | null => - resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE, trustStoreURI.getPath) - case _ => throw new SparkException(s"Invalid trustStore URI $trustStore; trustStore URI" + + if (KubernetesFileUtils.isUriLocalFile(trustStore)) { + resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE, + Utils.resolveURI(trustStore).getPath) + } else { + throw new SparkException(s"Invalid trustStore URI $trustStore; trustStore URI" + " for submit server must have no scheme, or scheme file://") } } @@ -673,23 +676,24 @@ private[spark] class Client( .build()) } - private def buildSubmissionRequest(): KubernetesCreateSubmissionRequest = { - val appResourceUri = Utils.resolveURI(mainAppResource) - val resolvedAppResource: AppResource = appResourceUri.getScheme match { - case "file" | null => - val appFile = new File(appResourceUri.getPath) - if (!appFile.isFile) { - throw new IllegalStateException("Provided local file path does not exist" + - s" or is not a file: ${appFile.getAbsolutePath}") - } + private def buildSubmissionRequest( + submitterLocalFiles: Iterable[String], + submitterLocalJars: Iterable[String]): KubernetesCreateSubmissionRequest = { + val mainResourceUri = Utils.resolveURI(mainAppResource) + val resolvedAppResource: AppResource = Option(mainResourceUri.getScheme) + 
.getOrElse("file") match { + case "file" => + val appFile = new File(mainResourceUri.getPath) val fileBytes = Files.toByteArray(appFile) val fileBase64 = Base64.encodeBase64String(fileBytes) UploadedAppResource(resourceBase64Contents = fileBase64, name = appFile.getName) - case "container" => ContainerAppResource(appResourceUri.getPath) + case "local" => ContainerAppResource(mainAppResource) case other => RemoteAppResource(other) } - val uploadJarsBase64Contents = compressFiles(uploadedJars) - val uploadFilesBase64Contents = compressFiles(uploadedFiles) + val uploadFilesBase64Contents = CompressionUtils.createTarGzip(submitterLocalFiles.map( + Utils.resolveURI(_).getPath)) + val uploadJarsBase64Contents = CompressionUtils.createTarGzip(submitterLocalJars.map( + Utils.resolveURI(_).getPath)) KubernetesCreateSubmissionRequest( appResource = resolvedAppResource, mainClass = mainClass, @@ -700,33 +704,6 @@ private[spark] class Client( uploadedFilesBase64Contents = uploadFilesBase64Contents) } - // Because uploaded files should be added to the working directory of the driver, they - // need to not have duplicate file names. They are added to the working directory so the - // user can reliably locate them in their application. This is similar in principle to how - // YARN handles its `spark.files` setting. - private def validateNoDuplicateUploadFileNames(uploadedFilesCommaSeparated: String): Unit = { - val pathsWithDuplicateNames = uploadedFilesCommaSeparated - .split(",") - .groupBy(new File(_).getName) - .filter(_._2.length > 1) - if (pathsWithDuplicateNames.nonEmpty) { - val pathsWithDuplicateNamesSorted = pathsWithDuplicateNames - .values - .flatten - .toList - .sortBy(new File(_).getName) - throw new SparkException("Cannot upload files with duplicate names via" + - s" ${KUBERNETES_DRIVER_UPLOAD_FILES.key}. The following paths have a duplicated" + - s" file name: ${pathsWithDuplicateNamesSorted.mkString(",")}") - } - } - - private def compressFiles(maybeFilePaths: Option[String]): Option[TarGzippedData] = { - maybeFilePaths - .map(_.split(",")) - .map(CompressionUtils.createTarGzip(_)) - } - private def buildDriverSubmissionClient( kubernetesClient: KubernetesClient, service: Service, @@ -813,22 +790,6 @@ private[spark] class Client( }).toMap }).getOrElse(Map.empty[String, String]) } - - private def checkForFilesExistence(maybePaths: Option[String]): Unit = { - maybePaths.foreach { paths => - paths.split(",").foreach { path => - val uri = Utils.resolveURI(path) - uri.getScheme match { - case "file" | null => - val file = new File(uri.getPath) - if (!file.isFile) { - throw new SparkException(s"""file "${uri}" does not exist!""") - } - case _ => - } - } - } - } } private[spark] object Client extends Logging { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index cb4cd42142ca4..ad83b0446538e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -91,30 +91,6 @@ package object config { .stringConf .createWithDefault("default") - private[spark] val KUBERNETES_DRIVER_UPLOAD_JARS = - ConfigBuilder("spark.kubernetes.driver.uploads.jars") - .doc(""" - | Comma-separated list of jars to send to the driver and - | all executors when submitting the application in cluster - | mode. 
- """.stripMargin) - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_UPLOAD_FILES = - ConfigBuilder("spark.kubernetes.driver.uploads.files") - .doc(""" - | Comma-separated list of files to send to the driver and - | all executors when submitting the application in cluster - | mode. The files are added in a flat hierarchy to the - | current working directory of the driver, having the same - | names as the names of the original files. Note that two - | files with the same name cannot be added, even if they - | were in different source directories on the client disk. - """.stripMargin) - .stringConf - .createOptional - // Note that while we set a default for this when we start up the // scheduler, the specific default value is dynamically determined // based on the executor memory. diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala index 6aeb851a16bf4..0d2d1a1c6f5e3 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala @@ -26,8 +26,8 @@ case class KubernetesCreateSubmissionRequest( appArgs: Array[String], sparkProperties: Map[String, String], secret: String, - uploadedJarsBase64Contents: Option[TarGzippedData], - uploadedFilesBase64Contents: Option[TarGzippedData]) extends SubmitRestProtocolRequest { + uploadedJarsBase64Contents: TarGzippedData, + uploadedFilesBase64Contents: TarGzippedData) extends SubmitRestProtocolRequest { message = "create" clientSparkVersion = SPARK_VERSION } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesFileUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesFileUtils.scala new file mode 100644 index 0000000000000..f30be1535f81c --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesFileUtils.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes + +import org.apache.spark.util.Utils + +private[spark] object KubernetesFileUtils { + + private def filterUriStringsByScheme( + uris: Iterable[String], schemeFilter: (String => Boolean)): Iterable[String] = { + uris.filter(uri => schemeFilter(Option(Utils.resolveURI(uri).getScheme).getOrElse("file"))) + } + + def getNonSubmitterLocalFiles(uris: Iterable[String]): Iterable[String] = { + filterUriStringsByScheme(uris, _ != "file") + } + + def getOnlyContainerLocalFiles(uris: Iterable[String]): Iterable[String] = { + filterUriStringsByScheme(uris, _ == "local") + } + + def getOnlySubmitterLocalFiles(uris: Iterable[String]): Iterable[String] = { + filterUriStringsByScheme(uris, _ == "file") + } + + def isUriLocalFile(uri: String): Boolean = { + Option(Utils.resolveURI(uri).getScheme).getOrElse("file") == "file" + } + +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala index c5a7e27b15927..f0b01b2320982 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -16,14 +16,14 @@ */ package org.apache.spark.deploy.rest.kubernetes -import java.io.File +import java.io.{File, FileOutputStream, StringReader} import java.net.URI import java.nio.file.Paths import java.util.concurrent.CountDownLatch import javax.servlet.http.{HttpServletRequest, HttpServletResponse} import com.google.common.base.Charsets -import com.google.common.io.Files +import com.google.common.io.{BaseEncoding, ByteStreams, Files} import org.apache.commons.codec.binary.Base64 import scala.collection.mutable import scala.collection.mutable.ArrayBuffer @@ -145,46 +145,73 @@ private[spark] class KubernetesSparkRestServer( } else { requestMessage match { case KubernetesCreateSubmissionRequest( - appResource, - mainClass, - appArgs, - sparkProperties, - secret, - uploadedJars, - uploadedFiles) => + appResource, + mainClass, + appArgs, + sparkProperties, + secret, + uploadedJars, + uploadedFiles) => val decodedSecret = Base64.decodeBase64(secret) if (!expectedApplicationSecret.sameElements(decodedSecret)) { responseServlet.setStatus(HttpServletResponse.SC_UNAUTHORIZED) handleError("Unauthorized to submit application.") } else { val tempDir = Utils.createTempDir() - val appResourcePath = resolvedAppResource(appResource, tempDir) + val resolvedAppResource = resolveAppResource(appResource, tempDir) val writtenJars = writeUploadedJars(uploadedJars, tempDir) val writtenFiles = writeUploadedFiles(uploadedFiles) val resolvedSparkProperties = new mutable.HashMap[String, String] resolvedSparkProperties ++= sparkProperties - - // Resolve driver classpath and jars val originalJars = sparkProperties.get("spark.jars") .map(_.split(",")) - .getOrElse(Array.empty[String]) - val resolvedJars = writtenJars ++ originalJars ++ Array(appResourcePath) - val sparkJars = new File(sparkHome, "jars").listFiles().map(_.getAbsolutePath) + .getOrElse(Array.empty) + + // The driver at this point has handed us the value of spark.jars verbatim as + // specified in spark-submit. 
At this point, remove all jars that were local + // to the submitting user's disk, and replace them with the paths that were + // written to disk above. + val onlyContainerLocalOrRemoteJars = KubernetesFileUtils + .getNonSubmitterLocalFiles(originalJars) + val resolvedJars = (writtenJars ++ + onlyContainerLocalOrRemoteJars ++ + Array(resolvedAppResource.sparkJarPath)).toSet + if (resolvedJars.nonEmpty) { + resolvedSparkProperties("spark.jars") = resolvedJars.mkString(",") + } else { + resolvedSparkProperties.remove("spark.jars") + } + + // Determining the driver classpath is similar. It's the combination of: + // - Jars written from uploads + // - Jars in (spark.jars + mainAppResource) that has a "local" prefix + // - spark.driver.extraClasspath + // - Spark core jars from the installation + val sparkCoreJars = new File(sparkHome, "jars").listFiles().map(_.getAbsolutePath) val driverExtraClasspath = sparkProperties .get("spark.driver.extraClassPath") .map(_.split(",")) .getOrElse(Array.empty[String]) + val onlyContainerLocalJars = KubernetesFileUtils + .getOnlyContainerLocalFiles(originalJars) val driverClasspath = driverExtraClasspath ++ - resolvedJars ++ - sparkJars - resolvedSparkProperties("spark.jars") = resolvedJars.mkString(",") + Seq(resolvedAppResource.localPath) ++ + writtenJars ++ + onlyContainerLocalJars ++ + sparkCoreJars - // Resolve spark.files + // Resolve spark.files similarly to spark.jars. val originalFiles = sparkProperties.get("spark.files") .map(_.split(",")) .getOrElse(Array.empty[String]) - val resolvedFiles = originalFiles ++ writtenFiles - resolvedSparkProperties("spark.files") = resolvedFiles.mkString(",") + val onlyContainerLocalOrRemoteFiles = KubernetesFileUtils + .getNonSubmitterLocalFiles(originalFiles) + val resolvedFiles = writtenFiles ++ onlyContainerLocalOrRemoteFiles + if (resolvedFiles.nonEmpty) { + resolvedSparkProperties("spark.files") = resolvedFiles.mkString(",") + } else { + resolvedSparkProperties.remove("spark.files") + } val command = new ArrayBuffer[String] command += javaExecutable @@ -235,35 +262,50 @@ private[spark] class KubernetesSparkRestServer( } } - private def writeUploadedJars(files: Option[TarGzippedData], rootTempDir: File): + private def writeUploadedJars(jars: TarGzippedData, rootTempDir: File): Seq[String] = { val resolvedDirectory = new File(rootTempDir, "jars") if (!resolvedDirectory.mkdir()) { throw new IllegalStateException(s"Failed to create jars dir at " + resolvedDirectory.getAbsolutePath) } - writeBase64ContentsToFiles(files, resolvedDirectory) + CompressionUtils.unpackAndWriteCompressedFiles(jars, resolvedDirectory) } - private def writeUploadedFiles(files: Option[TarGzippedData]): Seq[String] = { + private def writeUploadedFiles(files: TarGzippedData): Seq[String] = { val workingDir = Paths.get("").toFile.getAbsoluteFile - writeBase64ContentsToFiles(files, workingDir) + CompressionUtils.unpackAndWriteCompressedFiles(files, workingDir) } - def resolvedAppResource(appResource: AppResource, tempDir: File): String = { - val appResourcePath = appResource match { + + /** + * Retrieve the path on the driver container where the main app resource is, and what value it + * ought to have in the spark.jars property. The two may be different because for non-local + * dependencies, we have to fetch the resource (if it is not "local") but still want to use + * the full URI in spark.jars. 
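+   * For example, a main app resource of local:///opt/app/main.jar resolves to the container-local
+   * path /opt/app/main.jar on the driver's classpath but keeps local:///opt/app/main.jar as its
+   * spark.jars entry, whereas an uploaded resource uses the written temporary file's path for both.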
+ */ + private def resolveAppResource(appResource: AppResource, tempDir: File): + ResolvedAppResource = { + appResource match { case UploadedAppResource(resourceContentsBase64, resourceName) => val resourceFile = new File(tempDir, resourceName) val resourceFilePath = resourceFile.getAbsolutePath if (resourceFile.createNewFile()) { - val resourceContentsBytes = Base64.decodeBase64(resourceContentsBase64) - Files.write(resourceContentsBytes, resourceFile) - resourceFile.getAbsolutePath + Utils.tryWithResource(new StringReader(resourceContentsBase64)) { reader => + Utils.tryWithResource(new FileOutputStream(resourceFile)) { os => + Utils.tryWithResource(BaseEncoding.base64().decodingStream(reader)) { + decodingStream => + ByteStreams.copy(decodingStream, os) + } + } + } + ResolvedAppResource(resourceFile.getAbsolutePath, resourceFile.getAbsolutePath) } else { throw new IllegalStateException(s"Failed to write main app resource file" + s" to $resourceFilePath") } - case ContainerAppResource(resource) => resource + case ContainerAppResource(resource) => + ResolvedAppResource(Utils.resolveURI(resource).getPath, resource) case RemoteAppResource(resource) => Utils.fetchFile(resource, tempDir, conf, securityManager, SparkHadoopUtil.get.newConfiguration(conf), @@ -275,19 +317,12 @@ private[spark] class KubernetesSparkRestServer( throw new IllegalStateException(s"Main app resource is not a file or" + s" does not exist at $downloadedFilePath") } - downloadedFilePath + ResolvedAppResource(downloadedFilePath, resource) } - appResourcePath } } - private def writeBase64ContentsToFiles( - maybeCompressedFiles: Option[TarGzippedData], - rootDir: File): Seq[String] = { - maybeCompressedFiles.map { compressedFiles => - CompressionUtils.unpackAndWriteCompressedFiles(compressedFiles, rootDir) - }.getOrElse(Seq.empty[String]) - } + private case class ResolvedAppResource(localPath: String, sparkJarPath: String) } private[spark] object KubernetesSparkRestServer { diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index 0ec2f36075db3..7f4d935e0e243 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -44,12 +44,6 @@ pom - - org.apache.spark - spark-examples_${scala.binary.version} - ${project.version} - provided - diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index e6b2e31568653..7e700b569a3fb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -161,7 +161,7 @@ private[spark] class Client( driverServiceManager.handleSubmissionError( new SparkException("Submission shutting down early..."))) try { - val sslConfigurationProvider = new SslConfigurationProvider( + val sslConfigurationProvider = new DriverSubmitSslConfigurationProvider( sparkConf, kubernetesAppId, kubernetesClient, kubernetesResourceCleaner) val submitServerSecret = kubernetesClient.secrets().createNew() .withNewMetadata() @@ -182,7 +182,7 @@ private[spark] class Client( configureOwnerReferences( kubernetesClient, submitServerSecret, - sslConfiguration.sslSecrets, + sslConfiguration.sslSecret, driverPod, driverService) submitApplicationToDriverServer( @@ -209,7 +209,6 
@@ private[spark] class Client( Utils.tryLogNonFatalError { driverServiceManager.stop() } - // Remove the shutdown hooks that would be redundant Utils.tryLogNonFatalError { ShutdownHookManager.removeShutdownHook(resourceCleanShutdownHook) @@ -236,7 +235,7 @@ private[spark] class Client( private def submitApplicationToDriverServer( kubernetesClient: KubernetesClient, driverServiceManager: DriverServiceManager, - sslConfiguration: SslConfiguration, + sslConfiguration: DriverSubmitSslConfiguration, driverService: Service, submitterLocalFiles: Iterable[String], submitterLocalJars: Iterable[String], @@ -298,7 +297,7 @@ private[spark] class Client( customLabels: Map[String, String], customAnnotations: Map[String, String], submitServerSecret: Secret, - sslConfiguration: SslConfiguration): (Pod, Service) = { + sslConfiguration: DriverSubmitSslConfiguration): (Pod, Service) = { val driverKubernetesSelectors = (Map( SPARK_DRIVER_LABEL -> kubernetesAppId, SPARK_APP_ID_LABEL -> kubernetesAppId, @@ -349,7 +348,7 @@ private[spark] class Client( private def configureOwnerReferences( kubernetesClient: KubernetesClient, submitServerSecret: Secret, - sslSecrets: Array[Secret], + sslSecret: Option[Secret], driverPod: Pod, driverService: Service): Service = { val driverPodOwnerRef = new OwnerReferenceBuilder() @@ -359,7 +358,7 @@ private[spark] class Client( .withKind(driverPod.getKind) .withController(true) .build() - sslSecrets.foreach(secret => { + sslSecret.foreach(secret => { val updatedSecret = kubernetesClient.secrets().withName(secret.getMetadata.getName).edit() .editMetadata() .addToOwnerReferences(driverPodOwnerRef) @@ -425,10 +424,10 @@ private[spark] class Client( driverKubernetesSelectors: Map[String, String], customAnnotations: Map[String, String], submitServerSecret: Secret, - sslConfiguration: SslConfiguration): Pod = { + sslConfiguration: DriverSubmitSslConfiguration): Pod = { val containerPorts = buildContainerPorts() val probePingHttpGet = new HTTPGetActionBuilder() - .withScheme(if (sslConfiguration.sslOptions.enabled) "HTTPS" else "HTTP") + .withScheme(if (sslConfiguration.enabled) "HTTPS" else "HTTP") .withPath("/v1/submissions/ping") .withNewPort(SUBMISSION_SERVER_PORT_NAME) .build() @@ -452,7 +451,7 @@ private[spark] class Client( .withSecretName(submitServerSecret.getMetadata.getName) .endSecret() .endVolume() - .addToVolumes(sslConfiguration.sslPodVolumes: _*) + .addToVolumes(sslConfiguration.sslPodVolume.toSeq: _*) .withServiceAccount(serviceAccount.getOrElse("default")) .addNewContainer() .withName(DRIVER_CONTAINER_NAME) @@ -463,7 +462,7 @@ private[spark] class Client( .withMountPath(secretDirectory) .withReadOnly(true) .endVolumeMount() - .addToVolumeMounts(sslConfiguration.sslPodVolumeMounts: _*) + .addToVolumeMounts(sslConfiguration.sslPodVolumeMount.toSeq: _*) .addNewEnv() .withName(ENV_SUBMISSION_SECRET_LOCATION) .withValue(s"$secretDirectory/$SUBMISSION_APP_SECRET_NAME") @@ -661,7 +660,7 @@ private[spark] class Client( kubernetesClient: KubernetesClient, driverServiceManager: DriverServiceManager, service: Service, - sslConfiguration: SslConfiguration): KubernetesSparkRestApi = { + sslConfiguration: DriverSubmitSslConfiguration): KubernetesSparkRestApi = { val serviceUris = driverServiceManager.getDriverServiceSubmissionServerUris(service) require(serviceUris.nonEmpty, "No uris found to contact the driver!") HttpClientUtil.createClient[KubernetesSparkRestApi]( diff --git 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverSubmitSslConfigurationProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverSubmitSslConfigurationProvider.scala new file mode 100644 index 0000000000000..a83c9a9896a08 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverSubmitSslConfigurationProvider.scala @@ -0,0 +1,353 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import java.io.{File, FileInputStream} +import java.security.{KeyStore, SecureRandom} +import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} + +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.{EnvVar, EnvVarBuilder, Secret, Volume, VolumeBuilder, VolumeMount, VolumeMountBuilder} +import io.fabric8.kubernetes.client.KubernetesClient +import scala.collection.JavaConverters._ + +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.rest.kubernetes.{KubernetesFileUtils, PemsToKeyStoreConverter} +import org.apache.spark.util.Utils + +/** + * Raw SSL configuration as the user specified in SparkConf for setting up the driver + * submission server. + */ +private case class DriverSubmitSslConfigurationParameters( + storeBasedSslOptions: SSLOptions, + isKeyStoreLocalFile: Boolean, + driverSubmitServerKeyPem: Option[File], + isDriverSubmitKeyPemLocalFile: Boolean, + driverSubmitServerCertPem: Option[File], + isDriverSubmitServerCertPemLocalFile: Boolean, + submissionClientCertPem: Option[File]) + +/** + * Resolved from translating options provided in + * {@link DriverSubmitSslConfigurationParameters} into Kubernetes volumes, environment variables + * for the driver pod, Kubernetes secrets, client-side trust managers, and the client-side SSL + * context. This is used for setting up the SSL connection for the submission server where the + * application local dependencies and configuration is provided from. 
+ */ +private[spark] case class DriverSubmitSslConfiguration( + enabled: Boolean, + sslPodEnvVars: Array[EnvVar], + sslPodVolume: Option[Volume], + sslPodVolumeMount: Option[VolumeMount], + sslSecret: Option[Secret], + driverSubmitClientTrustManager: Option[X509TrustManager], + driverSubmitClientSslContext: SSLContext) + +/** + * Provides the SSL configuration for bootstrapping the driver pod to listen for the driver + * submission over SSL, and then supply the client-side configuration for establishing the + * SSL connection. This is done in two phases: first, interpreting the raw configuration + * values from the SparkConf object; then second, converting the configuration parameters + * into the appropriate Kubernetes constructs, namely the volume and volume mount to add to the + * driver pod, and the secret to create at the API server; and finally, constructing the + * client-side trust manager and SSL context for sending the local dependencies. + */ +private[spark] class DriverSubmitSslConfigurationProvider( + sparkConf: SparkConf, + kubernetesAppId: String, + kubernetesClient: KubernetesClient, + kubernetesResourceCleaner: KubernetesResourceCleaner) { + private val SECURE_RANDOM = new SecureRandom() + private val sslSecretsName = s"$SUBMISSION_SSL_SECRETS_PREFIX-$kubernetesAppId" + private val sslSecretsDirectory = DRIVER_CONTAINER_SUBMISSION_SECRETS_BASE_DIR + + s"/$kubernetesAppId-ssl" + + def getSslConfiguration(): DriverSubmitSslConfiguration = { + val sslConfigurationParameters = parseSslConfigurationParameters() + if (sslConfigurationParameters.storeBasedSslOptions.enabled) { + val storeBasedSslOptions = sslConfigurationParameters.storeBasedSslOptions + val keyStoreSecret = resolveFileToSecretMapping( + sslConfigurationParameters.isKeyStoreLocalFile, + SUBMISSION_SSL_KEYSTORE_SECRET_NAME, + storeBasedSslOptions.keyStore, + "KeyStore") + val keyStorePathEnv = resolveFilePathEnv( + sslConfigurationParameters.isKeyStoreLocalFile, + ENV_SUBMISSION_KEYSTORE_FILE, + SUBMISSION_SSL_KEYSTORE_SECRET_NAME, + storeBasedSslOptions.keyStore) + val storePasswordSecret = storeBasedSslOptions.keyStorePassword.map(password => { + val passwordBase64 = BaseEncoding.base64().encode(password.getBytes(Charsets.UTF_8)) + (SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME, passwordBase64) + }).toMap + val storePasswordLocationEnv = storeBasedSslOptions.keyStorePassword.map(_ => { + new EnvVarBuilder() + .withName(ENV_SUBMISSION_KEYSTORE_PASSWORD_FILE) + .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME") + .build() + }) + val storeKeyPasswordSecret = storeBasedSslOptions.keyPassword.map(password => { + val passwordBase64 = BaseEncoding.base64().encode(password.getBytes(Charsets.UTF_8)) + (SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME, passwordBase64) + }).toMap + val storeKeyPasswordEnv = storeBasedSslOptions.keyPassword.map(_ => { + new EnvVarBuilder() + .withName(ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE) + .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME") + .build() + }) + val storeTypeEnv = storeBasedSslOptions.keyStoreType.map(storeType => { + new EnvVarBuilder() + .withName(ENV_SUBMISSION_KEYSTORE_TYPE) + .withValue(storeType) + .build() + }) + val keyPemSecret = resolveFileToSecretMapping( + sslConfigurationParameters.isDriverSubmitKeyPemLocalFile, + secretName = SUBMISSION_SSL_KEY_PEM_SECRET_NAME, + secretType = "Key pem", + secretFile = sslConfigurationParameters.driverSubmitServerKeyPem) + val keyPemLocationEnv = resolveFilePathEnv( + 
sslConfigurationParameters.isDriverSubmitKeyPemLocalFile, + envName = ENV_SUBMISSION_KEY_PEM_FILE, + secretName = SUBMISSION_SSL_KEY_PEM_SECRET_NAME, + maybeFile = sslConfigurationParameters.driverSubmitServerKeyPem) + val certPemSecret = resolveFileToSecretMapping( + sslConfigurationParameters.isDriverSubmitServerCertPemLocalFile, + secretName = SUBMISSION_SSL_CERT_PEM_SECRET_NAME, + secretType = "Cert pem", + secretFile = sslConfigurationParameters.driverSubmitServerCertPem) + val certPemLocationEnv = resolveFilePathEnv( + sslConfigurationParameters.isDriverSubmitServerCertPemLocalFile, + envName = ENV_SUBMISSION_CERT_PEM_FILE, + secretName = SUBMISSION_SSL_CERT_PEM_SECRET_NAME, + maybeFile = sslConfigurationParameters.driverSubmitServerCertPem) + val useSslEnv = new EnvVarBuilder() + .withName(ENV_SUBMISSION_USE_SSL) + .withValue("true") + .build() + val sslVolume = new VolumeBuilder() + .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) + .withNewSecret() + .withSecretName(sslSecretsName) + .endSecret() + .build() + val sslVolumeMount = new VolumeMountBuilder() + .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) + .withReadOnly(true) + .withMountPath(sslSecretsDirectory) + .build() + val allSecrets = keyStoreSecret ++ + storePasswordSecret ++ + storeKeyPasswordSecret ++ + keyPemSecret ++ + certPemSecret + val sslSecret = kubernetesClient.secrets().createNew() + .withNewMetadata() + .withName(sslSecretsName) + .endMetadata() + .withData(allSecrets.asJava) + .withType("Opaque") + .done() + kubernetesResourceCleaner.registerOrUpdateResource(sslSecret) + val allSslEnvs = keyStorePathEnv ++ + storePasswordLocationEnv ++ + storeKeyPasswordEnv ++ + storeTypeEnv ++ + keyPemLocationEnv ++ + Array(useSslEnv) ++ + certPemLocationEnv + val (driverSubmitClientTrustManager, driverSubmitClientSslContext) = + buildSslConnectionConfiguration(sslConfigurationParameters) + DriverSubmitSslConfiguration( + true, + allSslEnvs.toArray, + Some(sslVolume), + Some(sslVolumeMount), + Some(sslSecret), + driverSubmitClientTrustManager, + driverSubmitClientSslContext) + } else { + DriverSubmitSslConfiguration( + false, + Array[EnvVar](), + None, + None, + None, + None, + SSLContext.getDefault) + } + } + + private def resolveFilePathEnv( + isLocal: Boolean, + envName: String, + secretName: String, + maybeFile: Option[File]): Option[EnvVar] = { + maybeFile.map(file => { + val pemPath = if (isLocal) { + s"$sslSecretsDirectory/$secretName" + } else { + file.getAbsolutePath + } + new EnvVarBuilder() + .withName(envName) + .withValue(pemPath) + .build() + }) + } + + private def resolveFileToSecretMapping( + isLocal: Boolean, + secretName: String, + secretFile: Option[File], + secretType: String): Map[String, String] = { + secretFile.filter(_ => isLocal).map(file => { + if (!file.isFile) { + throw new SparkException(s"$secretType specified at ${file.getAbsolutePath} is not" + + s" a file or does not exist.") + } + val keyStoreBytes = Files.toByteArray(file) + (secretName, BaseEncoding.base64().encode(keyStoreBytes)) + }).toMap + } + + private def parseSslConfigurationParameters(): DriverSubmitSslConfigurationParameters = { + val maybeKeyStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE) + val maybeTrustStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE) + val maybeKeyPem = sparkConf.get(DRIVER_SUBMIT_SSL_KEY_PEM) + val maybeDriverSubmitServerCertPem = sparkConf.get(DRIVER_SUBMIT_SSL_SERVER_CERT_PEM) + val maybeDriverSubmitClientCertPem = sparkConf.get(DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM) + 
validatePemsDoNotConflictWithStores( + maybeKeyStore, + maybeTrustStore, + maybeKeyPem, + maybeDriverSubmitServerCertPem, + maybeDriverSubmitClientCertPem) + val resolvedSparkConf = sparkConf.clone() + val (isLocalKeyStore, resolvedKeyStore) = resolveLocalFile(maybeKeyStore, "keyStore") + resolvedKeyStore.foreach { + resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, _) + } + val (isLocalDriverSubmitServerCertPem, resolvedDriverSubmitServerCertPem) = + resolveLocalFile(maybeDriverSubmitServerCertPem, "server cert PEM") + val (isLocalKeyPem, resolvedKeyPem) = resolveLocalFile(maybeKeyPem, "key PEM") + maybeTrustStore.foreach { trustStore => + require(KubernetesFileUtils.isUriLocalFile(trustStore), s"Invalid trustStore URI" + + s" $trustStore; trustStore URI for submit server must have no scheme, or scheme file://") + resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE, + Utils.resolveURI(trustStore).getPath) + } + val driverSubmitClientCertPem = maybeDriverSubmitClientCertPem.map { driverSubmitClientCert => + require(KubernetesFileUtils.isUriLocalFile(driverSubmitClientCert), + "Invalid client certificate PEM URI $driverSubmitClientCert: client certificate URI must" + + " have no scheme, or scheme file://") + Utils.resolveURI(driverSubmitClientCert).getPath + } + val securityManager = new SparkSecurityManager(resolvedSparkConf) + val storeBasedSslOptions = securityManager.getSSLOptions(DRIVER_SUBMIT_SSL_NAMESPACE) + DriverSubmitSslConfigurationParameters( + storeBasedSslOptions, + isLocalKeyStore, + resolvedKeyPem.map(new File(_)), + isLocalKeyPem, + resolvedDriverSubmitServerCertPem.map(new File(_)), + isLocalDriverSubmitServerCertPem, + driverSubmitClientCertPem.map(new File(_))) + } + + private def resolveLocalFile(file: Option[String], + fileType: String): (Boolean, Option[String]) = { + file.map { f => + require(isValidSslFileScheme(f), s"Invalid $fileType URI $f, $fileType URI" + + s" for submit server must have scheme file:// or local:// (no scheme defaults to file://") + val isLocal = KubernetesFileUtils.isUriLocalFile(f) + (isLocal, Option.apply(Utils.resolveURI(f).getPath)) + }.getOrElse(false, None) + } + + private def validatePemsDoNotConflictWithStores( + maybeKeyStore: Option[String], + maybeTrustStore: Option[String], + maybeKeyPem: Option[String], + maybeDriverSubmitServerCertPem: Option[String], + maybeSubmitClientCertPem: Option[String]) = { + maybeKeyPem.orElse(maybeDriverSubmitServerCertPem).foreach { _ => + require(maybeKeyStore.isEmpty, + "Cannot specify server PEM files and key store files; must specify only one or the other.") + } + maybeKeyPem.foreach { _ => + require(maybeDriverSubmitServerCertPem.isDefined, + "When specifying the key PEM file, the server certificate PEM file must also be provided.") + } + maybeDriverSubmitServerCertPem.foreach { _ => + require(maybeKeyPem.isDefined, + "When specifying the server certificate PEM file, the key PEM file must also be provided.") + } + maybeTrustStore.foreach { _ => + require(maybeSubmitClientCertPem.isEmpty, + "Cannot specify client cert file and truststore file; must specify only one or the other.") + } + } + + private def isValidSslFileScheme(rawUri: String): Boolean = { + val resolvedScheme = Option.apply(Utils.resolveURI(rawUri).getScheme).getOrElse("file") + resolvedScheme == "file" || resolvedScheme == "local" + } + + private def buildSslConnectionConfiguration( + sslConfigurationParameters: DriverSubmitSslConfigurationParameters) + : (Option[X509TrustManager], SSLContext) = { + val 
maybeTrustStore = sslConfigurationParameters.submissionClientCertPem.map { certPem => + PemsToKeyStoreConverter.convertCertPemToTrustStore( + certPem, + sslConfigurationParameters.storeBasedSslOptions.trustStoreType) + }.orElse(sslConfigurationParameters.storeBasedSslOptions.trustStore.map { trustStoreFile => + if (!trustStoreFile.isFile) { + throw new SparkException(s"TrustStore file at ${trustStoreFile.getAbsolutePath}" + + s" does not exist or is not a file.") + } + val trustStore = KeyStore.getInstance( + sslConfigurationParameters + .storeBasedSslOptions + .trustStoreType + .getOrElse(KeyStore.getDefaultType)) + Utils.tryWithResource(new FileInputStream(trustStoreFile)) { trustStoreStream => + val trustStorePassword = sslConfigurationParameters + .storeBasedSslOptions + .trustStorePassword + .map(_.toCharArray) + .orNull + trustStore.load(trustStoreStream, trustStorePassword) + } + trustStore + }) + maybeTrustStore.map { trustStore => + val trustManagerFactory = TrustManagerFactory.getInstance( + TrustManagerFactory.getDefaultAlgorithm) + trustManagerFactory.init(trustStore) + val trustManagers = trustManagerFactory.getTrustManagers + val sslContext = SSLContext.getInstance("TLSv1.2") + sslContext.init(null, trustManagers, SECURE_RANDOM) + (Option.apply(trustManagers(0).asInstanceOf[X509TrustManager]), sslContext) + }.getOrElse((Option.empty[X509TrustManager], SSLContext.getDefault)) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SslConfigurationProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SslConfigurationProvider.scala deleted file mode 100644 index 4bbe3ed385a4d..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SslConfigurationProvider.scala +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes - -import java.io.FileInputStream -import java.security.{KeyStore, SecureRandom} -import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} - -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, Files} -import io.fabric8.kubernetes.api.model.{EnvVar, EnvVarBuilder, Secret, Volume, VolumeBuilder, VolumeMount, VolumeMountBuilder} -import io.fabric8.kubernetes.client.KubernetesClient -import scala.collection.JavaConverters._ -import scala.collection.mutable - -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.util.Utils - -private[spark] case class SslConfiguration( - sslOptions: SSLOptions, - isKeyStoreLocalFile: Boolean, - sslPodEnvVars: Array[EnvVar], - sslPodVolumes: Array[Volume], - sslPodVolumeMounts: Array[VolumeMount], - sslSecrets: Array[Secret], - driverSubmitClientTrustManager: Option[X509TrustManager], - driverSubmitClientSslContext: SSLContext) - -private[spark] class SslConfigurationProvider( - sparkConf: SparkConf, - kubernetesAppId: String, - kubernetesClient: KubernetesClient, - kubernetesResourceCleaner: KubernetesResourceCleaner) { - private val SECURE_RANDOM = new SecureRandom() - private val sslSecretsName = s"$SUBMISSION_SSL_SECRETS_PREFIX-$kubernetesAppId" - private val sslSecretsDirectory = DRIVER_CONTAINER_SUBMISSION_SECRETS_BASE_DIR + - s"/$kubernetesAppId-ssl" - - def getSslConfiguration(): SslConfiguration = { - val (driverSubmitSslOptions, isKeyStoreLocalFile) = parseDriverSubmitSslOptions() - if (driverSubmitSslOptions.enabled) { - val sslSecretsMap = mutable.HashMap[String, String]() - val sslEnvs = mutable.Buffer[EnvVar]() - val secrets = mutable.Buffer[Secret]() - driverSubmitSslOptions.keyStore.foreach(store => { - val resolvedKeyStoreFile = if (isKeyStoreLocalFile) { - if (!store.isFile) { - throw new SparkException(s"KeyStore specified at $store is not a file or" + - s" does not exist.") - } - val keyStoreBytes = Files.toByteArray(store) - val keyStoreBase64 = BaseEncoding.base64().encode(keyStoreBytes) - sslSecretsMap += (SUBMISSION_SSL_KEYSTORE_SECRET_NAME -> keyStoreBase64) - s"$sslSecretsDirectory/$SUBMISSION_SSL_KEYSTORE_SECRET_NAME" - } else { - store.getAbsolutePath - } - sslEnvs += new EnvVarBuilder() - .withName(ENV_SUBMISSION_KEYSTORE_FILE) - .withValue(resolvedKeyStoreFile) - .build() - }) - driverSubmitSslOptions.keyStorePassword.foreach(password => { - val passwordBase64 = BaseEncoding.base64().encode(password.getBytes(Charsets.UTF_8)) - sslSecretsMap += (SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME -> passwordBase64) - sslEnvs += new EnvVarBuilder() - .withName(ENV_SUBMISSION_KEYSTORE_PASSWORD_FILE) - .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME") - .build() - }) - driverSubmitSslOptions.keyPassword.foreach(password => { - val passwordBase64 = BaseEncoding.base64().encode(password.getBytes(Charsets.UTF_8)) - sslSecretsMap += (SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME -> passwordBase64) - sslEnvs += new EnvVarBuilder() - .withName(ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE) - .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME") - .build() - }) - driverSubmitSslOptions.keyStoreType.foreach(storeType => { - sslEnvs += new EnvVarBuilder() - .withName(ENV_SUBMISSION_KEYSTORE_TYPE) - .withValue(storeType) - .build() - 
}) - sslEnvs += new EnvVarBuilder() - .withName(ENV_SUBMISSION_USE_SSL) - .withValue("true") - .build() - val sslVolume = new VolumeBuilder() - .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) - .withNewSecret() - .withSecretName(sslSecretsName) - .endSecret() - .build() - val sslVolumeMount = new VolumeMountBuilder() - .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) - .withReadOnly(true) - .withMountPath(sslSecretsDirectory) - .build() - val sslSecrets = kubernetesClient.secrets().createNew() - .withNewMetadata() - .withName(sslSecretsName) - .endMetadata() - .withData(sslSecretsMap.asJava) - .withType("Opaque") - .done() - kubernetesResourceCleaner.registerOrUpdateResource(sslSecrets) - secrets += sslSecrets - val (driverSubmitClientTrustManager, driverSubmitClientSslContext) = - buildSslConnectionConfiguration(driverSubmitSslOptions) - SslConfiguration( - driverSubmitSslOptions, - isKeyStoreLocalFile, - sslEnvs.toArray, - Array(sslVolume), - Array(sslVolumeMount), - secrets.toArray, - driverSubmitClientTrustManager, - driverSubmitClientSslContext) - } else { - SslConfiguration( - driverSubmitSslOptions, - isKeyStoreLocalFile, - Array[EnvVar](), - Array[Volume](), - Array[VolumeMount](), - Array[Secret](), - None, - SSLContext.getDefault) - } - } - - private def parseDriverSubmitSslOptions(): (SSLOptions, Boolean) = { - val maybeKeyStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_KEYSTORE) - val resolvedSparkConf = sparkConf.clone() - val (isLocalKeyStore, resolvedKeyStore) = maybeKeyStore.map(keyStore => { - val keyStoreURI = Utils.resolveURI(keyStore) - val isProvidedKeyStoreLocal = keyStoreURI.getScheme match { - case "file" | null => true - case "local" => false - case _ => throw new SparkException(s"Invalid KeyStore URI $keyStore; keyStore URI" + - " for submit server must have scheme file:// or local:// (no scheme defaults" + - " to file://)") - } - (isProvidedKeyStoreLocal, Option.apply(keyStoreURI.getPath)) - }).getOrElse((false, Option.empty[String])) - resolvedKeyStore.foreach { - resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_KEYSTORE, _) - } - sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE).foreach { trustStore => - val trustStoreURI = Utils.resolveURI(trustStore) - trustStoreURI.getScheme match { - case "file" | null => - resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE, trustStoreURI.getPath) - case _ => throw new SparkException(s"Invalid trustStore URI $trustStore; trustStore URI" + - " for submit server must have no scheme, or scheme file://") - } - } - val securityManager = new SparkSecurityManager(resolvedSparkConf) - (securityManager.getSSLOptions(KUBERNETES_SUBMIT_SSL_NAMESPACE), isLocalKeyStore) - } - - private def buildSslConnectionConfiguration(driverSubmitSslOptions: SSLOptions): - (Option[X509TrustManager], SSLContext) = { - driverSubmitSslOptions.trustStore.map(trustStoreFile => { - val trustManagerFactory = TrustManagerFactory.getInstance( - TrustManagerFactory.getDefaultAlgorithm) - val trustStore = KeyStore.getInstance( - driverSubmitSslOptions.trustStoreType.getOrElse(KeyStore.getDefaultType)) - if (!trustStoreFile.isFile) { - throw new SparkException(s"TrustStore file at ${trustStoreFile.getAbsolutePath}" + - s" does not exist or is not a file.") - } - Utils.tryWithResource(new FileInputStream(trustStoreFile)) { trustStoreStream => - driverSubmitSslOptions.trustStorePassword match { - case Some(password) => - trustStore.load(trustStoreStream, password.toCharArray) - case None => trustStore.load(trustStoreStream, null) - } - } - 
trustManagerFactory.init(trustStore) - val trustManagers = trustManagerFactory.getTrustManagers - val sslContext = SSLContext.getInstance("TLSv1.2") - sslContext.init(null, trustManagers, SECURE_RANDOM) - (Option.apply(trustManagers(0).asInstanceOf[X509TrustManager]), sslContext) - }).getOrElse((Option.empty[X509TrustManager], SSLContext.getDefault)) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index e33c761ecc8d1..3328809e186e4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -197,26 +197,51 @@ package object config { .timeConf(TimeUnit.SECONDS) .createWithDefault(60L) - private[spark] val KUBERNETES_DRIVER_SUBMIT_KEYSTORE = - ConfigBuilder("spark.ssl.kubernetes.submission.keyStore") + private[spark] val KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE = + ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.keyStore") .doc("KeyStore file for the driver submission server listening on SSL. Can be pre-mounted" + " on the driver container or uploaded from the submitting client.") .stringConf .createOptional - private[spark] val KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE = - ConfigBuilder("spark.ssl.kubernetes.submission.trustStore") + private[spark] val KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE = + ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.trustStore") .doc("TrustStore containing certificates for communicating to the driver submission server" + " over SSL.") .stringConf .createOptional private[spark] val DRIVER_SUBMIT_SSL_ENABLED = - ConfigBuilder("spark.ssl.kubernetes.submission.enabled") + ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.enabled") .doc("Whether or not to use SSL when sending the application dependencies to the driver pod.") .booleanConf .createWithDefault(false) + private[spark] val DRIVER_SUBMIT_SSL_KEY_PEM = + ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.keyPem") + .doc("Key PEM file that the driver submission server will use when setting up TLS" + + " connections. Can be pre-mounted on the driver pod's disk or uploaded from the" + + " submitting client's machine.") + .stringConf + .createOptional + + private[spark] val DRIVER_SUBMIT_SSL_SERVER_CERT_PEM = + ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.serverCertPem") + .doc("Certificate PEM file that is associated with the key PEM file" + + " the submission server uses to set up TLS connections. Can be pre-mounted" + + " on the driver pod's disk or uploaded from the submitting client's machine.") + .stringConf + .createOptional + + private[spark] val DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM = + ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.clientCertPem") + .doc("Certificate pem file that the submission client uses to connect to the submission" + + " server over TLS. 
This should often be the same as the server certificate, but can be" + + " different if the submission client will contact the driver through a proxy instead of" + + " the driver service directly.") + .stringConf + .createOptional + private[spark] val KUBERNETES_DRIVER_SERVICE_NAME = ConfigBuilder("spark.kubernetes.driver.service.name") .doc("Kubernetes service that exposes the driver pod for external access.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 23d216e799fff..0e5fada302421 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -36,6 +36,8 @@ package object constants { private[spark] val SUBMISSION_SSL_KEYSTORE_SECRET_NAME = "spark-submission-server-keystore" private[spark] val SUBMISSION_SSL_SECRETS_PREFIX = "spark-submission-server-ssl" private[spark] val SUBMISSION_SSL_SECRETS_VOLUME_NAME = "spark-submission-server-ssl-secrets" + private[spark] val SUBMISSION_SSL_KEY_PEM_SECRET_NAME = "spark-submission-server-key-pem" + private[spark] val SUBMISSION_SSL_CERT_PEM_SECRET_NAME = "spark-submission-server-cert-pem" // Default and fixed ports private[spark] val SUBMISSION_SERVER_PORT = 7077 @@ -57,6 +59,8 @@ package object constants { private[spark] val ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE = "SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE" private[spark] val ENV_SUBMISSION_KEYSTORE_TYPE = "SPARK_SUBMISSION_KEYSTORE_TYPE" + private[spark] val ENV_SUBMISSION_KEY_PEM_FILE = "SPARK_SUBMISSION_KEY_PEM_FILE" + private[spark] val ENV_SUBMISSION_CERT_PEM_FILE = "SPARK_SUBMISSION_CERT_PEM_FILE" private[spark] val ENV_SUBMISSION_USE_SSL = "SPARK_SUBMISSION_USE_SSL" private[spark] val ENV_EXECUTOR_PORT = "SPARK_EXECUTOR_PORT" private[spark] val ENV_DRIVER_URL = "SPARK_DRIVER_URL" @@ -74,7 +78,7 @@ package object constants { // Miscellaneous private[spark] val DRIVER_CONTAINER_NAME = "spark-kubernetes-driver" - private[spark] val KUBERNETES_SUBMIT_SSL_NAMESPACE = "kubernetes.submission" + private[spark] val DRIVER_SUBMIT_SSL_NAMESPACE = "kubernetes.driversubmitserver" private[spark] val KUBERNETES_MASTER_INTERNAL_URL = "https://kubernetes.default.svc" private[spark] val MEMORY_OVERHEAD_FACTOR = 0.10 private[spark] val MEMORY_OVERHEAD_MIN = 384L diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala index 4688521a59d38..4ca01b2f6bd38 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -19,6 +19,7 @@ package org.apache.spark.deploy.rest.kubernetes import java.io.{File, FileOutputStream, StringReader} import java.net.URI import java.nio.file.Paths +import java.security.SecureRandom import java.util.concurrent.CountDownLatch import java.util.concurrent.atomic.AtomicInteger import javax.servlet.http.{HttpServletRequest, HttpServletResponse} @@ -26,10 +27,11 @@ import javax.servlet.http.{HttpServletRequest, HttpServletResponse} import 
com.google.common.base.Charsets import com.google.common.io.{BaseEncoding, ByteStreams, Files} import org.apache.commons.codec.binary.Base64 +import org.apache.commons.lang3.RandomStringUtils import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SSLOptions} +import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.rest._ @@ -44,7 +46,9 @@ private case class KubernetesSparkRestServerArguments( keyStoreFile: Option[String] = None, keyStorePasswordFile: Option[String] = None, keyStoreType: Option[String] = None, - keyPasswordFile: Option[String] = None) { + keyPasswordFile: Option[String] = None, + keyPemFile: Option[String] = None, + certPemFile: Option[String] = None) { def validate(): KubernetesSparkRestServerArguments = { require(host.isDefined, "Hostname not set via --hostname.") require(port.isDefined, "Port not set via --port") @@ -83,6 +87,12 @@ private object KubernetesSparkRestServerArguments { case "--keystore-key-password-file" :: value :: tail => args = tail resolvedArguments.copy(keyPasswordFile = Some(value)) + case "--key-pem-file" :: value :: tail => + args = tail + resolvedArguments.copy(keyPemFile = Some(value)) + case "--cert-pem-file" :: value :: tail => + args = tail + resolvedArguments.copy(certPemFile = Some(value)) // TODO polish usage message case Nil => resolvedArguments case unknown => throw new IllegalStateException(s"Unknown argument(s) found: $unknown") @@ -377,26 +387,43 @@ private[spark] class KubernetesSparkRestServer( private[spark] object KubernetesSparkRestServer { private val barrier = new CountDownLatch(1) + private val SECURE_RANDOM = new SecureRandom() def main(args: Array[String]): Unit = { val parsedArguments = KubernetesSparkRestServerArguments.fromArgsArray(args) val secretFile = new File(parsedArguments.secretFile.get) - if (!secretFile.isFile) { - throw new IllegalArgumentException(s"Secret file specified by --secret-file" + - " is not a file, or does not exist.") - } + require(secretFile.isFile, "Secret file specified by --secret-file is not a file, or" + + " does not exist.") val sslOptions = if (parsedArguments.useSsl) { - val keyStorePassword = parsedArguments - .keyStorePasswordFile - .map(new File(_)) - .map(Files.toString(_, Charsets.UTF_8)) + validateSslOptions(parsedArguments) val keyPassword = parsedArguments .keyPasswordFile .map(new File(_)) .map(Files.toString(_, Charsets.UTF_8)) + // If key password isn't set but we're using PEM files, generate a password + .orElse(parsedArguments.keyPemFile.map(_ => randomPassword())) + val keyStorePassword = parsedArguments + .keyStorePasswordFile + .map(new File(_)) + .map(Files.toString(_, Charsets.UTF_8)) + // If keystore password isn't set but we're using PEM files, generate a password + .orElse(parsedArguments.keyPemFile.map(_ => randomPassword())) + val resolvedKeyStore = parsedArguments.keyStoreFile.map(new File(_)).orElse( + parsedArguments.keyPemFile.map(keyPemFile => { + parsedArguments.certPemFile.map(certPemFile => { + PemsToKeyStoreConverter.convertPemsToTempKeyStoreFile( + new File(keyPemFile), + new File(certPemFile), + "provided-key", + keyStorePassword, + keyPassword, + parsedArguments.keyStoreType) + }) + }).getOrElse(throw new SparkException("When providing PEM files to set up TLS for the" + + 
" submission server, both the key and the certificate must be specified."))) new SSLOptions( enabled = true, - keyStore = parsedArguments.keyStoreFile.map(new File(_)), + keyStore = resolvedKeyStore, keyStoreType = parsedArguments.keyStoreType, keyStorePassword = keyStorePassword, keyPassword = keyPassword) @@ -425,5 +452,25 @@ private[spark] object KubernetesSparkRestServer { barrier.await() System.exit(exitCode.get()) } + + private def validateSslOptions(parsedArguments: KubernetesSparkRestServerArguments): Unit = { + parsedArguments.keyStoreFile.foreach { _ => + require(parsedArguments.keyPemFile.orElse(parsedArguments.certPemFile).isEmpty, + "Cannot provide both key/cert PEM files and a keyStore file; select one or the other" + + " for configuring SSL.") + } + parsedArguments.keyPemFile.foreach { _ => + require(parsedArguments.certPemFile.isDefined, + "When providing the key PEM file, the certificate PEM file must also be provided.") + } + parsedArguments.certPemFile.foreach { _ => + require(parsedArguments.keyPemFile.isDefined, + "When providing the certificate PEM file, the key PEM file must also be provided.") + } + } + + private def randomPassword(): String = { + RandomStringUtils.random(1024, 0, Integer.MAX_VALUE, false, false, null, SECURE_RANDOM) + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala new file mode 100644 index 0000000000000..e5c43560eccb4 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes + +import java.io.{File, FileInputStream, FileOutputStream, InputStreamReader} +import java.nio.file.Paths +import java.security.{KeyStore, PrivateKey} +import java.security.cert.Certificate +import java.util.UUID + +import com.google.common.base.Charsets +import org.bouncycastle.asn1.pkcs.PrivateKeyInfo +import org.bouncycastle.cert.X509CertificateHolder +import org.bouncycastle.cert.jcajce.JcaX509CertificateConverter +import org.bouncycastle.openssl.{PEMKeyPair, PEMParser} +import org.bouncycastle.openssl.jcajce.JcaPEMKeyConverter +import scala.collection.mutable + +import org.apache.spark.SparkException +import org.apache.spark.util.Utils + +private[spark] object PemsToKeyStoreConverter { + + /** + * Loads the given key-cert pair into a temporary keystore file. Returns the File pointing + * to where the keyStore was written to disk. 
+ */ + def convertPemsToTempKeyStoreFile( + keyPemFile: File, + certPemFile: File, + keyAlias: String, + keyStorePassword: Option[String], + keyPassword: Option[String], + keyStoreType: Option[String]): File = { + require(keyPemFile.isFile, s"Key PEM file provided at ${keyPemFile.getAbsolutePath}" + + " does not exist or is not a file.") + require(certPemFile.isFile, s"Cert PEM file provided at ${certPemFile.getAbsolutePath}" + + " does not exist or is not a file.") + val privateKey = parsePrivateKeyFromPemFile(keyPemFile) + val certificates = parseCertificatesFromPemFile(certPemFile) + val resolvedKeyStoreType = keyStoreType.getOrElse(KeyStore.getDefaultType) + val keyStore = KeyStore.getInstance(resolvedKeyStoreType) + keyStore.load(null, null) + keyStore.setKeyEntry( + keyAlias, + privateKey, + keyPassword.map(_.toCharArray).orNull, + certificates) + val keyStoreOutputPath = Paths.get(s"keystore-${UUID.randomUUID()}.$resolvedKeyStoreType") + Utils.tryWithResource(new FileOutputStream(keyStoreOutputPath.toFile)) { storeStream => + keyStore.store(storeStream, keyStorePassword.map(_.toCharArray).orNull) + } + keyStoreOutputPath.toFile + } + + def convertCertPemToTrustStore( + certPemFile: File, + trustStoreType: Option[String]): KeyStore = { + require(certPemFile.isFile, s"Cert PEM file provided at ${certPemFile.getAbsolutePath}" + + " does not exist or is not a file.") + val trustStore = KeyStore.getInstance(trustStoreType.getOrElse(KeyStore.getDefaultType)) + trustStore.load(null, null) + parseCertificatesFromPemFile(certPemFile).zipWithIndex.foreach { case (cert, index) => + trustStore.setCertificateEntry(s"certificate-$index", cert) + } + trustStore + } + + private def withPemParsedFromFile[T](pemFile: File)(f: (PEMParser => T)): T = { + Utils.tryWithResource(new FileInputStream(pemFile)) { pemStream => + Utils.tryWithResource(new InputStreamReader(pemStream, Charsets.UTF_8)) { pemReader => + Utils.tryWithResource(new PEMParser(pemReader))(f) + } + } + } + + private def parsePrivateKeyFromPemFile(keyPemFile: File): PrivateKey = { + withPemParsedFromFile(keyPemFile) { keyPemParser => + val converter = new JcaPEMKeyConverter + keyPemParser.readObject() match { + case privateKey: PrivateKeyInfo => + converter.getPrivateKey(privateKey) + case keyPair: PEMKeyPair => + converter.getPrivateKey(keyPair.getPrivateKeyInfo) + case _ => + throw new SparkException(s"Key file provided at ${keyPemFile.getAbsolutePath}" + + s" is not a key pair or private key PEM file.") + } + } + } + + private def parseCertificatesFromPemFile(certPemFile: File): Array[Certificate] = { + withPemParsedFromFile(certPemFile) { certPemParser => + val certificates = mutable.Buffer[Certificate]() + var pemObject = certPemParser.readObject() + while (pemObject != null) { + pemObject match { + case certificate: X509CertificateHolder => + val converter = new JcaX509CertificateConverter + certificates += converter.getCertificate(certificate) + case _ => + } + pemObject = certPemParser.readObject() + } + if (certificates.isEmpty) { + throw new SparkException(s"No certificates found in ${certPemFile.getAbsolutePath}") + } + certificates.toArray + } + } +} diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 3bf6b50ff69c1..1f35e7e5eb209 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ 
b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -42,6 +42,8 @@ CMD SSL_ARGS="" && \ if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_TYPE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-type $SPARK_SUBMISSION_KEYSTORE_TYPE"; fi && \ if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-password-file $SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE"; fi && \ if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-key-password-file $SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE"; fi && \ + if ! [ -z ${SPARK_SUBMISSION_KEY_PEM_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --key-pem-file $SPARK_SUBMISSION_KEY_PEM_FILE"; fi && \ + if ! [ -z ${SPARK_SUBMISSION_CERT_PEM_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --cert-pem-file $SPARK_SUBMISSION_CERT_PEM_FILE"; fi && \ exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.KubernetesSparkRestServer \ --hostname $HOSTNAME \ --port $SPARK_SUBMISSION_SERVER_PORT \ diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 5c54d0e5e3aab..da78e783cac1b 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -106,10 +106,6 @@ - - org.bouncycastle - bcpkix-jdk15on - diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 16564ca746b40..0e55e64fd1d77 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -72,8 +72,6 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { private val NAMESPACE = UUID.randomUUID().toString.replaceAll("-", "") private var minikubeKubernetesClient: KubernetesClient = _ private var clientConfig: Config = _ - private var keyStoreFile: File = _ - private var trustStoreFile: File = _ private var sparkConf: SparkConf = _ override def beforeAll(): Unit = { @@ -86,13 +84,6 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .done() minikubeKubernetesClient = Minikube.getKubernetesClient.inNamespace(NAMESPACE) clientConfig = minikubeKubernetesClient.getConfiguration - val (keyStore, trustStore) = SSLUtils.generateKeyStoreTrustStorePair( - Minikube.getMinikubeIp, - "changeit", - "changeit", - "changeit") - keyStoreFile = keyStore - trustStoreFile = trustStore } before { @@ -182,9 +173,6 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { } test("Run a simple example") { - // We'll make assertions based on spark rest api, so we need to turn on - // spark.ui.enabled explicitly since the scalatest-maven-plugin would set it - // to false by default. 
new Client( sparkConf = sparkConf, mainClass = SPARK_PI_MAIN_CLASS, @@ -265,11 +253,30 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { } test("Enable SSL on the driver submit server") { - sparkConf.set(KUBERNETES_DRIVER_SUBMIT_KEYSTORE, s"file://${keyStoreFile.getAbsolutePath}") - sparkConf.set("spark.ssl.kubernetes.submission.keyStorePassword", "changeit") - sparkConf.set("spark.ssl.kubernetes.submission.keyPassword", "changeit") - sparkConf.set(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE, + val (keyStoreFile, trustStoreFile) = SSLUtils.generateKeyStoreTrustStorePair( + Minikube.getMinikubeIp, + "changeit", + "changeit", + "changeit") + sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, s"file://${keyStoreFile.getAbsolutePath}") + sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyStorePassword", "changeit") + sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyPassword", "changeit") + sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE, s"file://${trustStoreFile.getAbsolutePath}") + sparkConf.set("spark.ssl.kubernetes.driversubmitserver.trustStorePassword", "changeit") + sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) + new Client( + sparkConf = sparkConf, + mainClass = SPARK_PI_MAIN_CLASS, + mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + } + + test("Enable SSL on the driver submit server using PEM files") { + val (keyPem, certPem) = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) + sparkConf.set(DRIVER_SUBMIT_SSL_KEY_PEM, s"file://${keyPem.getAbsolutePath}") + sparkConf.set(DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM, s"file://${certPem.getAbsolutePath}") + sparkConf.set(DRIVER_SUBMIT_SSL_SERVER_CERT_PEM, s"file://${certPem.getAbsolutePath}") sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) new Client( sparkConf = sparkConf, diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala index bde7b43226660..2078e0585e8f0 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala @@ -16,15 +16,18 @@ */ package org.apache.spark.deploy.kubernetes.integrationtest.sslutil -import java.io.{File, FileOutputStream} +import java.io.{File, FileOutputStream, OutputStreamWriter} import java.math.BigInteger import java.nio.file.Files -import java.security.{KeyPairGenerator, KeyStore, SecureRandom} +import java.security.cert.X509Certificate +import java.security.{KeyPair, KeyPairGenerator, KeyStore, SecureRandom} import java.util.{Calendar, Random} import javax.security.auth.x500.X500Principal +import com.google.common.base.Charsets import org.bouncycastle.asn1.x509.{Extension, GeneralName, GeneralNames} import org.bouncycastle.cert.jcajce.{JcaX509CertificateConverter, JcaX509v3CertificateBuilder} +import org.bouncycastle.openssl.jcajce.JcaPEMWriter import org.bouncycastle.operator.jcajce.JcaContentSignerBuilder import org.apache.spark.util.Utils @@ -39,6 +42,58 @@ private[spark] object SSLUtils { val keyPairGenerator = KeyPairGenerator.getInstance("RSA") keyPairGenerator.initialize(512) val keyPair = keyPairGenerator.generateKeyPair() + val certificate = 
generateCertificate(ipAddress, keyPair) + val keyStore = KeyStore.getInstance("JKS") + keyStore.load(null, null) + keyStore.setKeyEntry("key", keyPair.getPrivate, + keyPassword.toCharArray, Array(certificate)) + val tempDir = Files.createTempDirectory("temp-ssl-stores").toFile + tempDir.deleteOnExit() + val keyStoreFile = new File(tempDir, "keyStore.jks") + Utils.tryWithResource(new FileOutputStream(keyStoreFile)) { + keyStore.store(_, keyStorePassword.toCharArray) + } + val trustStore = KeyStore.getInstance("JKS") + trustStore.load(null, null) + trustStore.setCertificateEntry("key", certificate) + val trustStoreFile = new File(tempDir, "trustStore.jks") + Utils.tryWithResource(new FileOutputStream(trustStoreFile)) { + trustStore.store(_, trustStorePassword.toCharArray) + } + (keyStoreFile, trustStoreFile) + } + + def generateKeyCertPemPair(ipAddress: String): (File, File) = { + val keyPairGenerator = KeyPairGenerator.getInstance("RSA") + keyPairGenerator.initialize(512) + val keyPair = keyPairGenerator.generateKeyPair() + val certificate = generateCertificate(ipAddress, keyPair) + val tempDir = Files.createTempDirectory("temp-ssl-pems").toFile + tempDir.deleteOnExit() + val keyPemFile = new File(tempDir, "key.pem") + val certPemFile = new File(tempDir, "cert.pem") + Utils.tryWithResource(new FileOutputStream(keyPemFile)) { keyPemStream => + Utils.tryWithResource( + new OutputStreamWriter(keyPemStream, Charsets.UTF_8)) { streamWriter => + Utils.tryWithResource( + new JcaPEMWriter(streamWriter)) { pemWriter => + pemWriter.writeObject(keyPair.getPrivate) + } + } + } + Utils.tryWithResource(new FileOutputStream(certPemFile)) { keyPemStream => + Utils.tryWithResource( + new OutputStreamWriter(keyPemStream, Charsets.UTF_8)) { streamWriter => + Utils.tryWithResource( + new JcaPEMWriter(streamWriter)) { pemWriter => + pemWriter.writeObject(certificate) + } + } + } + (keyPemFile, certPemFile) + } + + private def generateCertificate(ipAddress: String, keyPair: KeyPair): X509Certificate = { val selfPrincipal = new X500Principal(s"cn=$ipAddress") val currentDate = Calendar.getInstance val validForOneHundredYears = Calendar.getInstance @@ -56,25 +111,6 @@ private[spark] object SSLUtils { .setSecureRandom(new SecureRandom()) .build(keyPair.getPrivate) val bcCertificate = certificateBuilder.build(signer) - val jcaCertificate = new JcaX509CertificateConverter().getCertificate(bcCertificate) - val keyStore = KeyStore.getInstance("JKS") - keyStore.load(null, null) - keyStore.setKeyEntry("key", keyPair.getPrivate, - keyPassword.toCharArray, Array(jcaCertificate)) - val tempDir = Files.createTempDirectory("temp-ssl-stores").toFile() - tempDir.deleteOnExit() - val keyStoreFile = new File(tempDir, "keyStore.jks") - Utils.tryWithResource(new FileOutputStream(keyStoreFile)) { - keyStore.store(_, keyStorePassword.toCharArray) - } - val trustStore = KeyStore.getInstance("JKS") - trustStore.load(null, null) - trustStore.setCertificateEntry("key", jcaCertificate) - val trustStoreFile = new File(tempDir, "trustStore.jks") - Utils.tryWithResource(new FileOutputStream(trustStoreFile)) { - trustStore.store(_, trustStorePassword.toCharArray) - } - (keyStoreFile, trustStoreFile) + new JcaX509CertificateConverter().getCertificate(bcCertificate) } - } From 7039934e392620f958e7908bb91cdde3dae7a04f Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 20 Mar 2017 23:51:50 -0700 Subject: [PATCH 084/156] Update tags on docker images. 
(#196) --- docs/running-on-kubernetes.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 3b6935560a575..b03396f37f644 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -24,11 +24,11 @@ If you wish to use pre-built docker images, you may use the images published in ComponentImage Spark Driver Image - kubespark/spark-driver:v2.1.0-k8s-support-0.1.0-alpha.1 + kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-rc1 Spark Executor Image - kubespark/spark-executor:v2.1.0-k8s-support-0.1.0-alpha.1 + kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-rc1 @@ -57,8 +57,8 @@ are set up as described above: --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-k8s-support-0.1.0-alpha.1 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-k8s-support-0.1.0-alpha.1 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-rc1 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-rc1 \ examples/jars/spark_examples_2.11-2.2.0.jar The Spark master, specified either via passing the `--master` command line argument to `spark-submit` or by setting @@ -108,8 +108,8 @@ If our local proxy were listening on port 8001, we would have our submission loo --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-k8s-support-0.1.0-alpha.1 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-k8s-support-0.1.0-alpha.1 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-rc1 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-rc1 \ examples/jars/spark_examples_2.11-2.2.0.jar Communication between Spark and Kubernetes clusters is performed using the fabric8 kubernetes-client library. From 325424653e2d06553bf547c7a08b076d91527f7b Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Wed, 22 Mar 2017 12:10:46 -0700 Subject: [PATCH 085/156] Add additional instructions to use release tarball (#198) --- docs/running-on-kubernetes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index b03396f37f644..794099638f80c 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -10,7 +10,7 @@ currently limited and not well-tested. This should not be used in production env * You must have a running Kubernetes cluster with access configured to it using [kubectl](https://kubernetes.io/docs/user-guide/prereqs/). If you do not already have a working Kubernetes cluster, you may setup a test cluster on your local machine using [minikube](https://kubernetes.io/docs/getting-started-guides/minikube/). * You must have appropriate permissions to create and list [pods](https://kubernetes.io/docs/user-guide/pods/), [nodes](https://kubernetes.io/docs/admin/node/) and [services](https://kubernetes.io/docs/user-guide/services/) in your cluster. You can verify that you can list these resources by running `kubectl get nodes`, `kubectl get pods` and `kubectl get svc` which should give you a list of nodes, pods and services (if any) respectively. 
-* You must [build Spark with Kubernetes support](../resource-managers/kubernetes/README.md#building-spark-with-kubernetes-support) from source. +* You must have a spark distribution with Kubernetes support. This may be obtained from the [release tarball](https://github.com/apache-spark-on-k8s/spark/releases) or by [building Spark with Kubernetes support](../resource-managers/kubernetes/README.md#building-spark-with-kubernetes-support). ## Driver & Executor Images From 35a5e32b1600f1d713877bde23f3708c8dfc120e Mon Sep 17 00:00:00 2001 From: Ye Yin Date: Thu, 30 Mar 2017 13:01:29 +0800 Subject: [PATCH 086/156] Support specify CPU cores for driver pod (#207) --- .../scala/org/apache/spark/deploy/kubernetes/Client.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 7e700b569a3fb..e628464aa6201 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -64,6 +64,9 @@ private[spark] class Client( .map(_.split(",")) .getOrElse(Array.empty[String]) + // CPU settings + private val driverCpuCores = sparkConf.getOption("spark.driver.cores").getOrElse("1") + // Memory settings private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) private val driverSubmitServerMemoryMb = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_SERVER_MEMORY) @@ -431,6 +434,9 @@ private[spark] class Client( .withPath("/v1/submissions/ping") .withNewPort(SUBMISSION_SERVER_PORT_NAME) .build() + val driverCpuQuantity = new QuantityBuilder(false) + .withAmount(driverCpuCores) + .build() val driverMemoryQuantity = new QuantityBuilder(false) .withAmount(s"${driverContainerMemoryMb}M") .build() @@ -478,6 +484,8 @@ private[spark] class Client( .endEnv() .addToEnv(sslConfiguration.sslPodEnvVars: _*) .withNewResources() + .addToRequests("cpu", driverCpuQuantity) + .addToLimits("cpu", driverCpuQuantity) .addToRequests("memory", driverMemoryQuantity) .addToLimits("memory", driverMemoryLimitQuantity) .endResources() From 0a13206df61a96bc84882fcc34629f599ab88530 Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Wed, 5 Apr 2017 13:08:37 -0700 Subject: [PATCH 087/156] Register executors using pod IPs instead of pod host names (#215) * Register executors using pod IPs * Fix block manager port typo * Fix import * Keep requiredEnv to be a val * Clean up indentation --- .../spark/deploy/kubernetes/Client.scala | 4 ++-- .../spark/deploy/kubernetes/constants.scala | 1 + .../KubernetesClusterSchedulerBackend.scala | 21 +++++++++++++------ .../src/main/docker/executor/Dockerfile | 2 +- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index e628464aa6201..5d115115b4595 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -252,8 +252,8 @@ private[spark] class Client( sparkConf.set("spark.app.id", kubernetesAppId) sparkConf.setIfMissing("spark.app.name", appName) 
sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) - sparkConf.setIfMissing("spark.blockmanager.port", - DEFAULT_BLOCKMANAGER_PORT.toString) + sparkConf.setIfMissing("spark.driver.blockManager.port", DEFAULT_BLOCKMANAGER_PORT.toString) + sparkConf.setIfMissing("spark.blockManager.port", DEFAULT_BLOCKMANAGER_PORT.toString) sparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { _ => sparkConf.set(KUBERNETES_SUBMIT_OAUTH_TOKEN, "") } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 0e5fada302421..03b3d21ac9c45 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -68,6 +68,7 @@ package object constants { private[spark] val ENV_EXECUTOR_MEMORY = "SPARK_EXECUTOR_MEMORY" private[spark] val ENV_APPLICATION_ID = "SPARK_APPLICATION_ID" private[spark] val ENV_EXECUTOR_ID = "SPARK_EXECUTOR_ID" + private[spark] val ENV_EXECUTOR_POD_IP = "SPARK_EXECUTOR_POD_IP" private[spark] val ENV_DRIVER_MEMORY = "SPARK_DRIVER_MEMORY" // Annotation keys diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 234829a541c30..7eb1a6214df07 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -18,7 +18,8 @@ package org.apache.spark.scheduler.cluster.kubernetes import java.util.concurrent.atomic.{AtomicInteger, AtomicLong} -import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, Pod, QuantityBuilder} +import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, + EnvVarSourceBuilder, Pod, QuantityBuilder} import scala.collection.JavaConverters._ import scala.concurrent.{ExecutionContext, Future} @@ -177,11 +178,19 @@ private[spark] class KubernetesClusterSchedulerBackend( (ENV_EXECUTOR_CORES, executorCores), (ENV_EXECUTOR_MEMORY, executorMemoryString), (ENV_APPLICATION_ID, applicationId()), - (ENV_EXECUTOR_ID, executorId) - ).map(env => new EnvVarBuilder() - .withName(env._1) - .withValue(env._2) - .build()) + (ENV_EXECUTOR_ID, executorId)) + .map(env => new EnvVarBuilder() + .withName(env._1) + .withValue(env._2) + .build() + ) ++ Seq( + new EnvVarBuilder() + .withName(ENV_EXECUTOR_POD_IP) + .withValueFrom(new EnvVarSourceBuilder() + .withNewFieldRef("v1", "status.podIP") + .build()) + .build() + ) val requiredPorts = Seq( (EXECUTOR_PORT_NAME, executorPort), (BLOCK_MANAGER_PORT_NAME, blockmanagerPort)) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index cd5ac466a1fa0..23c6751f1b3ed 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -37,4 +37,4 @@ ENV SPARK_HOME /opt/spark WORKDIR 
/opt/spark # TODO support spark.executor.extraClassPath -CMD exec ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp ${SPARK_HOME}/jars/\* org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $HOSTNAME +CMD exec ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp ${SPARK_HOME}/jars/\* org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP From 13f16d54cf3c4a2f32d4f3302cb570ad347cb3f0 Mon Sep 17 00:00:00 2001 From: mccheah Date: Mon, 10 Apr 2017 11:27:41 -0700 Subject: [PATCH 088/156] Upgrade bouncycastle, force bcprov version (#223) --- pom.xml | 7 ++++++- resource-managers/kubernetes/core/pom.xml | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9cfaf6eb65323..3ac2235b7b742 100644 --- a/pom.xml +++ b/pom.xml @@ -137,7 +137,7 @@ 1.8.1 1.6.0 8.18.0 - 1.52 + 1.54 9.2.16.v20160414 3.1.0 0.8.0 @@ -332,6 +332,11 @@ bcpkix-jdk15on ${bouncycastle.version} + + org.bouncycastle + bcprov-jdk15on + ${bouncycastle.version} + diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 6d2f1d0fd2769..649d004f971d5 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -91,6 +91,10 @@ org.bouncycastle bcpkix-jdk15on + + org.bouncycastle + bcprov-jdk15on + From c6a5c6e8c793905efd7642b817654d22c3d50d9c Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Wed, 12 Apr 2017 17:30:11 -0700 Subject: [PATCH 089/156] Stop executors cleanly before deleting their pods (#231) --- .../kubernetes/KubernetesClusterSchedulerBackend.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 7eb1a6214df07..ccb4194336a44 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -131,6 +131,10 @@ private[spark] class KubernetesClusterSchedulerBackend( } override def stop(): Unit = { + // send stop message to executors so they shut down cleanly + super.stop() + + // then delete the executor pods // TODO investigate why Utils.tryLogNonFatalError() doesn't work in this context. // When using Utils.tryLogNonFatalError some of the code fails but without any logs or // indication as to why. @@ -149,7 +153,6 @@ private[spark] class KubernetesClusterSchedulerBackend( } catch { case e: Throwable => logError("Uncaught exception closing Kubernetes client.", e) } - super.stop() } private def allocateNewExecutorPod(): (String, Pod) = { From 0b0fb6f37a28e5ba992b1387e8ce6b2a0afaa98f Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 13 Apr 2017 17:08:30 -0700 Subject: [PATCH 090/156] Upgrade Kubernetes client to 2.2.13. 
(#230) --- resource-managers/kubernetes/core/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 649d004f971d5..09f0debd50c9c 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -29,7 +29,7 @@ Spark Project Kubernetes kubernetes - 2.2.1 + 2.2.13 From 1388e0a51e959d5f345251ba16f3f4e7d4c09194 Mon Sep 17 00:00:00 2001 From: mccheah Date: Mon, 17 Apr 2017 13:22:34 -0700 Subject: [PATCH 091/156] Respect JVM http proxy settings when using Feign. (#228) * Respect JVM http proxy settings when using Feign. * Address comments * Address more comments` --- .../rest/kubernetes/HttpClientUtil.scala | 64 ++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala index 576f7058f20ee..33988bdc36f04 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala @@ -16,6 +16,9 @@ */ package org.apache.spark.deploy.rest.kubernetes +import java.io.IOException +import java.net.{InetSocketAddress, ProxySelector, SocketAddress, URI} +import java.util.Collections import javax.net.ssl.{SSLContext, SSLSocketFactory, X509TrustManager} import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} @@ -24,12 +27,15 @@ import feign.{Client, Feign, Request, Response} import feign.Request.Options import feign.jackson.{JacksonDecoder, JacksonEncoder} import feign.jaxrs.JAXRSContract +import io.fabric8.kubernetes.client.Config import okhttp3.OkHttpClient import scala.reflect.ClassTag +import org.apache.spark.SparkException +import org.apache.spark.internal.Logging import org.apache.spark.status.api.v1.JacksonMessageWriter -private[spark] object HttpClientUtil { +private[spark] object HttpClientUtil extends Logging { def createClient[T: ClassTag]( uris: Set[String], @@ -42,6 +48,49 @@ private[spark] object HttpClientUtil { Option.apply(trustContext).foreach(context => { httpClientBuilder = httpClientBuilder.sslSocketFactory(sslSocketFactory, context) }) + val uriObjects = uris.map(URI.create) + val httpUris = uriObjects.filter(uri => uri.getScheme == "http") + val httpsUris = uriObjects.filter(uri => uri.getScheme == "https") + val maybeAllProxy = Option.apply(System.getProperty(Config.KUBERNETES_ALL_PROXY)) + val maybeHttpProxy = Option.apply(System.getProperty(Config.KUBERNETES_HTTP_PROXY)) + .orElse(maybeAllProxy) + .map(uriStringToProxy) + val maybeHttpsProxy = Option.apply(System.getProperty(Config.KUBERNETES_HTTPS_PROXY)) + .orElse(maybeAllProxy) + .map(uriStringToProxy) + val maybeNoProxy = Option.apply(System.getProperty(Config.KUBERNETES_NO_PROXY)) + .map(_.split(",")) + .toSeq + .flatten + val proxySelector = new ProxySelector { + override def select(uri: URI): java.util.List[java.net.Proxy] = { + val directProxy = java.net.Proxy.NO_PROXY + val resolvedProxy = maybeNoProxy.find( _ == uri.getHost) + .map( _ => directProxy) + .orElse(uri.getScheme match { + case "http" => + logDebug(s"Looking up http proxies to route $uri") + maybeHttpProxy.filter { _ => + matchingUriExists(uri, httpUris) + } + case "https" => + 
logDebug(s"Looking up https proxies to route $uri") + maybeHttpsProxy.filter { _ => + matchingUriExists(uri, httpsUris) + } + case _ => None + }).getOrElse(directProxy) + logDebug(s"Routing $uri through ${resolvedProxy.address()} with proxy" + + s" type ${resolvedProxy.`type`()}") + Collections.singletonList(resolvedProxy) + } + + override def connectFailed(uri: URI, sa: SocketAddress, ioe: IOException) = { + throw new SparkException(s"Failed to connect to proxy through uri $uri," + + s" socket address: $sa", ioe) + } + } + httpClientBuilder = httpClientBuilder.proxySelector(proxySelector) val objectMapper = new ObjectMapper() .registerModule(new DefaultScalaModule) .setDateFormat(JacksonMessageWriter.makeISODateFormat) @@ -66,4 +115,17 @@ private[spark] object HttpClientUtil { .retryer(target) .target(target) } + + private def matchingUriExists(uri: URI, httpUris: Set[URI]): Boolean = { + httpUris.exists(httpUri => { + httpUri.getScheme == uri.getScheme && httpUri.getHost == uri.getHost && + httpUri.getPort == uri.getPort + }) + } + + private def uriStringToProxy(uriString: String): java.net.Proxy = { + val uriObject = URI.create(uriString) + new java.net.Proxy(java.net.Proxy.Type.HTTP, + new InetSocketAddress(uriObject.getHost, uriObject.getPort)) + } } From 3f6e5ead760bca82c3af070d4d1535511bc6468a Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 20 Apr 2017 23:15:24 -0700 Subject: [PATCH 092/156] Staging server for receiving application dependencies. (#212) * Staging server for receiving application dependencies. * Add unit test for file writing * Minor fixes * Remove getting credentials from the API We still want to post them because in the future we can use these credentials to monitor the API server and handle cleaning up the data accordingly. 
* Generalize to resource staging server outside of Spark * Update code documentation * Val instead of var * Fix naming, remove unused import * Move suites from integration test package to core * Use TrieMap instead of locks * Address comments * Fix imports * Change paths, use POST instead of PUT * Use a resource identifier as well as a resource secret --- pom.xml | 21 ++++ resource-managers/kubernetes/core/pom.xml | 21 ++++ .../kubernetes/v2/ResourceStagingServer.scala | 61 ++++++++++++ .../v2/ResourceStagingService.scala | 85 ++++++++++++++++ .../v2/ResourceStagingServiceImpl.scala | 98 ++++++++++++++++++ .../v2/ResourceStagingServiceRetrofit.scala | 42 ++++++++ .../rest/kubernetes/v2/RetrofitUtils.scala | 38 +++++++ .../v2/StagedResourceIdentifier.scala | 19 ++++ .../v2/ResourceStagingServerSuite.scala | 99 +++++++++++++++++++ .../v2/ResourceStagingServiceImplSuite.scala | 60 +++++++++++ 10 files changed, 544 insertions(+) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/StagedResourceIdentifier.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala diff --git a/pom.xml b/pom.xml index 3ac2235b7b742..9533076a211ee 100644 --- a/pom.xml +++ b/pom.xml @@ -137,6 +137,7 @@ 1.8.1 1.6.0 8.18.0 + 2.2.0 1.54 9.2.16.v20160414 3.1.0 @@ -327,6 +328,21 @@ feign-jaxrs ${feign.version} + + com.squareup.retrofit2 + retrofit + ${retrofit.version} + + + com.squareup.retrofit2 + converter-jackson + ${retrofit.version} + + + com.squareup.retrofit2 + converter-scalars + ${retrofit.version} + org.bouncycastle bcpkix-jdk15on @@ -686,6 +702,11 @@ jersey-client ${jersey.version} + + org.glassfish.jersey.media + jersey-media-multipart + ${jersey.version} + javax.ws.rs javax.ws.rs-api diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 09f0debd50c9c..8856339d4f6d9 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -60,10 +60,31 @@ com.netflix.feign feign-okhttp + + org.glassfish.jersey.containers + jersey-container-servlet + + + org.glassfish.jersey.media + jersey-media-multipart + com.netflix.feign feign-jackson + + com.squareup.retrofit2 + retrofit + + + com.squareup.retrofit2 + converter-jackson + + + com.squareup.retrofit2 + converter-scalars + + com.netflix.feign feign-jaxrs diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala new file mode 100644 index 0000000000000..e09a788c45321 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.jaxrs.json.JacksonJaxbJsonProvider +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import org.eclipse.jetty.server.{Server, ServerConnector} +import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} +import org.eclipse.jetty.util.thread.QueuedThreadPool +import org.glassfish.jersey.media.multipart.MultiPartFeature +import org.glassfish.jersey.server.ResourceConfig +import org.glassfish.jersey.servlet.ServletContainer + +private[spark] class ResourceStagingServer( + port: Int, + serviceInstance: ResourceStagingService) { + + private var jettyServer: Option[Server] = None + + def start(): Unit = synchronized { + val threadPool = new QueuedThreadPool + val contextHandler = new ServletContextHandler() + val jsonProvider = new JacksonJaxbJsonProvider() + jsonProvider.setMapper(new ObjectMapper().registerModule(new DefaultScalaModule)) + val resourceConfig = new ResourceConfig().registerInstances( + serviceInstance, + jsonProvider, + new MultiPartFeature) + val servletHolder = new ServletHolder("main", new ServletContainer(resourceConfig)) + contextHandler.setContextPath("/api/") + contextHandler.addServlet(servletHolder, "/*") + threadPool.setDaemon(true) + val server = new Server(threadPool) + val connector = new ServerConnector(server) + connector.setPort(port) + server.addConnector(connector) + server.setHandler(contextHandler) + server.start() + jettyServer = Some(server) + } + + def stop(): Unit = synchronized { + jettyServer.foreach(_.stop()) + jettyServer = None + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala new file mode 100644 index 0000000000000..5f7ceb461615e --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.InputStream +import javax.ws.rs.{Consumes, GET, HeaderParam, Path, PathParam, POST, Produces} +import javax.ws.rs.core.{MediaType, StreamingOutput} + +import org.glassfish.jersey.media.multipart.FormDataParam + +import org.apache.spark.deploy.rest.KubernetesCredentials + +/** + * Service that receives application data that can be retrieved later on. This is primarily used + * in the context of Spark, but the concept is generic enough to be used for arbitrary applications. + * The use case is to have a place for Kubernetes application submitters to bootstrap dynamic, + * heavyweight application data for pods. Application submitters may have data stored on their + * local disks that they want to provide to the pods they create through the API server. ConfigMaps + * are one way to provide this data, but the data in ConfigMaps is stored in etcd, which cannot + * hold data in the hundreds of megabytes in size. + *

    + * The general use case is for an application submitter to ship the dependencies to the server via + * {@link uploadResources}; the application submitter will then receive a unique secure token. + * The application submitter then ought to convert the token into a secret, and use this secret in + * a pod that fetches the uploaded dependencies via {@link downloadResources}. An application can + * provide multiple resource bundles simply by hitting the upload endpoint multiple times and + * downloading each bundle with the appropriate secret. + */ +@Path("/v0") +private[spark] trait ResourceStagingService { + + /** + * Register a resource with the dependency service, so that pods with the given labels can + * retrieve them when they run. + * + * @param resources Application resources to upload, compacted together in tar + gzip format. + * The tarball should contain the files laid out in a flat hierarchy, without + * any directories. We take a stream here to avoid holding these entirely in + * memory. + * @param podLabels Labels of pods to monitor. When no more pods are running with the given label, + * after some period of time, these dependencies will be cleared. + * @param podNamespace Namespace of pods to monitor. + * @param kubernetesCredentials These credentials are primarily used to monitor the progress of + * the application. When the application shuts down normally, shuts + * down abnormally and does not restart, or fails to start entirely, + * the data uploaded through this endpoint is cleared. + * @return A unique token that should be provided when retrieving these dependencies later. + */ + @POST + @Consumes(Array(MediaType.MULTIPART_FORM_DATA, MediaType.APPLICATION_JSON, MediaType.TEXT_PLAIN)) + @Produces(Array(MediaType.APPLICATION_JSON)) + @Path("/resources") + def uploadResources( + @FormDataParam("podLabels") podLabels: Map[String, String], + @FormDataParam("podNamespace") podNamespace: String, + @FormDataParam("resources") resources: InputStream, + @FormDataParam("kubernetesCredentials") kubernetesCredentials: KubernetesCredentials) + : StagedResourceIdentifier + + /** + * Download an application's resources. The resources are provided as a stream, where the stream's + * underlying data matches the stream that was uploaded in uploadResources. + */ + @GET + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.APPLICATION_OCTET_STREAM)) + @Path("/resources/{resourceId}") + def downloadResources( + @PathParam("resourceId") resourceId: String, + @HeaderParam("Authorization") resourceSecret: String): StreamingOutput +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala new file mode 100644 index 0000000000000..bb338dacdf511 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.{File, FileOutputStream, InputStream, OutputStream} +import java.security.SecureRandom +import java.util.UUID +import javax.ws.rs.{NotAuthorizedException, NotFoundException} +import javax.ws.rs.core.StreamingOutput + +import com.google.common.io.{BaseEncoding, ByteStreams, Files} +import scala.collection.concurrent.TrieMap + +import org.apache.spark.SparkException +import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.internal.Logging +import org.apache.spark.util.Utils + +private[spark] class ResourceStagingServiceImpl(dependenciesRootDir: File) + extends ResourceStagingService with Logging { + + private val SECURE_RANDOM = new SecureRandom() + // TODO clean up these resources based on the driver's lifecycle + private val stagedResources = TrieMap.empty[String, StagedResources] + + override def uploadResources( + podLabels: Map[String, String], + podNamespace: String, + resources: InputStream, + kubernetesCredentials: KubernetesCredentials): StagedResourceIdentifier = { + val resourceId = UUID.randomUUID().toString + val secretBytes = new Array[Byte](1024) + SECURE_RANDOM.nextBytes(secretBytes) + val resourceSecret = resourceId + "-" + BaseEncoding.base64().encode(secretBytes) + + val namespaceDir = new File(dependenciesRootDir, podNamespace) + val resourcesDir = new File(namespaceDir, resourceId) + try { + if (!resourcesDir.exists()) { + if (!resourcesDir.mkdirs()) { + throw new SparkException("Failed to create dependencies directory for application" + + s" at ${resourcesDir.getAbsolutePath}") + } + } + // TODO encrypt the written data with the secret. 
+ val resourcesTgz = new File(resourcesDir, "resources.data") + Utils.tryWithResource(new FileOutputStream(resourcesTgz)) { ByteStreams.copy(resources, _) } + stagedResources(resourceId) = StagedResources( + resourceSecret, + podLabels, + podNamespace, + resourcesTgz, + kubernetesCredentials) + StagedResourceIdentifier(resourceId, resourceSecret) + } catch { + case e: Throwable => + if (!resourcesDir.delete()) { + logWarning(s"Failed to delete application directory $resourcesDir.") + } + throw e + } + } + + override def downloadResources(resourceId: String, resourceSecret: String): StreamingOutput = { + val resource = stagedResources + .get(resourceId) + .getOrElse(throw new NotFoundException(s"No resource bundle found with id $resourceId")) + if (!resource.resourceSecret.equals(resourceSecret)) { + throw new NotAuthorizedException(s"Unauthorized to download resource with id $resourceId") + } + new StreamingOutput { + override def write(outputStream: OutputStream) = { + Files.copy(resource.resourcesFile, outputStream) + } + } + } +} + +private case class StagedResources( + resourceSecret: String, + podLabels: Map[String, String], + podNamespace: String, + resourcesFile: File, + kubernetesCredentials: KubernetesCredentials) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala new file mode 100644 index 0000000000000..daf03f764b35a --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import okhttp3.{RequestBody, ResponseBody} +import retrofit2.Call +import retrofit2.http.{Multipart, Path, Streaming} + +/** + * Retrofit-compatible variant of {@link ResourceStagingService}. For documentation on + * how to use this service, see the aforementioned JAX-RS based interface. 
+ */ +private[spark] trait ResourceStagingServiceRetrofit { + + @Multipart + @retrofit2.http.POST("/api/v0/resources/") + def uploadResources( + @retrofit2.http.Part("podLabels") podLabels: RequestBody, + @retrofit2.http.Part("podNamespace") podNamespace: RequestBody, + @retrofit2.http.Part("resources") resources: RequestBody, + @retrofit2.http.Part("kubernetesCredentials") + kubernetesCredentials: RequestBody): Call[StagedResourceIdentifier] + + @Streaming + @retrofit2.http.GET("/api/v0/resources/{resourceId}") + def downloadResources(@Path("resourceId") resourceId: String, + @retrofit2.http.Header("Authorization") resourceSecret: String): Call[ResponseBody] +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala new file mode 100644 index 0000000000000..c5c5c0d35b7cb --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import retrofit2.Retrofit +import retrofit2.converter.jackson.JacksonConverterFactory +import retrofit2.converter.scalars.ScalarsConverterFactory + +private[spark] object RetrofitUtils { + + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + + def createRetrofitClient[T](baseUrl: String, serviceType: Class[T]): T = { + new Retrofit.Builder() + .baseUrl(baseUrl) + .addConverterFactory(ScalarsConverterFactory.create()) + .addConverterFactory(JacksonConverterFactory.create(OBJECT_MAPPER)) + .build() + .create(serviceType) + } + +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/StagedResourceIdentifier.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/StagedResourceIdentifier.scala new file mode 100644 index 0000000000000..65bc9bc17dae9 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/StagedResourceIdentifier.scala @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
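
For the over-the-wire path, a sketch (again illustrative, not part of the patch) of driving the Retrofit client above: each argument becomes one multipart form part, with structured values serialized as JSON. The server URL, labels, namespace, credentials, and tarball path are placeholder assumptions.

package org.apache.spark.deploy.rest.kubernetes.v2

import java.io.File

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import okhttp3.{MediaType, RequestBody}

import org.apache.spark.deploy.rest.KubernetesCredentials

// Illustrative sketch only; URL and payload values are assumptions for the example.
object RetrofitUploadSketch {
  private val mapper = new ObjectMapper().registerModule(new DefaultScalaModule)

  def main(args: Array[String]): Unit = {
    val service = RetrofitUtils.createRetrofitClient(
      "http://resource-staging-server:10000/", classOf[ResourceStagingServiceRetrofit])
    // Build the four multipart parts expected by the upload endpoint.
    val response = service.uploadResources(
      RequestBody.create(MediaType.parse("application/json"),
        mapper.writeValueAsString(Map("spark-app-name" -> "example-app"))),
      RequestBody.create(MediaType.parse("text/plain"), "default"),
      RequestBody.create(MediaType.parse("multipart/form-data"),
        new File("my-app-resources.tgz")),
      RequestBody.create(MediaType.parse("application/json"),
        mapper.writeValueAsString(KubernetesCredentials(Some("token"), Some("ca-cert"), None, None))))
      .execute()
    require(response.isSuccessful, s"Upload failed with HTTP status ${response.code()}")
    val identifier = response.body()
    // The resourceId can go into the pod spec; the resourceSecret belongs in a Kubernetes secret.
    println(s"Staged resources under id ${identifier.resourceId}")
  }
}
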
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +case class StagedResourceIdentifier(resourceId: String, resourceSecret: String) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala new file mode 100644 index 0000000000000..70ba5be395042 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.net.ServerSocket +import javax.ws.rs.core.MediaType + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.google.common.io.ByteStreams +import okhttp3.{RequestBody, ResponseBody} +import org.scalatest.BeforeAndAfterAll +import retrofit2.Call + +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.util.Utils + +/** + * Tests for {@link ResourceStagingServer} and its APIs. Note that this is not an end-to-end + * integration test, and as such does not upload and download files in tar.gz as would be done + * in production. Thus we use the retrofit clients directly despite the fact that in practice + * we would likely want to create an opinionated abstraction on top of the retrofit client; we + * can test this abstraction layer separately, however. This test is mainly for checking that + * we've configured the Jetty server correctly and that the endpoints reached over HTTP can + * receive streamed uploads and can stream downloads. 
+ */ +class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfterAll { + + private val serverPort = new ServerSocket(0).getLocalPort + private val serviceImpl = new ResourceStagingServiceImpl(Utils.createTempDir()) + private val server = new ResourceStagingServer(serverPort, serviceImpl) + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + + override def beforeAll(): Unit = { + server.start() + } + + override def afterAll(): Unit = { + server.stop() + } + + test("Accept file and jar uploads and downloads") { + val retrofitService = RetrofitUtils.createRetrofitClient(s"http://localhost:$serverPort/", + classOf[ResourceStagingServiceRetrofit]) + val resourcesBytes = Array[Byte](1, 2, 3, 4) + val labels = Map("label1" -> "label1Value", "label2" -> "label2value") + val namespace = "namespace" + val labelsJson = OBJECT_MAPPER.writer().writeValueAsString(labels) + val resourcesRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), resourcesBytes) + val labelsRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), labelsJson) + val namespaceRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.TEXT_PLAIN), namespace) + val kubernetesCredentials = KubernetesCredentials(Some("token"), Some("ca-cert"), None, None) + val kubernetesCredentialsString = OBJECT_MAPPER.writer() + .writeValueAsString(kubernetesCredentials) + val kubernetesCredentialsBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), kubernetesCredentialsString) + val uploadResponse = retrofitService.uploadResources( + labelsRequestBody, namespaceRequestBody, resourcesRequestBody, kubernetesCredentialsBody) + val resourceIdentifier = getTypedResponseResult(uploadResponse) + checkResponseBodyBytesMatches( + retrofitService.downloadResources( + resourceIdentifier.resourceId, resourceIdentifier.resourceSecret), resourcesBytes) + } + + private def getTypedResponseResult[T](call: Call[T]): T = { + val response = call.execute() + assert(response.code() >= 200 && response.code() < 300, Option(response.errorBody()) + .map(_.string()) + .getOrElse("Error executing HTTP request, but error body was not provided.")) + val callResult = response.body() + assert(callResult != null) + callResult + } + + private def checkResponseBodyBytesMatches(call: Call[ResponseBody], bytes: Array[Byte]): Unit = { + val responseBody = getTypedResponseResult(call) + val downloadedBytes = ByteStreams.toByteArray(responseBody.byteStream()) + assert(downloadedBytes.toSeq === bytes) + } + +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala new file mode 100644 index 0000000000000..b92257005d5df --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
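
The suite above verifies the round trip entirely in memory. In a real pod the download would more likely be streamed to disk, taking advantage of the @Streaming annotation on the Retrofit interface; the following is only a sketch, with the URL, identifier, secret, and target path as placeholder assumptions.

package org.apache.spark.deploy.rest.kubernetes.v2

import java.io.{File, FileOutputStream}

import com.google.common.io.ByteStreams

import org.apache.spark.util.Utils

// Illustrative sketch only: a pod that was handed the id and secret streams the bundle to disk.
object RetrofitDownloadSketch {
  def main(args: Array[String]): Unit = {
    val service = RetrofitUtils.createRetrofitClient(
      "http://resource-staging-server:10000/", classOf[ResourceStagingServiceRetrofit])
    // The raw secret is sent as the Authorization header value, matching the server's check.
    val response = service
      .downloadResources("example-resource-id", "example-resource-secret")
      .execute()
    require(response.isSuccessful, s"Download failed with HTTP status ${response.code()}")
    // @Streaming means the body is not buffered in memory, so copy it straight to a file.
    Utils.tryWithResource(response.body().byteStream()) { in =>
      Utils.tryWithResource(new FileOutputStream(new File("downloaded-resources.tgz"))) { out =>
        ByteStreams.copy(in, out)
      }
    }
  }
}
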
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.{ByteArrayInputStream, File} +import java.nio.file.Paths + +import com.google.common.io.Files + +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.util.Utils + +/** + * Unit, Scala-level tests for ResourceStagingServiceImpl. The coverage here + * differs from that of ResourceStagingServerSuite in that here we invoke the + * implementation methods directly rather than over HTTP, and also check the + * data written to the underlying disk. + */ +class ResourceStagingServiceImplSuite extends SparkFunSuite { + + private val dependencyRootDir = Utils.createTempDir() + private val serviceImpl = new ResourceStagingServiceImpl(dependencyRootDir) + private val resourceBytes = Array[Byte](1, 2, 3, 4) + private val kubernetesCredentials = KubernetesCredentials( + Some("token"), Some("caCert"), Some("key"), Some("cert")) + private val namespace = "namespace" + private val labels = Map("label1" -> "label1value", "label2" -> "label2value") + + test("Uploads should write data to the underlying disk") { + Utils.tryWithResource(new ByteArrayInputStream(resourceBytes)) { resourceStream => + serviceImpl.uploadResources(labels, namespace, resourceStream, kubernetesCredentials) + } + val resourceNamespaceDir = Paths.get(dependencyRootDir.getAbsolutePath, "namespace").toFile + assert(resourceNamespaceDir.isDirectory, s"Resource namespace dir was not created at" + + s" ${resourceNamespaceDir.getAbsolutePath} or is not a directory.") + val resourceDirs = resourceNamespaceDir.listFiles() + assert(resourceDirs.length === 1, s"Resource root directory did not have exactly one" + + s" subdirectory. Got: ${resourceDirs.map(_.getAbsolutePath).mkString(",")}") + val resourceTgz = new File(resourceDirs(0), "resources.data") + assert(resourceTgz.isFile, + s"Resource file at ${resourceTgz.getAbsolutePath} does not exist or is not a file.") + val resourceTgzBytes = Files.toByteArray(resourceTgz) + assert(resourceTgzBytes.toSeq === resourceBytes.toSeq, "Incorrect resource bytes were written.") + } +} From e24c4af93c2cff29fb91bb2641ea70db3a22ffa0 Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 21 Apr 2017 00:34:27 -0700 Subject: [PATCH 093/156] Reorganize packages between v1 work and v2 work (#220) * Staging server for receiving application dependencies. * Move packages around to split between v1 work and v2 work * Add unit test for file writing * Remove unnecessary main * Add back license header * Minor fixes * Fix integration test with renamed package for client. Fix scalastyle. * Force json serialization to consider the different package. * Revert extraneous log * Fix scalastyle * Remove getting credentials from the API We still want to post them because in the future we can use these credentials to monitor the API server and handle cleaning up the data accordingly.
* Generalize to resource staging server outside of Spark * Update code documentation * Val instead of var * Fix build * Fix naming, remove unused import * Move suites from integration test package to core * Use TrieMap instead of locks * Address comments * Fix imports * Change paths, use POST instead of PUT * Use a resource identifier as well as a resource secret --- .../scala/org/apache/spark/deploy/SparkSubmit.scala | 2 +- dev/.rat-excludes | 2 +- ....deploy.kubernetes.submit.v1.DriverServiceManager | 2 ++ ...spark.deploy.rest.kubernetes.DriverServiceManager | 2 -- .../org/apache/spark/deploy/kubernetes/config.scala | 2 +- .../deploy/kubernetes/{ => submit/v1}/Client.scala | 5 ++--- .../submit/v1}/CompressionUtils.scala | 4 ++-- .../v1}/DriverPodKubernetesCredentialsProvider.scala | 4 ++-- .../submit/v1}/DriverServiceManager.scala | 3 +-- .../v1}/DriverSubmitSslConfigurationProvider.scala | 4 ++-- .../ExternalSuppliedUrisDriverServiceManager.scala | 2 +- .../{ => submit/v1}/KubernetesResourceCleaner.scala | 2 +- .../{ => submit/v1}/LoggingPodStatusWatcher.scala | 5 ++--- .../v1}/NodePortUrisDriverServiceManager.scala | 2 +- .../rest/kubernetes/{ => v1}/HttpClientUtil.scala | 2 +- .../kubernetes/{ => v1}/KubernetesFileUtils.scala | 2 +- .../v1}/KubernetesRestProtocolMessages.scala | 12 ++++++++++-- .../kubernetes/{ => v1}/KubernetesSparkRestApi.scala | 4 ++-- .../{ => v1}/KubernetesSparkRestServer.scala | 3 ++- .../kubernetes/{ => v1}/MultiServerFeignTarget.scala | 2 +- .../{ => v1}/PemsToKeyStoreConverter.scala | 2 +- .../rest/kubernetes/v2/ResourceStagingService.scala | 2 +- .../kubernetes/v2/ResourceStagingServiceImpl.scala | 2 +- .../kubernetes/KubernetesClientBuilder.scala | 2 +- .../KubernetesClusterSchedulerBackend.scala | 1 - .../kubernetes/v2/ResourceStagingServerSuite.scala | 2 +- .../v2/ResourceStagingServiceImplSuite.scala | 2 +- .../src/main/docker/driver/Dockerfile | 2 +- .../kubernetes/integrationtest/KubernetesSuite.scala | 3 +-- .../integrationtest/minikube/Minikube.scala | 2 +- 30 files changed, 45 insertions(+), 41 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager delete mode 100644 resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.rest.kubernetes.DriverServiceManager rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/{ => submit/v1}/Client.scala (99%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/{rest/kubernetes => kubernetes/submit/v1}/CompressionUtils.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/{ => submit/v1}/DriverPodKubernetesCredentialsProvider.scala (96%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/{rest/kubernetes => kubernetes/submit/v1}/DriverServiceManager.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/{ => submit/v1}/DriverSubmitSslConfigurationProvider.scala (99%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/{rest/kubernetes => kubernetes/submit/v1}/ExternalSuppliedUrisDriverServiceManager.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/{ => submit/v1}/KubernetesResourceCleaner.scala (97%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/{ => 
submit/v1}/LoggingPodStatusWatcher.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/{rest/kubernetes => kubernetes/submit/v1}/NodePortUrisDriverServiceManager.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{ => v1}/HttpClientUtil.scala (99%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{ => v1}/KubernetesFileUtils.scala (96%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/{ => kubernetes/v1}/KubernetesRestProtocolMessages.scala (81%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{ => v1}/KubernetesSparkRestApi.scala (89%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{ => v1}/KubernetesSparkRestServer.scala (99%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{ => v1}/MultiServerFeignTarget.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{ => v1}/PemsToKeyStoreConverter.scala (99%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/{deploy => scheduler/cluster}/kubernetes/KubernetesClientBuilder.scala (98%) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 002b29d5564e1..aeccd0088d76c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -619,7 +619,7 @@ object SparkSubmit { } if (isKubernetesCluster) { - childMainClass = "org.apache.spark.deploy.kubernetes.Client" + childMainClass = "org.apache.spark.deploy.kubernetes.submit.v1.Client" childArgs += args.primaryResource childArgs += args.mainClass childArgs ++= args.childArgs diff --git a/dev/.rat-excludes b/dev/.rat-excludes index f69567d8f6752..6a805b3293a6f 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -103,4 +103,4 @@ org.apache.spark.scheduler.ExternalClusterManager org.apache.spark.deploy.yarn.security.ServiceCredentialProvider spark-warehouse structured-streaming/* -org.apache.spark.deploy.rest.kubernetes.DriverServiceManager +org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager diff --git a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager new file mode 100644 index 0000000000000..2ed0387c51bc6 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager @@ -0,0 +1,2 @@ +org.apache.spark.deploy.kubernetes.submit.v1.ExternalSuppliedUrisDriverServiceManager +org.apache.spark.deploy.kubernetes.submit.v1.NodePortUrisDriverServiceManager diff --git a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.rest.kubernetes.DriverServiceManager b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.rest.kubernetes.DriverServiceManager deleted file mode 100644 index 56203ee38ac99..0000000000000 --- 
a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.rest.kubernetes.DriverServiceManager +++ /dev/null @@ -1,2 +0,0 @@ -org.apache.spark.deploy.rest.kubernetes.ExternalSuppliedUrisDriverServiceManager -org.apache.spark.deploy.rest.kubernetes.NodePortUrisDriverServiceManager diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 3328809e186e4..e403a6e8b927f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.kubernetes import java.util.concurrent.TimeUnit import org.apache.spark.{SPARK_VERSION => sparkVersion} -import org.apache.spark.deploy.rest.kubernetes.NodePortUrisDriverServiceManager +import org.apache.spark.deploy.kubernetes.submit.v1.NodePortUrisDriverServiceManager import org.apache.spark.internal.config.ConfigBuilder import org.apache.spark.network.util.ByteUnit diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala similarity index 99% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index 5d115115b4595..72d24f7bf8342 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import java.io.File import java.security.SecureRandom @@ -32,8 +32,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, KubernetesCredentials, RemoteAppResource, UploadedAppResource} -import org.apache.spark.deploy.rest.kubernetes._ +import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesCredentials, KubernetesFileUtils, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} import org.apache.spark.internal.Logging import org.apache.spark.util.{ShutdownHookManager, Utils} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/CompressionUtils.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/CompressionUtils.scala index 7204cb874aaec..8296218ba1f70 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/CompressionUtils.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream} import java.util.zip.{GZIPInputStream, GZIPOutputStream} @@ -26,7 +26,7 @@ import org.apache.commons.compress.utils.CharsetNames import org.apache.commons.io.IOUtils import scala.collection.mutable -import org.apache.spark.deploy.rest.TarGzippedData +import org.apache.spark.deploy.rest.kubernetes.v1.TarGzippedData import org.apache.spark.internal.Logging import org.apache.spark.util.{ByteBufferOutputStream, Utils} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverPodKubernetesCredentialsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala similarity index 96% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverPodKubernetesCredentialsProvider.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala index cee47aad79393..bc7490ef9ec4a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverPodKubernetesCredentialsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import java.io.File @@ -22,7 +22,7 @@ import com.google.common.io.{BaseEncoding, Files} import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.internal.config.OptionalConfigEntry private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf) { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/DriverServiceManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverServiceManager.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/DriverServiceManager.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverServiceManager.scala index d92c0247e2a35..c7d394fcf00ad 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/DriverServiceManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverServiceManager.scala @@ -14,8 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import io.fabric8.kubernetes.api.model.{Service, ServiceBuilder} import io.fabric8.kubernetes.client.KubernetesClient diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverSubmitSslConfigurationProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala similarity index 99% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverSubmitSslConfigurationProvider.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala index a83c9a9896a08..10ffddcd7e7fc 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverSubmitSslConfigurationProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import java.io.{File, FileInputStream} import java.security.{KeyStore, SecureRandom} @@ -29,7 +29,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.{KubernetesFileUtils, PemsToKeyStoreConverter} +import org.apache.spark.deploy.rest.kubernetes.v1.{KubernetesFileUtils, PemsToKeyStoreConverter} import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ExternalSuppliedUrisDriverServiceManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/ExternalSuppliedUrisDriverServiceManager.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ExternalSuppliedUrisDriverServiceManager.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/ExternalSuppliedUrisDriverServiceManager.scala index 257571b5a9d3e..4c784aeb5692f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ExternalSuppliedUrisDriverServiceManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/ExternalSuppliedUrisDriverServiceManager.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import java.util.concurrent.TimeUnit diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesResourceCleaner.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/KubernetesResourceCleaner.scala similarity index 97% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesResourceCleaner.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/KubernetesResourceCleaner.scala index 6329bb1359516..266ec652ed8ae 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesResourceCleaner.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/KubernetesResourceCleaner.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import io.fabric8.kubernetes.api.model.HasMetadata import io.fabric8.kubernetes.client.KubernetesClient diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala index 17c3db8331ac4..7be334194d9d7 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala @@ -14,15 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import java.util.concurrent.{CountDownLatch, Executors, TimeUnit} -import scala.collection.JavaConverters._ - import io.fabric8.kubernetes.api.model.Pod import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action +import scala.collection.JavaConverters._ import org.apache.spark.internal.Logging diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/NodePortUrisDriverServiceManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/NodePortUrisDriverServiceManager.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/NodePortUrisDriverServiceManager.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/NodePortUrisDriverServiceManager.scala index 1416476824793..965d71917403e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/NodePortUrisDriverServiceManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/NodePortUrisDriverServiceManager.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import io.fabric8.kubernetes.api.model.{Service, ServiceBuilder} import scala.collection.JavaConverters._ diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/HttpClientUtil.scala similarity index 99% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/HttpClientUtil.scala index 33988bdc36f04..ea1abed72c07f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/HttpClientUtil.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.rest.kubernetes.v1 import java.io.IOException import java.net.{InetSocketAddress, ProxySelector, SocketAddress, URI} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesFileUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesFileUtils.scala similarity index 96% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesFileUtils.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesFileUtils.scala index f30be1535f81c..b8e644219097e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesFileUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesFileUtils.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.rest.kubernetes.v1 import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala similarity index 81% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala index 1ea44109c5f5e..cd1f9dcdf5879 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala @@ -14,11 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest +package org.apache.spark.deploy.rest.kubernetes.v1 -import com.fasterxml.jackson.annotation.{JsonSubTypes, JsonTypeInfo} +import com.fasterxml.jackson.annotation.{JsonIgnore, JsonSubTypes, JsonTypeInfo} import org.apache.spark.SPARK_VERSION +import org.apache.spark.deploy.rest.{SubmitRestProtocolRequest, SubmitRestProtocolResponse} +import org.apache.spark.util.Utils case class KubernetesCredentials( oauthToken: Option[String], @@ -35,6 +37,9 @@ case class KubernetesCreateSubmissionRequest( driverPodKubernetesCredentials: KubernetesCredentials, uploadedJarsBase64Contents: TarGzippedData, uploadedFilesBase64Contents: TarGzippedData) extends SubmitRestProtocolRequest { + @JsonIgnore + override val messageType: String = s"kubernetes.v1.${Utils.getFormattedClassName(this)}" + override val action = messageType message = "create" clientSparkVersion = SPARK_VERSION } @@ -68,5 +73,8 @@ class PingResponse extends SubmitRestProtocolResponse { val text = "pong" message = "pong" serverSparkVersion = SPARK_VERSION + @JsonIgnore + override val messageType: String = s"kubernetes.v1.${Utils.getFormattedClassName(this)}" + override val action: String = messageType } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestApi.scala similarity index 89% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestApi.scala index 18eb9b7a12ca6..270e7ea0e77bf 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestApi.scala @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.rest.kubernetes.v1 import javax.ws.rs.{Consumes, GET, Path, POST, Produces} import javax.ws.rs.core.MediaType -import org.apache.spark.deploy.rest.{CreateSubmissionResponse, KubernetesCreateSubmissionRequest, PingResponse} +import org.apache.spark.deploy.rest.CreateSubmissionResponse @Path("/v1/submissions/") trait KubernetesSparkRestApi { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala similarity index 99% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala index 4ca01b2f6bd38..048427fa4ec23 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.rest.kubernetes.v1 import java.io.{File, FileOutputStream, StringReader} import java.net.URI @@ -34,6 +34,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.submit.v1.CompressionUtils import org.apache.spark.deploy.rest._ import org.apache.spark.internal.config.OptionalConfigEntry import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/MultiServerFeignTarget.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/MultiServerFeignTarget.scala index 51313e00ce2da..56ff82ea2fc33 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/MultiServerFeignTarget.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.rest.kubernetes.v1 import feign.{Request, RequestTemplate, RetryableException, Retryer, Target} import scala.reflect.ClassTag diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala similarity index 99% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala index e5c43560eccb4..da863a9fb48e2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.rest.kubernetes.v1 import java.io.{File, FileInputStream, FileOutputStream, InputStreamReader} import java.nio.file.Paths diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala index 5f7ceb461615e..95cc6ab949d5c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala @@ -22,7 +22,7 @@ import javax.ws.rs.core.{MediaType, StreamingOutput} import org.glassfish.jersey.media.multipart.FormDataParam -import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials /** * Service that receives application data that can be retrieved later on. 
This is primarily used diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala index bb338dacdf511..732969cd67d89 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala @@ -26,7 +26,7 @@ import com.google.common.io.{BaseEncoding, ByteStreams, Files} import scala.collection.concurrent.TrieMap import org.apache.spark.SparkException -import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.internal.Logging import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala index 554ed17ff25c4..6725992aae978 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes +package org.apache.spark.scheduler.cluster.kubernetes import java.io.File diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index ccb4194336a44..130b143c7e92b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -24,7 +24,6 @@ import scala.collection.JavaConverters._ import scala.concurrent.{ExecutionContext, Future} import org.apache.spark.{SparkContext, SparkException} -import org.apache.spark.deploy.kubernetes.KubernetesClientBuilder import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.rpc.RpcEndpointAddress diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala index 70ba5be395042..babc0994d25dc 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala @@ -27,7 +27,7 @@ import org.scalatest.BeforeAndAfterAll import retrofit2.Call import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala index b92257005d5df..60850bb877540 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala @@ -22,7 +22,7 @@ import java.nio.file.Paths import com.google.common.io.Files import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 1f35e7e5eb209..8ab7a58704505 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -44,7 +44,7 @@ CMD SSL_ARGS="" && \ if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-key-password-file $SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE"; fi && \ if ! 
[ -z ${SPARK_SUBMISSION_KEY_PEM_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --key-pem-file $SPARK_SUBMISSION_KEY_PEM_FILE"; fi && \ if ! [ -z ${SPARK_SUBMISSION_CERT_PEM_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --cert-pem-file $SPARK_SUBMISSION_CERT_PEM_FILE"; fi && \ - exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.KubernetesSparkRestServer \ + exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.v1.KubernetesSparkRestServer \ --hostname $HOSTNAME \ --port $SPARK_SUBMISSION_SERVER_PORT \ --secret-file $SPARK_SUBMISSION_SECRET_LOCATION \ diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 0e55e64fd1d77..8deb790f4b7a0 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -35,14 +35,13 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} import org.apache.spark.deploy.SparkSubmit -import org.apache.spark.deploy.kubernetes.Client import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 import org.apache.spark.deploy.kubernetes.integrationtest.sslutil.SSLUtils -import org.apache.spark.deploy.rest.kubernetes.ExternalSuppliedUrisDriverServiceManager +import org.apache.spark.deploy.kubernetes.submit.v1.{Client, ExternalSuppliedUrisDriverServiceManager} import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala index 07274bf962dde..81491be944d3e 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala @@ -27,7 +27,7 @@ import io.fabric8.kubernetes.client.internal.SSLUtils import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag -import org.apache.spark.deploy.rest.kubernetes.HttpClientUtil +import org.apache.spark.deploy.rest.kubernetes.v1.HttpClientUtil import org.apache.spark.internal.Logging import org.apache.spark.util.Utils From 4940eae3f78c3a7f6eebc55a24e00b066dff22bc Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 21 Apr 2017 02:20:26 -0700 Subject: [PATCH 094/156] Support SSL on the file staging server (#221) * Staging server for receiving application dependencies. * Move packages around to split between v1 work and v2 work * Add unit test for file writing * Remove unnecessary main * Allow the file staging server to be secured with TLS. 
* Add back license header * Minor fixes * Fix integration test with renamed package for client. Fix scalastyle. * Remove unused import * Force json serialization to consider the different package. * Revert extraneous log * Fix scalastyle * Remove getting credentials from the API We still want to post them because in the future we can use these credentials to monitor the API server and handle cleaning up the data accordingly. * Fix build * Randomize name and namespace in test to prevent collisions * Generalize to resource staging server outside of Spark * Update code documentation * Val instead of var * Fix unit tests. * Fix build * Fix naming, remove unused import * Move suites from integration test package to core * Fix unit test * Use TrieMap instead of locks * Address comments * Fix imports * Address comments * Change main object name * Change config variable names * Change paths, use POST instead of PUT * Use a resource identifier as well as a resource secret --- .../spark/deploy/kubernetes/config.scala | 33 +++++ .../v1/PemsToKeyStoreConverter.scala | 7 +- .../kubernetes/v2/ResourceStagingServer.scala | 76 +++++++++- ...ourceStagingServerSslOptionsProvider.scala | 133 ++++++++++++++++++ .../rest/kubernetes/v2/RetrofitUtils.scala | 31 +++- .../spark/deploy/kubernetes}/SSLUtils.scala | 2 +- ...StagingServerSslOptionsProviderSuite.scala | 116 +++++++++++++++ .../v2/ResourceStagingServerSuite.scala | 57 ++++++-- .../kubernetes/integration-tests/pom.xml | 7 + .../integrationtest/KubernetesSuite.scala | 2 +- 10 files changed, 442 insertions(+), 22 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala rename resource-managers/kubernetes/{integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil => core/src/test/scala/org/apache/spark/deploy/kubernetes}/SSLUtils.scala (98%) create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index e403a6e8b927f..15f7a17857f1f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -288,4 +288,37 @@ package object config { .doc("Interval between reports of the current app status in cluster mode.") .timeConf(TimeUnit.MILLISECONDS) .createWithDefaultString("1s") + + // Spark dependency server for submission v2 + + private[spark] val RESOURCE_STAGING_SERVER_PORT = + ConfigBuilder("spark.kubernetes.resourceStagingServer.port") + .doc("Port for the Kubernetes resource staging server to listen on.") + .intConf + .createWithDefault(10000) + + private[spark] val RESOURCE_STAGING_SERVER_KEY_PEM = + ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.keyPem") + .doc("Key PEM file to use when having the Kubernetes dependency server listen on TLS.") + .stringConf + .createOptional + + private[spark] val RESOURCE_STAGING_SERVER_CERT_PEM = + ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.serverCertPem") + .doc("Certificate PEM file to use when having the Kubernetes dependency server" + + " listen on TLS.") + .stringConf + .createOptional + 
+ private[spark] val RESOURCE_STAGING_SERVER_KEYSTORE_PASSWORD_FILE = + ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.keyStorePasswordFile") + .doc("File containing the keystore password for the Kubernetes dependency server.") + .stringConf + .createOptional + + private[spark] val RESOURCE_STAGING_SERVER_KEYSTORE_KEY_PASSWORD_FILE = + ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile") + .doc("File containing the key password for the Kubernetes dependency server.") + .stringConf + .createOptional } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala index da863a9fb48e2..2c68b150baf91 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala @@ -60,11 +60,12 @@ private[spark] object PemsToKeyStoreConverter { privateKey, keyPassword.map(_.toCharArray).orNull, certificates) - val keyStoreOutputPath = Paths.get(s"keystore-${UUID.randomUUID()}.$resolvedKeyStoreType") - Utils.tryWithResource(new FileOutputStream(keyStoreOutputPath.toFile)) { storeStream => + val keyStoreDir = Utils.createTempDir("temp-keystores") + val keyStoreFile = new File(keyStoreDir, s"keystore-${UUID.randomUUID()}.$resolvedKeyStoreType") + Utils.tryWithResource(new FileOutputStream(keyStoreFile)) { storeStream => keyStore.store(storeStream, keyStorePassword.map(_.toCharArray).orNull) } - keyStoreOutputPath.toFile + keyStoreFile } def convertCertPemToTrustStore( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala index e09a788c45321..8ca13da545d5d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala @@ -16,19 +16,32 @@ */ package org.apache.spark.deploy.rest.kubernetes.v2 +import java.io.{File, FileInputStream} +import java.util.Properties + import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.jaxrs.json.JacksonJaxbJsonProvider import com.fasterxml.jackson.module.scala.DefaultScalaModule -import org.eclipse.jetty.server.{Server, ServerConnector} +import com.google.common.collect.Maps +import org.eclipse.jetty.http.HttpVersion +import org.eclipse.jetty.server.{HttpConfiguration, HttpConnectionFactory, Server, ServerConnector, SslConnectionFactory} import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} -import org.eclipse.jetty.util.thread.QueuedThreadPool +import org.eclipse.jetty.util.thread.{QueuedThreadPool, ScheduledExecutorScheduler} import org.glassfish.jersey.media.multipart.MultiPartFeature import org.glassfish.jersey.server.ResourceConfig import org.glassfish.jersey.servlet.ServletContainer +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.{ConfigReader, 
SparkConfigProvider} +import org.apache.spark.util.Utils private[spark] class ResourceStagingServer( port: Int, - serviceInstance: ResourceStagingService) { + serviceInstance: ResourceStagingService, + sslOptionsProvider: ResourceStagingServerSslOptionsProvider) extends Logging { private var jettyServer: Option[Server] = None @@ -45,17 +58,72 @@ private[spark] class ResourceStagingServer( contextHandler.setContextPath("/api/") contextHandler.addServlet(servletHolder, "/*") threadPool.setDaemon(true) + val resolvedConnectionFactories = sslOptionsProvider.getSslOptions + .createJettySslContextFactory() + .map(sslFactory => { + val sslConnectionFactory = new SslConnectionFactory( + sslFactory, HttpVersion.HTTP_1_1.asString()) + val rawHttpConfiguration = new HttpConfiguration() + rawHttpConfiguration.setSecureScheme("https") + rawHttpConfiguration.setSecurePort(port) + val rawHttpConnectionFactory = new HttpConnectionFactory(rawHttpConfiguration) + Array(sslConnectionFactory, rawHttpConnectionFactory) + }).getOrElse(Array(new HttpConnectionFactory())) val server = new Server(threadPool) - val connector = new ServerConnector(server) + val connector = new ServerConnector( + server, + null, + // Call this full constructor to set this, which forces daemon threads: + new ScheduledExecutorScheduler("DependencyServer-Executor", true), + null, + -1, + -1, + resolvedConnectionFactories: _*) connector.setPort(port) server.addConnector(connector) server.setHandler(contextHandler) server.start() jettyServer = Some(server) + logInfo(s"Resource staging server started on port $port.") } + def join(): Unit = jettyServer.foreach(_.join()) + def stop(): Unit = synchronized { jettyServer.foreach(_.stop()) jettyServer = None } } + +object ResourceStagingServer { + def main(args: Array[String]): Unit = { + val sparkConf = new SparkConf(true) + if (args.nonEmpty) { + val propertiesFile = new File(args(0)) + if (!propertiesFile.isFile) { + throw new IllegalArgumentException(s"Server properties file given at" + + s" ${propertiesFile.getAbsoluteFile} does not exist or is not a file.") + } + val properties = new Properties + Utils.tryWithResource(new FileInputStream(propertiesFile))(properties.load) + val propertiesMap = Maps.fromProperties(properties) + val configReader = new ConfigReader(new SparkConfigProvider(propertiesMap)) + propertiesMap.asScala.keys.foreach { key => + configReader.get(key).foreach(sparkConf.set(key, _)) + } + } + val dependenciesRootDir = Utils.createTempDir(namePrefix = "local-application-dependencies") + val serviceInstance = new ResourceStagingServiceImpl(dependenciesRootDir) + val sslOptionsProvider = new ResourceStagingServerSslOptionsProviderImpl(sparkConf) + val server = new ResourceStagingServer( + port = sparkConf.get(RESOURCE_STAGING_SERVER_PORT), + serviceInstance = serviceInstance, + sslOptionsProvider = sslOptionsProvider) + server.start() + try { + server.join() + } finally { + server.stop() + } + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala new file mode 100644 index 0000000000000..2744ed0a74616 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under 
one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.File + +import com.google.common.base.Charsets +import com.google.common.io.Files + +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.rest.kubernetes.v1.PemsToKeyStoreConverter +import org.apache.spark.internal.Logging + +private[spark] trait ResourceStagingServerSslOptionsProvider { + def getSslOptions: SSLOptions +} + +private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: SparkConf) + extends ResourceStagingServerSslOptionsProvider with Logging { + def getSslOptions: SSLOptions = { + val baseSslOptions = new SparkSecurityManager(sparkConf) + .getSSLOptions("kubernetes.resourceStagingServer") + val maybeKeyPem = sparkConf.get(RESOURCE_STAGING_SERVER_KEY_PEM) + val maybeCertPem = sparkConf.get(RESOURCE_STAGING_SERVER_CERT_PEM) + val maybeKeyStorePasswordFile = sparkConf.get(RESOURCE_STAGING_SERVER_KEYSTORE_PASSWORD_FILE) + val maybeKeyPasswordFile = sparkConf.get(RESOURCE_STAGING_SERVER_KEYSTORE_KEY_PASSWORD_FILE) + + logSslConfigurations( + baseSslOptions, maybeKeyPem, maybeCertPem, maybeKeyStorePasswordFile, maybeKeyPasswordFile) + + requireNandDefined(baseSslOptions.keyStore, maybeKeyPem, + "Shouldn't provide both key PEM and keyStore files for TLS.") + requireNandDefined(baseSslOptions.keyStore, maybeCertPem, + "Shouldn't provide both certificate PEM and keyStore files for TLS.") + requireNandDefined(baseSslOptions.keyStorePassword, maybeKeyStorePasswordFile, + "Shouldn't provide both the keyStore password value and the keyStore password file.") + requireNandDefined(baseSslOptions.keyPassword, maybeKeyPasswordFile, + "Shouldn't provide both the keyStore key password value and the keyStore key password file.") + requireBothOrNeitherDefined( + maybeKeyPem, + maybeCertPem, + "When providing a certificate PEM file, the key PEM file must also be provided.", + "When providing a key PEM file, the certificate PEM file must also be provided.") + + val resolvedKeyStorePassword = baseSslOptions.keyStorePassword + .orElse(maybeKeyStorePasswordFile.map { keyStorePasswordFile => + safeFileToString(keyStorePasswordFile, "KeyStore password file") + }) + val resolvedKeyStoreKeyPassword = baseSslOptions.keyPassword + .orElse(maybeKeyPasswordFile.map { keyPasswordFile => + safeFileToString(keyPasswordFile, "KeyStore key password file") + }) + val resolvedKeyStore = baseSslOptions.keyStore + .orElse(maybeKeyPem.map { keyPem => + val keyPemFile = new File(keyPem) + val certPemFile = new File(maybeCertPem.get) + PemsToKeyStoreConverter.convertPemsToTempKeyStoreFile( + keyPemFile, + certPemFile, + "key", + resolvedKeyStorePassword, + 
resolvedKeyStoreKeyPassword, + baseSslOptions.keyStoreType) + }) + baseSslOptions.copy( + keyStore = resolvedKeyStore, + keyStorePassword = resolvedKeyStorePassword, + keyPassword = resolvedKeyStoreKeyPassword) + } + + private def logSslConfigurations( + baseSslOptions: SSLOptions, + maybeKeyPem: Option[String], + maybeCertPem: Option[String], + maybeKeyStorePasswordFile: Option[String], + maybeKeyPasswordFile: Option[String]) = { + logDebug("The following SSL configurations were provided for the resource staging server:") + logDebug(s"KeyStore File: ${baseSslOptions.keyStore.map(_.getAbsolutePath).getOrElse("N/A")}") + logDebug("KeyStore Password: " + + baseSslOptions.keyStorePassword.map(_ => "").getOrElse("N/A")) + logDebug(s"KeyStore Password File: ${maybeKeyStorePasswordFile.getOrElse("N/A")}") + logDebug("Key Password: " + + baseSslOptions.keyPassword.map(_ => "").getOrElse("N/A")) + logDebug(s"Key Password File: ${maybeKeyPasswordFile.getOrElse("N/A")}") + logDebug(s"KeyStore Type: ${baseSslOptions.keyStoreType.getOrElse("N/A")}") + logDebug(s"Key PEM: ${maybeKeyPem.getOrElse("N/A")}") + logDebug(s"Certificate PEM: ${maybeCertPem.getOrElse("N/A")}") + } + + private def requireBothOrNeitherDefined( + opt1: Option[_], + opt2: Option[_], + errMessageWhenFirstIsMissing: String, + errMessageWhenSecondIsMissing: String): Unit = { + requireSecondIfFirstIsDefined(opt1, opt2, errMessageWhenSecondIsMissing) + requireSecondIfFirstIsDefined(opt2, opt1, errMessageWhenFirstIsMissing) + } + + private def requireSecondIfFirstIsDefined( + opt1: Option[_], opt2: Option[_], errMessageWhenSecondIsMissing: String): Unit = { + opt1.foreach { _ => + require(opt2.isDefined, errMessageWhenSecondIsMissing) + } + } + + private def requireNandDefined(opt1: Option[_], opt2: Option[_], errMessage: String): Unit = { + opt1.foreach { _ => require(opt2.isEmpty, errMessage) } + } + + private def safeFileToString(filePath: String, fileType: String): String = { + val file = new File(filePath) + if (!file.isFile) { + throw new SparkException(s"$fileType provided at ${file.getAbsolutePath} does not exist or" + + s" is not a file.") + } + Files.toString(file, Charsets.UTF_8) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala index c5c5c0d35b7cb..7416c624e97f6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala @@ -16,21 +16,50 @@ */ package org.apache.spark.deploy.rest.kubernetes.v2 +import java.io.FileInputStream +import java.security.{KeyStore, SecureRandom} +import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} + import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule +import okhttp3.OkHttpClient import retrofit2.Retrofit import retrofit2.converter.jackson.JacksonConverterFactory import retrofit2.converter.scalars.ScalarsConverterFactory +import org.apache.spark.SSLOptions +import org.apache.spark.util.Utils + private[spark] object RetrofitUtils { private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + private val SECURE_RANDOM = new SecureRandom() - def createRetrofitClient[T](baseUrl: String, serviceType: Class[T]): T = { + def 
createRetrofitClient[T](baseUrl: String, serviceType: Class[T], sslOptions: SSLOptions): T = { + val okHttpClientBuilder = new OkHttpClient.Builder() + sslOptions.trustStore.foreach { trustStoreFile => + require(trustStoreFile.isFile, s"TrustStore provided at ${trustStoreFile.getAbsolutePath}" + + " does not exist, or is not a file.") + val trustStoreType = sslOptions.trustStoreType.getOrElse(KeyStore.getDefaultType) + val trustStore = KeyStore.getInstance(trustStoreType) + val trustStorePassword = sslOptions.trustStorePassword.map(_.toCharArray).orNull + Utils.tryWithResource(new FileInputStream(trustStoreFile)) { + trustStore.load(_, trustStorePassword) + } + val trustManagerFactory = TrustManagerFactory.getInstance( + TrustManagerFactory.getDefaultAlgorithm) + trustManagerFactory.init(trustStore) + val trustManagers = trustManagerFactory.getTrustManagers + val sslContext = SSLContext.getInstance("TLSv1.2") + sslContext.init(null, trustManagers, SECURE_RANDOM) + okHttpClientBuilder.sslSocketFactory(sslContext.getSocketFactory, + trustManagers(0).asInstanceOf[X509TrustManager]) + } new Retrofit.Builder() .baseUrl(baseUrl) .addConverterFactory(ScalarsConverterFactory.create()) .addConverterFactory(JacksonConverterFactory.create(OBJECT_MAPPER)) + .client(okHttpClientBuilder.build()) .build() .create(serviceType) } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala similarity index 98% rename from resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala index 2078e0585e8f0..dacb017d8a513 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.integrationtest.sslutil +package org.apache.spark.deploy.kubernetes import java.io.{File, FileOutputStream, OutputStreamWriter} import java.math.BigInteger diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala new file mode 100644 index 0000000000000..290b46a537bf3 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.{File, FileInputStream, StringWriter} +import java.security.KeyStore + +import com.google.common.base.Charsets +import com.google.common.io.Files +import org.bouncycastle.openssl.jcajce.JcaPEMWriter +import org.scalatest.BeforeAndAfter + +import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.SSLUtils +import org.apache.spark.util.Utils + +class ResourceStagingServerSslOptionsProviderSuite extends SparkFunSuite with BeforeAndAfter { + + private var sslTempDir: File = _ + private var keyStoreFile: File = _ + + private var sparkConf: SparkConf = _ + private var sslOptionsProvider: ResourceStagingServerSslOptionsProvider = _ + + before { + sslTempDir = Utils.createTempDir(namePrefix = "resource-staging-server-ssl-test") + keyStoreFile = new File(sslTempDir, "keyStore.jks") + sparkConf = new SparkConf(true) + sslOptionsProvider = new ResourceStagingServerSslOptionsProviderImpl(sparkConf) + } + + test("Default SparkConf does not have TLS enabled.") { + assert(sslOptionsProvider.getSslOptions === SSLOptions()) + assert(!sslOptionsProvider.getSslOptions.enabled) + keyStoreFile.delete() + sslTempDir.delete() + } + + test("Setting keyStore, key password, and key field directly.") { + sparkConf.set("spark.ssl.kubernetes.resourceStagingServer.enabled", "true") + .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", keyStoreFile.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", "keyStorePassword") + .set("spark.ssl.kubernetes.resourceStagingServer.keyPassword", "keyPassword") + val sslOptions = sslOptionsProvider.getSslOptions + assert(sslOptions.enabled, "SSL should be enabled.") + assert(sslOptions.keyStore.map(_.getAbsolutePath) === Some(keyStoreFile.getAbsolutePath), + "Incorrect keyStore path or it was not set.") + assert(sslOptions.keyStorePassword === Some("keyStorePassword"), + "Incorrect keyStore password or it was not set.") + assert(sslOptions.keyPassword === Some("keyPassword"), + "Incorrect key password or it was not set.") + } + + test("Setting key and certificate pem files should write an appropriate keyStore.") { + val (keyPemFile, certPemFile) = SSLUtils.generateKeyCertPemPair("127.0.0.1") + sparkConf.set("spark.ssl.kubernetes.resourceStagingServer.enabled", "true") + .set("spark.ssl.kubernetes.resourceStagingServer.keyPem", keyPemFile.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.serverCertPem", certPemFile.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", "keyStorePassword") + .set("spark.ssl.kubernetes.resourceStagingServer.keyPassword", "keyPassword") + val sslOptions = sslOptionsProvider.getSslOptions + assert(sslOptions.enabled, "SSL should be enabled.") + assert(sslOptions.keyStore.isDefined, "KeyStore should be defined.") + sslOptions.keyStore.foreach { keyStoreFile => + val keyStore = KeyStore.getInstance(KeyStore.getDefaultType) + Utils.tryWithResource(new FileInputStream(keyStoreFile)) { + keyStore.load(_, "keyStorePassword".toCharArray) 
+ } + val key = keyStore.getKey("key", "keyPassword".toCharArray) + compareJcaPemObjectToFileString(key, keyPemFile) + val certificate = keyStore.getCertificateChain("key")(0) + compareJcaPemObjectToFileString(certificate, certPemFile) + } + } + + test("Using password files should read from the appropriate locations.") { + val keyStorePasswordFile = new File(sslTempDir, "keyStorePassword.txt") + Files.write("keyStorePassword", keyStorePasswordFile, Charsets.UTF_8) + val keyPasswordFile = new File(sslTempDir, "keyPassword.txt") + Files.write("keyPassword", keyPasswordFile, Charsets.UTF_8) + sparkConf.set("spark.ssl.kubernetes.resourceStagingServer.enabled", "true") + .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", keyStoreFile.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePasswordFile", + keyStorePasswordFile.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile", keyPasswordFile.getAbsolutePath) + val sslOptions = sslOptionsProvider.getSslOptions + assert(sslOptions.keyStorePassword === Some("keyStorePassword"), + "Incorrect keyStore password or it was not set.") + assert(sslOptions.keyPassword === Some("keyPassword"), + "Incorrect key password or it was not set.") + } + + private def compareJcaPemObjectToFileString(pemObject: Any, pemFile: File): Unit = { + Utils.tryWithResource(new StringWriter()) { stringWriter => + Utils.tryWithResource(new JcaPEMWriter(stringWriter)) { pemWriter => + pemWriter.writeObject(pemObject) + } + val pemFileAsString = Files.toString(pemFile, Charsets.UTF_8) + assert(stringWriter.toString === pemFileAsString) + } + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala index babc0994d25dc..51c5e43af1124 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala @@ -23,10 +23,11 @@ import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.common.io.ByteStreams import okhttp3.{RequestBody, ResponseBody} -import org.scalatest.BeforeAndAfterAll +import org.scalatest.BeforeAndAfter import retrofit2.Call -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.SSLUtils import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.util.Utils @@ -39,30 +40,53 @@ import org.apache.spark.util.Utils * we've configured the Jetty server correctly and that the endpoints reached over HTTP can * receive streamed uploads and can stream downloads. 
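 * The SSL variant of the tests swaps in a settable SSLOptions provider so the same upload/download round trip is also exercised over HTTPS.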
*/ -class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfterAll { +class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) private val serverPort = new ServerSocket(0).getLocalPort private val serviceImpl = new ResourceStagingServiceImpl(Utils.createTempDir()) - private val server = new ResourceStagingServer(serverPort, serviceImpl) - private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + private val sslOptionsProvider = new SettableReferenceSslOptionsProvider() + private val server = new ResourceStagingServer(serverPort, serviceImpl, sslOptionsProvider) - override def beforeAll(): Unit = { + after { + server.stop() + } + + test("Accept file and jar uploads and downloads") { server.start() + runUploadAndDownload(SSLOptions()) } - override def afterAll(): Unit = { - server.stop() + test("Enable SSL on the server") { + val (keyStore, trustStore) = SSLUtils.generateKeyStoreTrustStorePair( + ipAddress = "127.0.0.1", + keyStorePassword = "keyStore", + keyPassword = "key", + trustStorePassword = "trustStore") + val sslOptions = SSLOptions( + enabled = true, + keyStore = Some(keyStore), + keyStorePassword = Some("keyStore"), + keyPassword = Some("key"), + trustStore = Some(trustStore), + trustStorePassword = Some("trustStore")) + sslOptionsProvider.setOptions(sslOptions) + server.start() + runUploadAndDownload(sslOptions) } - test("Accept file and jar uploads and downloads") { - val retrofitService = RetrofitUtils.createRetrofitClient(s"http://localhost:$serverPort/", - classOf[ResourceStagingServiceRetrofit]) + private def runUploadAndDownload(sslOptions: SSLOptions): Unit = { + val scheme = if (sslOptions.enabled) "https" else "http" + val retrofitService = RetrofitUtils.createRetrofitClient( + s"$scheme://127.0.0.1:$serverPort/", + classOf[ResourceStagingServiceRetrofit], + sslOptions) val resourcesBytes = Array[Byte](1, 2, 3, 4) val labels = Map("label1" -> "label1Value", "label2" -> "label2value") val namespace = "namespace" val labelsJson = OBJECT_MAPPER.writer().writeValueAsString(labels) val resourcesRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), resourcesBytes) + okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), resourcesBytes) val labelsRequestBody = RequestBody.create( okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), labelsJson) val namespaceRequestBody = RequestBody.create( @@ -95,5 +119,14 @@ class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfterAll { val downloadedBytes = ByteStreams.toByteArray(responseBody.byteStream()) assert(downloadedBytes.toSeq === bytes) } +} + +private class SettableReferenceSslOptionsProvider extends ResourceStagingServerSslOptionsProvider { + private var options = SSLOptions() + + def setOptions(newOptions: SSLOptions): Unit = { + this.options = newOptions + } + override def getSslOptions: SSLOptions = options } diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index da78e783cac1b..5418afa25ca85 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -35,6 +35,13 @@ ${project.version} test + + org.apache.spark + spark-kubernetes_${scala.binary.version} + ${project.version} + test + test-jar + org.apache.spark spark-core_${scala.binary.version} diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 8deb790f4b7a0..750e7668b9912 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -35,12 +35,12 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} import org.apache.spark.deploy.SparkSubmit +import org.apache.spark.deploy.kubernetes.SSLUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 -import org.apache.spark.deploy.kubernetes.integrationtest.sslutil.SSLUtils import org.apache.spark.deploy.kubernetes.submit.v1.{Client, ExternalSuppliedUrisDriverServiceManager} import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} import org.apache.spark.util.Utils From 04afcf81df3ea09a85f7e4825d6bd2907bc8fe34 Mon Sep 17 00:00:00 2001 From: mccheah Date: Mon, 24 Apr 2017 18:15:11 -0700 Subject: [PATCH 095/156] Driver submission with mounting dependencies from the staging server (#227) --- resource-managers/kubernetes/core/pom.xml | 8 +- .../{submit/v1 => }/CompressionUtils.scala | 123 +++--- .../spark/deploy/kubernetes/config.scala | 107 ++++- .../spark/deploy/kubernetes/constants.scala | 42 +- .../deploy/kubernetes/submit/v1/Client.scala | 17 +- ...iverPodKubernetesCredentialsProvider.scala | 1 - .../deploy/kubernetes/submit/v2/Client.scala | 249 ++++++++++++ .../v2/ContainerNameEqualityPredicate.scala | 29 ++ .../v2/MountedDependencyManagerImpl.scala | 324 +++++++++++++++ .../v2/MountedDependencyManagerProvider.scala | 58 +++ .../SubmissionKubernetesClientProvider.scala | 55 +++ .../v1/KubernetesSparkRestServer.scala | 2 +- ...SparkDependencyDownloadInitContainer.scala | 127 ++++++ .../kubernetes/v2/ResourceStagingServer.scala | 24 +- .../v2/ResourceStagingService.scala | 13 +- .../v2/ResourceStagingServiceImpl.scala | 2 + .../v2/ResourceStagingServiceRetrofit.scala | 8 +- ...tils.scala => RetrofitClientFactory.scala} | 13 +- .../v2/SparkConfPropertiesParser.scala | 46 +++ .../DriverPodKubernetesClientProvider.scala | 83 ++++ .../KubernetesClusterSchedulerBackend.scala | 14 +- .../kubernetes/submit/v2/ClientV2Suite.scala | 328 ++++++++++++++++ .../v2/MountedDependencyManagerSuite.scala | 323 +++++++++++++++ ...DependencyDownloadInitContainerSuite.scala | 165 ++++++++ .../v2/ResourceStagingServerSuite.scala | 2 +- .../kubernetes/docker-minimal-bundle/pom.xml | 17 +- ...river-assembly.xml => docker-assembly.xml} | 6 +- .../src/main/assembly/executor-assembly.xml | 84 ---- .../src/main/docker/driver-init/Dockerfile | 38 ++ .../src/main/docker/driver-v2/Dockerfile | 43 ++ .../docker/resource-staging-server/Dockerfile | 38 ++ .../kubernetes/integration-tests/pom.xml | 65 +--- .../integrationtest/KubernetesSuite.scala | 368 ++---------------- .../KubernetesTestComponents.scala | 72 ++++ 
.../integrationtest/KubernetesV1Suite.scala | 306 +++++++++++++++ .../integrationtest/KubernetesV2Suite.scala | 127 ++++++ .../ResourceStagingServerLauncher.scala | 196 ++++++++++ .../docker/SparkDockerImageBuilder.scala | 25 +- 38 files changed, 2932 insertions(+), 616 deletions(-) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/{submit/v1 => }/CompressionUtils.scala (58%) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerNameEqualityPredicate.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmissionKubernetesClientProvider.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/{RetrofitUtils.scala => RetrofitClientFactory.scala} (85%) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/SparkConfPropertiesParser.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala rename resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/{driver-assembly.xml => docker-assembly.xml} (95%) delete mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml create mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-init/Dockerfile create mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile create mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 
8856339d4f6d9..70c252009c9b4 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -108,6 +108,8 @@ com.google.guava guava + + org.bouncycastle bcpkix-jdk15on @@ -116,7 +118,11 @@ org.bouncycastle bcprov-jdk15on - + + org.mockito + mockito-core + test + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/CompressionUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/CompressionUtils.scala similarity index 58% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/CompressionUtils.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/CompressionUtils.scala index 8296218ba1f70..03991ba26a6f7 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/CompressionUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/CompressionUtils.scala @@ -14,9 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v1 +package org.apache.spark.deploy.kubernetes -import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream} +import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream, InputStream, OutputStream} import java.util.zip.{GZIPInputStream, GZIPOutputStream} import com.google.common.io.Files @@ -48,40 +48,7 @@ private[spark] object CompressionUtils extends Logging { */ def createTarGzip(paths: Iterable[String]): TarGzippedData = { val compressedBytesStream = Utils.tryWithResource(new ByteBufferOutputStream()) { raw => - Utils.tryWithResource(new GZIPOutputStream(raw)) { gzipping => - Utils.tryWithResource(new TarArchiveOutputStream( - gzipping, - BLOCK_SIZE, - RECORD_SIZE, - ENCODING)) { tarStream => - val usedFileNames = mutable.HashSet.empty[String] - for (path <- paths) { - val file = new File(path) - if (!file.isFile) { - throw new IllegalArgumentException(s"Cannot add $path to tarball; either does" + - s" not exist or is a directory.") - } - var resolvedFileName = file.getName - val extension = Files.getFileExtension(file.getName) - val nameWithoutExtension = Files.getNameWithoutExtension(file.getName) - var deduplicationCounter = 1 - while (usedFileNames.contains(resolvedFileName)) { - val oldResolvedFileName = resolvedFileName - resolvedFileName = s"$nameWithoutExtension-$deduplicationCounter.$extension" - logWarning(s"File with name $oldResolvedFileName already exists. 
Trying to add" + - s" with file name $resolvedFileName instead.") - deduplicationCounter += 1 - } - usedFileNames += resolvedFileName - val tarEntry = new TarArchiveEntry(file, resolvedFileName) - tarStream.putArchiveEntry(tarEntry) - Utils.tryWithResource(new FileInputStream(file)) { fileInput => - IOUtils.copy(fileInput, tarStream) - } - tarStream.closeArchiveEntry() - } - } - } + writeTarGzipToStream(raw, paths) raw } val compressedAsBase64 = Base64.encodeBase64String(compressedBytesStream.toByteBuffer.array) @@ -93,6 +60,44 @@ private[spark] object CompressionUtils extends Logging { ) } + def writeTarGzipToStream(outputStream: OutputStream, paths: Iterable[String]): Unit = { + Utils.tryWithResource(new GZIPOutputStream(outputStream)) { gzipping => + Utils.tryWithResource(new TarArchiveOutputStream( + gzipping, + BLOCK_SIZE, + RECORD_SIZE, + ENCODING)) { tarStream => + val usedFileNames = mutable.HashSet.empty[String] + for (path <- paths) { + val file = new File(path) + if (!file.isFile) { + throw new IllegalArgumentException(s"Cannot add $path to tarball; either does" + + s" not exist or is a directory.") + } + var resolvedFileName = file.getName + val extension = Files.getFileExtension(file.getName) + val nameWithoutExtension = Files.getNameWithoutExtension(file.getName) + var deduplicationCounter = 1 + while (usedFileNames.contains(resolvedFileName)) { + val oldResolvedFileName = resolvedFileName + resolvedFileName = s"$nameWithoutExtension-$deduplicationCounter.$extension" + logWarning(s"File with name $oldResolvedFileName already exists. Trying to add" + + s" with file name $resolvedFileName instead.") + deduplicationCounter += 1 + } + usedFileNames += resolvedFileName + val tarEntry = new TarArchiveEntry(resolvedFileName) + tarEntry.setSize(file.length()); + tarStream.putArchiveEntry(tarEntry) + Utils.tryWithResource(new FileInputStream(file)) { fileInput => + IOUtils.copy(fileInput, tarStream) + } + tarStream.closeArchiveEntry() + } + } + } + } + /** * Decompresses the provided tar archive to a directory. 
* @param compressedData In-memory representation of the compressed data, ideally created via @@ -104,7 +109,6 @@ private[spark] object CompressionUtils extends Logging { def unpackAndWriteCompressedFiles( compressedData: TarGzippedData, rootOutputDir: File): Seq[String] = { - val paths = mutable.Buffer.empty[String] val compressedBytes = Base64.decodeBase64(compressedData.dataBase64) if (!rootOutputDir.exists) { if (!rootOutputDir.mkdirs) { @@ -116,24 +120,39 @@ private[spark] object CompressionUtils extends Logging { s"${rootOutputDir.getAbsolutePath} exists and is not a directory.") } Utils.tryWithResource(new ByteArrayInputStream(compressedBytes)) { compressedBytesStream => - Utils.tryWithResource(new GZIPInputStream(compressedBytesStream)) { gzipped => - Utils.tryWithResource(new TarArchiveInputStream( - gzipped, - compressedData.blockSize, - compressedData.recordSize, - compressedData.encoding)) { tarInputStream => - var nextTarEntry = tarInputStream.getNextTarEntry - while (nextTarEntry != null) { - val outputFile = new File(rootOutputDir, nextTarEntry.getName) - Utils.tryWithResource(new FileOutputStream(outputFile)) { fileOutputStream => - IOUtils.copy(tarInputStream, fileOutputStream) - } - paths += outputFile.getAbsolutePath - nextTarEntry = tarInputStream.getNextTarEntry + unpackTarStreamToDirectory( + compressedBytesStream, + rootOutputDir, + compressedData.blockSize, + compressedData.recordSize, + compressedData.encoding) + } + } + + def unpackTarStreamToDirectory( + inputStream: InputStream, + outputDir: File, + blockSize: Int = BLOCK_SIZE, + recordSize: Int = RECORD_SIZE, + encoding: String = ENCODING): Seq[String] = { + val paths = mutable.Buffer.empty[String] + Utils.tryWithResource(new GZIPInputStream(inputStream)) { gzipped => + Utils.tryWithResource(new TarArchiveInputStream( + gzipped, + blockSize, + recordSize, + encoding)) { tarInputStream => + var nextTarEntry = tarInputStream.getNextTarEntry + while (nextTarEntry != null) { + val outputFile = new File(outputDir, nextTarEntry.getName) + Utils.tryWithResource(new FileOutputStream(outputFile)) { fileOutputStream => + IOUtils.copy(tarInputStream, fileOutputStream) } + paths += outputFile.getAbsolutePath + nextTarEntry = tarInputStream.getNextTarEntry } } } - paths.toSeq + paths } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 15f7a17857f1f..1c8b6798bbdd5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -19,11 +19,13 @@ package org.apache.spark.deploy.kubernetes import java.util.concurrent.TimeUnit import org.apache.spark.{SPARK_VERSION => sparkVersion} +import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.kubernetes.submit.v1.NodePortUrisDriverServiceManager +import org.apache.spark.internal.Logging import org.apache.spark.internal.config.ConfigBuilder import org.apache.spark.network.util.ByteUnit -package object config { +package object config extends Logging { private[spark] val KUBERNETES_NAMESPACE = ConfigBuilder("spark.kubernetes.namespace") @@ -321,4 +323,107 @@ package object config { .doc("File containing the key password for the Kubernetes dependency server.") .stringConf .createOptional + + private[spark] val RESOURCE_STAGING_SERVER_SSL_ENABLED = 
+ ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.enabled") + .doc("Whether or not to use SSL when communicating with the dependency server.") + .booleanConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE = + ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.trustStore") + .doc("File containing the trustStore to communicate with the Kubernetes dependency server.") + .stringConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD = + ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.trustStorePassword") + .doc("Password for the trustStore for talking to the dependency server.") + .stringConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE = + ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.trustStoreType") + .doc("Type of trustStore for communicating with the dependency server.") + .stringConf + .createOptional + + // Driver and Init-Container parameters for submission v2 + private[spark] val RESOURCE_STAGING_SERVER_URI = + ConfigBuilder("spark.kubernetes.resourceStagingServer.uri") + .doc("Base URI for the Spark resource staging server") + .stringConf + .createOptional + + private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER = + ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadJarsResourceIdentifier") + .doc("Identifier for the jars tarball that was uploaded to the staging service.") + .internal() + .stringConf + .createOptional + + private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION = + ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadJarsSecretLocation") + .doc("Location of the application secret to use when the init-container contacts the" + + " resource staging server to download jars.") + .internal() + .stringConf + .createWithDefault(INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH) + + private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER = + ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadFilesResourceIdentifier") + .doc("Identifier for the files tarball that was uploaded to the staging service.") + .internal() + .stringConf + .createOptional + + private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION = + ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadFilesSecretLocation") + .doc("Location of the application secret to use when the init-container contacts the" + + " resource staging server to download files.") + .internal() + .stringConf + .createWithDefault(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH) + + private[spark] val INIT_CONTAINER_DOCKER_IMAGE = + ConfigBuilder("spark.kubernetes.driver.initcontainer.docker.image") + .doc("Image for the driver's init-container that downloads mounted dependencies.") + .stringConf + .createWithDefault(s"spark-driver-init:$sparkVersion") + + private[spark] val DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION = + ConfigBuilder("spark.kubernetes.driver.mountdependencies.jarsDownloadDir") + .doc("Location to download local jars to in the driver. When using spark-submit, this" + + " directory must be empty and will be mounted as an empty directory volume on the" + + " driver pod.") + .stringConf + .createWithDefault("/var/spark-data/spark-local-jars") + + private[spark] val DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION = + ConfigBuilder("spark.kubernetes.driver.mountdependencies.filesDownloadDir") + .doc("Location to download local files to in the driver. 
When using spark-submit, this" + + " directory must be empty and will be mounted as an empty directory volume on the" + + " driver pod.") + .stringConf + .createWithDefault("/var/spark-data/spark-local-files") + + private[spark] val DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT = + ConfigBuilder("spark.kubernetes.mountdependencies.mountTimeout") + .doc("Timeout before aborting the attempt to download and unpack local dependencies from" + + " the dependency staging server when initializing the driver pod.") + .timeConf(TimeUnit.MINUTES) + .createWithDefault(5) + + private[spark] def resolveK8sMaster(rawMasterString: String): String = { + if (!rawMasterString.startsWith("k8s://")) { + throw new IllegalArgumentException("Master URL should start with k8s:// in Kubernetes mode.") + } + val masterWithoutK8sPrefix = rawMasterString.replaceFirst("k8s://", "") + if (masterWithoutK8sPrefix.startsWith("http://") + || masterWithoutK8sPrefix.startsWith("https://")) { + masterWithoutK8sPrefix + } else { + val resolvedURL = s"https://$masterWithoutK8sPrefix" + logDebug(s"No scheme specified for kubernetes master URL, so defaulting to https. Resolved" + + s" URL is $resolvedURL") + resolvedURL + } + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 03b3d21ac9c45..f82cb88b4c622 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -30,9 +30,9 @@ package object constants { private[spark] val SUBMISSION_APP_SECRET_PREFIX = "spark-submission-server-secret" private[spark] val SUBMISSION_APP_SECRET_VOLUME_NAME = "spark-submission-secret-volume" private[spark] val SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME = - "spark-submission-server-key-password" + "spark-submission-server-key-password" private[spark] val SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME = - "spark-submission-server-keystore-password" + "spark-submission-server-keystore-password" private[spark] val SUBMISSION_SSL_KEYSTORE_SECRET_NAME = "spark-submission-server-keystore" private[spark] val SUBMISSION_SSL_SECRETS_PREFIX = "spark-submission-server-ssl" private[spark] val SUBMISSION_SSL_SECRETS_VOLUME_NAME = "spark-submission-server-ssl-secrets" @@ -55,9 +55,9 @@ package object constants { private[spark] val ENV_SUBMISSION_SERVER_PORT = "SPARK_SUBMISSION_SERVER_PORT" private[spark] val ENV_SUBMISSION_KEYSTORE_FILE = "SPARK_SUBMISSION_KEYSTORE_FILE" private[spark] val ENV_SUBMISSION_KEYSTORE_PASSWORD_FILE = - "SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE" + "SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE" private[spark] val ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE = - "SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE" + "SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE" private[spark] val ENV_SUBMISSION_KEYSTORE_TYPE = "SPARK_SUBMISSION_KEYSTORE_TYPE" private[spark] val ENV_SUBMISSION_KEY_PEM_FILE = "SPARK_SUBMISSION_KEY_PEM_FILE" private[spark] val ENV_SUBMISSION_CERT_PEM_FILE = "SPARK_SUBMISSION_CERT_PEM_FILE" @@ -70,12 +70,18 @@ package object constants { private[spark] val ENV_EXECUTOR_ID = "SPARK_EXECUTOR_ID" private[spark] val ENV_EXECUTOR_POD_IP = "SPARK_EXECUTOR_POD_IP" private[spark] val ENV_DRIVER_MEMORY = "SPARK_DRIVER_MEMORY" + private[spark] val ENV_UPLOADED_JARS_DIR = "SPARK_UPLOADED_JARS_DIR" + private[spark] val 
ENV_SUBMIT_EXTRA_CLASSPATH = "SPARK_SUBMIT_EXTRA_CLASSPATH" + private[spark] val ENV_MOUNTED_CLASSPATH = "SPARK_MOUNTED_CLASSPATH" + private[spark] val ENV_DRIVER_MAIN_CLASS = "SPARK_DRIVER_CLASS" + private[spark] val ENV_DRIVER_ARGS = "SPARK_DRIVER_ARGS" + private[spark] val ENV_DRIVER_JAVA_OPTS = "SPARK_DRIVER_JAVA_OPTS" // Annotation keys private[spark] val ANNOTATION_PROVIDE_EXTERNAL_URI = - "spark-job.alpha.apache.org/provideExternalUri" + "spark-job.alpha.apache.org/provideExternalUri" private[spark] val ANNOTATION_RESOLVED_EXTERNAL_URI = - "spark-job.alpha.apache.org/resolvedExternalUri" + "spark-job.alpha.apache.org/resolvedExternalUri" // Miscellaneous private[spark] val DRIVER_CONTAINER_NAME = "spark-kubernetes-driver" @@ -83,4 +89,28 @@ package object constants { private[spark] val KUBERNETES_MASTER_INTERNAL_URL = "https://kubernetes.default.svc" private[spark] val MEMORY_OVERHEAD_FACTOR = 0.10 private[spark] val MEMORY_OVERHEAD_MIN = 384L + + // V2 submission init container + private[spark] val INIT_CONTAINER_ANNOTATION = "pod.beta.kubernetes.io/init-containers" + private[spark] val INIT_CONTAINER_SECRETS_VOLUME_NAME = "dependency-secret" + private[spark] val INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH = "/mnt/secrets/spark-init" + private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY = "downloadJarsSecret" + private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY = "downloadFilesSecret" + private[spark] val INIT_CONTAINER_TRUSTSTORE_SECRET_KEY = "trustStore" + private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH = + s"$INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH/$INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY" + private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH = + s"$INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH/$INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY" + private[spark] val INIT_CONTAINER_TRUSTSTORE_PATH = + s"$INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH/$INIT_CONTAINER_TRUSTSTORE_SECRET_KEY" + private[spark] val INIT_CONTAINER_DOWNLOAD_CREDENTIALS_PATH = + "/mnt/secrets/kubernetes-credentials" + private[spark] val INIT_CONTAINER_CONFIG_MAP_KEY = "init-driver" + private[spark] val INIT_CONTAINER_PROPERTIES_FILE_VOLUME = "init-container-properties" + private[spark] val INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH = "/etc/spark-init/" + private[spark] val INIT_CONTAINER_PROPERTIES_FILE_NAME = "init-driver.properties" + private[spark] val INIT_CONTAINER_PROPERTIES_FILE_PATH = + s"$INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH/$INIT_CONTAINER_PROPERTIES_FILE_NAME" + private[spark] val DOWNLOAD_JARS_VOLUME_NAME = "download-jars" + private[spark] val DOWNLOAD_FILES_VOLUME_NAME = "download-files" } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index 72d24f7bf8342..e1cfac8feba37 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -30,6 +30,7 @@ import org.apache.commons.codec.binary.Base64 import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, 
ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesCredentials, KubernetesFileUtils, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} @@ -733,20 +734,4 @@ private[spark] object Client extends Logging { sparkConf = sparkConf, appArgs = appArgs).run() } - - def resolveK8sMaster(rawMasterString: String): String = { - if (!rawMasterString.startsWith("k8s://")) { - throw new IllegalArgumentException("Master URL should start with k8s:// in Kubernetes mode.") - } - val masterWithoutK8sPrefix = rawMasterString.replaceFirst("k8s://", "") - if (masterWithoutK8sPrefix.startsWith("http://") - || masterWithoutK8sPrefix.startsWith("https://")) { - masterWithoutK8sPrefix - } else { - val resolvedURL = s"https://$masterWithoutK8sPrefix" - logDebug(s"No scheme specified for kubernetes master URL, so defaulting to https. Resolved" + - s" URL is $resolvedURL") - resolvedURL - } - } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala index bc7490ef9ec4a..112226dbe3fc1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala @@ -45,7 +45,6 @@ private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf s"Driver client key file provided at %s does not exist or is not a file.") val clientCertDataBase64 = safeFileConfToBase64(KUBERNETES_DRIVER_CLIENT_CERT_FILE, s"Driver client cert file provided at %s does not exist or is not a file.") - val serviceAccountName = sparkConf.get(KUBERNETES_SERVICE_ACCOUNT_NAME) KubernetesCredentials( oauthToken = oauthToken, caCertDataBase64 = caCertDataBase64, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala new file mode 100644 index 0000000000000..69dbfd041bb86 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.File +import java.util.Collections + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, HasMetadata, OwnerReferenceBuilder, PodBuilder} +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.internal.Logging +import org.apache.spark.launcher.SparkLauncher +import org.apache.spark.util.Utils + +/** + * Submission client for launching Spark applications on Kubernetes clusters. + * + * This class is responsible for instantiating Kubernetes resources that allow a Spark driver to + * run in a pod on the Kubernetes cluster with the Spark configurations specified by spark-submit. + * Application submitters that desire to provide their application's dependencies from their local + * disk must provide a resource staging server URI to this client so that the client can push the + * local resources to the resource staging server and have the driver pod pull the resources in an + * init-container. Interactions with the resource staging server are offloaded to the + * {@link MountedDependencyManager} class. If instead the application submitter has their + * dependencies pre-staged in remote locations like HDFS or their own HTTP servers already, then + * the mounted dependency manager is bypassed entirely, but the init-container still needs to + * fetch these remote dependencies (TODO https://github.com/apache-spark-on-k8s/spark/issues/238). + */ +private[spark] class Client( + mainClass: String, + sparkConf: SparkConf, + appArgs: Array[String], + mainAppResource: String, + kubernetesClientProvider: SubmissionKubernetesClientProvider, + mountedDependencyManagerProvider: MountedDependencyManagerProvider) extends Logging { + + private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) + private val master = resolveK8sMaster(sparkConf.get("spark.master")) + private val launchTime = System.currentTimeMillis + private val appName = sparkConf.getOption("spark.app.name") + .getOrElse("spark") + private val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) + private val maybeStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) + private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) + private val memoryOverheadMb = sparkConf + .get(KUBERNETES_DRIVER_MEMORY_OVERHEAD) + .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * driverMemoryMb).toInt, + MEMORY_OVERHEAD_MIN)) + private val driverContainerMemoryWithOverhead = driverMemoryMb + memoryOverheadMb + private val customLabels = sparkConf.get(KUBERNETES_DRIVER_LABELS) + private val customAnnotations = sparkConf.get(KUBERNETES_DRIVER_ANNOTATIONS) + private val sparkJars = sparkConf.getOption("spark.jars") + .map(_.split(",")) + .getOrElse(Array.empty[String]) ++ + Option(mainAppResource) + .filterNot(_ == SparkLauncher.NO_RESOURCE) + .toSeq + + private val sparkFiles = sparkConf.getOption("spark.files") + .map(_.split(",")) + .getOrElse(Array.empty[String]) + private val driverExtraClasspath = sparkConf.get( + org.apache.spark.internal.config.DRIVER_CLASS_PATH) + private val driverJavaOptions = sparkConf.get( + org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) + + def run(): Unit = { + 
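+    // Validate the user-supplied labels and annotations, build the driver container and base pod,
+    // optionally stage local jars/files on the resource staging server (adding an init-container
+    // plus its secret and config map), then create the driver pod and make it the owner of the
+    // secondary resources so they are cleaned up together with it.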
val parsedCustomLabels = parseKeyValuePairs(customLabels, KUBERNETES_DRIVER_LABELS.key, + "labels") + require(!parsedCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + + s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping operations.") + require(!parsedCustomLabels.contains(SPARK_APP_NAME_LABEL), s"Label with key" + + s" $SPARK_APP_NAME_LABEL is not allowed as it is reserved for Spark bookkeeping operations.") + val allLabels = parsedCustomLabels ++ + Map(SPARK_APP_ID_LABEL -> kubernetesAppId, SPARK_APP_NAME_LABEL -> appName) + val parsedCustomAnnotations = parseKeyValuePairs( + customAnnotations, + KUBERNETES_DRIVER_ANNOTATIONS.key, + "annotations") + Utils.tryWithResource(kubernetesClientProvider.get) { kubernetesClient => + val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => + new EnvVarBuilder() + .withName(ENV_SUBMIT_EXTRA_CLASSPATH) + .withValue(classPath) + .build() + } + val driverContainer = new ContainerBuilder() + .withName(DRIVER_CONTAINER_NAME) + .withImage(driverDockerImage) + .withImagePullPolicy("IfNotPresent") + .addToEnv(driverExtraClasspathEnv.toSeq: _*) + .addNewEnv() + .withName(ENV_DRIVER_MEMORY) + .withValue(driverContainerMemoryWithOverhead + "m") + .endEnv() + .addNewEnv() + .withName(ENV_DRIVER_MAIN_CLASS) + .withValue(mainClass) + .endEnv() + .addNewEnv() + .withName(ENV_DRIVER_ARGS) + .withValue(appArgs.mkString(" ")) + .endEnv() + .build() + val basePod = new PodBuilder() + .withNewMetadata() + .withName(kubernetesAppId) + .addToLabels(allLabels.asJava) + .addToAnnotations(parsedCustomAnnotations.asJava) + .endMetadata() + .withNewSpec() + .addToContainers(driverContainer) + .endSpec() + + val nonDriverPodKubernetesResources = mutable.Buffer[HasMetadata]() + val resolvedJars = mutable.Buffer[String]() + val resolvedFiles = mutable.Buffer[String]() + val driverPodWithMountedDeps = maybeStagingServerUri.map { stagingServerUri => + val mountedDependencyManager = mountedDependencyManagerProvider.getMountedDependencyManager( + kubernetesAppId, + stagingServerUri, + allLabels, + namespace, + sparkJars, + sparkFiles) + val jarsResourceIdentifier = mountedDependencyManager.uploadJars() + val filesResourceIdentifier = mountedDependencyManager.uploadFiles() + val initContainerKubernetesSecret = mountedDependencyManager.buildInitContainerSecret( + jarsResourceIdentifier.resourceSecret, filesResourceIdentifier.resourceSecret) + val initContainerConfigMap = mountedDependencyManager.buildInitContainerConfigMap( + jarsResourceIdentifier.resourceId, filesResourceIdentifier.resourceId) + resolvedJars ++= mountedDependencyManager.resolveSparkJars() + resolvedFiles ++= mountedDependencyManager.resolveSparkFiles() + nonDriverPodKubernetesResources += initContainerKubernetesSecret + nonDriverPodKubernetesResources += initContainerConfigMap + mountedDependencyManager.configurePodToMountLocalDependencies( + driverContainer.getName, initContainerKubernetesSecret, initContainerConfigMap, basePod) + }.getOrElse { + sparkJars.map(Utils.resolveURI).foreach { jar => + require(Option.apply(jar.getScheme).getOrElse("file") != "file", + "When submitting with local jars, a resource staging server must be provided to" + + s" deploy your jars into the driver pod. 
Cannot send jar with URI $jar.") + } + sparkFiles.map(Utils.resolveURI).foreach { file => + require(Option.apply(file.getScheme).getOrElse("file") != "file", + "When submitting with local files, a resource staging server must be provided to" + + s" deploy your files into the driver pod. Cannot send file with URI $file") + } + resolvedJars ++= sparkJars + resolvedFiles ++= sparkFiles + basePod + } + val resolvedSparkConf = sparkConf.clone() + if (resolvedJars.nonEmpty) { + resolvedSparkConf.set("spark.jars", resolvedJars.mkString(",")) + } + if (resolvedFiles.nonEmpty) { + resolvedSparkConf.set("spark.files", resolvedFiles.mkString(",")) + } + resolvedSparkConf.set(KUBERNETES_DRIVER_POD_NAME, kubernetesAppId) + resolvedSparkConf.set("spark.app.id", kubernetesAppId) + // We don't need this anymore since we just set the JVM options on the environment + resolvedSparkConf.remove(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) + resolvedSparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { _ => + resolvedSparkConf.set(KUBERNETES_SUBMIT_OAUTH_TOKEN.key, "") + } + resolvedSparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).foreach { _ => + resolvedSparkConf.set(KUBERNETES_DRIVER_OAUTH_TOKEN.key, "") + } + + val mountedClassPath = resolvedJars.map(Utils.resolveURI).filter { jarUri => + val scheme = Option.apply(jarUri.getScheme).getOrElse("file") + scheme == "local" || scheme == "file" + }.map(_.getPath).mkString(File.pathSeparator) + val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { case (confKey, confValue) => + s"-D$confKey=$confValue" + }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") + val resolvedDriverPod = driverPodWithMountedDeps.editSpec() + .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainer.getName)) + .addNewEnv() + .withName(ENV_MOUNTED_CLASSPATH) + .withValue(mountedClassPath) + .endEnv() + .addNewEnv() + .withName(ENV_DRIVER_JAVA_OPTS) + .withValue(resolvedDriverJavaOpts) + .endEnv() + .endContainer() + .endSpec() + .build() + val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) + try { + val driverPodOwnerReference = new OwnerReferenceBuilder() + .withName(createdDriverPod.getMetadata.getName) + .withApiVersion(createdDriverPod.getApiVersion) + .withUid(createdDriverPod.getMetadata.getUid) + .withKind(createdDriverPod.getKind) + .withController(true) + .build() + nonDriverPodKubernetesResources.foreach { resource => + val originalMetadata = resource.getMetadata + originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) + } + kubernetesClient.resourceList(nonDriverPodKubernetesResources: _*).createOrReplace() + } catch { + case e: Throwable => + kubernetesClient.pods().delete(createdDriverPod) + throw e + } + } + } + + private def parseKeyValuePairs( + maybeKeyValues: Option[String], + configKey: String, + keyValueType: String): Map[String, String] = { + maybeKeyValues.map(keyValues => { + keyValues.split(",").map(_.trim).filterNot(_.isEmpty).map(keyValue => { + keyValue.split("=", 2).toSeq match { + case Seq(k, v) => + (k, v) + case _ => + throw new SparkException(s"Custom $keyValueType set by $configKey must be a" + + s" comma-separated list of key-value pairs, with format =." + + s" Got value: $keyValue. 
All values: $keyValues") + } + }).toMap + }).getOrElse(Map.empty[String, String]) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerNameEqualityPredicate.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerNameEqualityPredicate.scala new file mode 100644 index 0000000000000..5101e1506e4d5 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerNameEqualityPredicate.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.lang.Boolean + +import io.fabric8.kubernetes.api.builder.Predicate +import io.fabric8.kubernetes.api.model.ContainerBuilder + +private[spark] class ContainerNameEqualityPredicate(containerName: String) + extends Predicate[ContainerBuilder] { + override def apply(item: ContainerBuilder): Boolean = { + item.getName == containerName + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala new file mode 100644 index 0000000000000..9dbbcd0d56a3b --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala @@ -0,0 +1,324 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.{File, FileOutputStream, StringWriter} +import java.util.Properties +import javax.ws.rs.core.MediaType + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, Container, ContainerBuilder, EmptyDirVolumeSource, PodBuilder, Secret, SecretBuilder, VolumeMount, VolumeMountBuilder} +import okhttp3.RequestBody +import retrofit2.Call +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.{SparkException, SSLOptions} +import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.rest.kubernetes.v1.{KubernetesCredentials, KubernetesFileUtils} +import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory, StagedResourceIdentifier} +import org.apache.spark.util.Utils + +private[spark] trait MountedDependencyManager { + + /** + * Upload submitter-local jars to the resource staging server. + * @return The resource ID and secret to use to retrieve these jars. + */ + def uploadJars(): StagedResourceIdentifier + + /** + * Upload submitter-local files to the resource staging server. + * @return The resource ID and secret to use to retrieve these files. + */ + def uploadFiles(): StagedResourceIdentifier + + def configurePodToMountLocalDependencies( + driverContainerName: String, + initContainerSecret: Secret, + initContainerConfigMap: ConfigMap, + originalPodSpec: PodBuilder): PodBuilder + + def buildInitContainerSecret(jarsSecret: String, filesSecret: String): Secret + + def buildInitContainerConfigMap( + jarsResourceId: String, filesResourceId: String): ConfigMap + + /** + * Convert the Spark jar paths from their locations on the submitter's disk to + * the locations they will be downloaded to on the driver's disk. + */ + def resolveSparkJars(): Seq[String] + + /** + * Convert the Spark file paths from their locations on the submitter's disk to + * the locations they will be downloaded to on the driver's disk. + */ + def resolveSparkFiles(): Seq[String] +} + +/** + * Default implementation of a MountedDependencyManager that is backed by a + * Resource Staging Service. 
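+ *
+ * A sketch of the intended call sequence, as driven by the v2 submission Client (variable
+ * names here are illustrative only):
+ *
+ * {{{
+ * val jarsId = dependencyManager.uploadJars()
+ * val filesId = dependencyManager.uploadFiles()
+ * val initSecret = dependencyManager.buildInitContainerSecret(
+ *   jarsId.resourceSecret, filesId.resourceSecret)
+ * val initConfigMap = dependencyManager.buildInitContainerConfigMap(
+ *   jarsId.resourceId, filesId.resourceId)
+ * val podWithInitContainer = dependencyManager.configurePodToMountLocalDependencies(
+ *   driverContainerName, initSecret, initConfigMap, basePod)
+ * // spark.jars and spark.files are then rewritten to the in-pod paths returned by
+ * // resolveSparkJars() and resolveSparkFiles().
+ * }}}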
+ */ +private[spark] class MountedDependencyManagerImpl( + kubernetesAppId: String, + podLabels: Map[String, String], + podNamespace: String, + stagingServerUri: String, + initContainerImage: String, + jarsDownloadPath: String, + filesDownloadPath: String, + downloadTimeoutMinutes: Long, + sparkJars: Seq[String], + sparkFiles: Seq[String], + stagingServiceSslOptions: SSLOptions, + retrofitClientFactory: RetrofitClientFactory) extends MountedDependencyManager { + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + + private def localUriStringsToFiles(uris: Seq[String]): Iterable[File] = { + KubernetesFileUtils.getOnlySubmitterLocalFiles(uris) + .map(Utils.resolveURI) + .map(uri => new File(uri.getPath)) + } + private def localJars: Iterable[File] = localUriStringsToFiles(sparkJars) + private def localFiles: Iterable[File] = localUriStringsToFiles(sparkFiles) + + override def uploadJars(): StagedResourceIdentifier = doUpload(localJars, "uploaded-jars") + override def uploadFiles(): StagedResourceIdentifier = doUpload(localFiles, "uploaded-files") + + private def doUpload(files: Iterable[File], fileNamePrefix: String): StagedResourceIdentifier = { + val filesDir = Utils.createTempDir(namePrefix = fileNamePrefix) + val filesTgz = new File(filesDir, s"$fileNamePrefix.tgz") + Utils.tryWithResource(new FileOutputStream(filesTgz)) { filesOutputStream => + CompressionUtils.writeTarGzipToStream(filesOutputStream, files.map(_.getAbsolutePath)) + } + // TODO provide credentials properly when the staging server monitors the Kubernetes API. + val kubernetesCredentialsString = OBJECT_MAPPER.writer() + .writeValueAsString(KubernetesCredentials(None, None, None, None)) + val labelsAsString = OBJECT_MAPPER.writer().writeValueAsString(podLabels) + + val filesRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), filesTgz) + + val kubernetesCredentialsBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), kubernetesCredentialsString) + + val namespaceRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.TEXT_PLAIN), podNamespace) + + val labelsRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), labelsAsString) + + val service = retrofitClientFactory.createRetrofitClient( + stagingServerUri, + classOf[ResourceStagingServiceRetrofit], + stagingServiceSslOptions) + val uploadResponse = service.uploadResources( + labelsRequestBody, namespaceRequestBody, filesRequestBody, kubernetesCredentialsBody) + getTypedResponseResult(uploadResponse) + } + + override def configurePodToMountLocalDependencies( + driverContainerName: String, + initContainerSecret: Secret, + initContainerConfigMap: ConfigMap, + originalPodSpec: PodBuilder): PodBuilder = { + val sharedVolumeMounts = Seq[VolumeMount]( + new VolumeMountBuilder() + .withName(DOWNLOAD_JARS_VOLUME_NAME) + .withMountPath(jarsDownloadPath) + .build(), + new VolumeMountBuilder() + .withName(DOWNLOAD_FILES_VOLUME_NAME) + .withMountPath(filesDownloadPath) + .build()) + + val initContainers = Seq(new ContainerBuilder() + .withName("spark-driver-init") + .withImage(initContainerImage) + .withImagePullPolicy("IfNotPresent") + .addNewVolumeMount() + .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) + .withMountPath(INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH) + .endVolumeMount() + .addNewVolumeMount() + .withName(INIT_CONTAINER_SECRETS_VOLUME_NAME) + .withMountPath(INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH) + .endVolumeMount() + 
.addToVolumeMounts(sharedVolumeMounts: _*) + .addToArgs(INIT_CONTAINER_PROPERTIES_FILE_PATH) + .build()) + + // Make sure we don't override any user-provided init containers by just appending ours to + // the existing list. + val resolvedInitContainers = originalPodSpec + .editMetadata() + .getAnnotations + .asScala + .get(INIT_CONTAINER_ANNOTATION) + .map { existingInitContainerAnnotation => + val existingInitContainers = OBJECT_MAPPER.readValue( + existingInitContainerAnnotation, classOf[List[Container]]) + existingInitContainers ++ initContainers + }.getOrElse(initContainers) + val resolvedSerializedInitContainers = OBJECT_MAPPER.writeValueAsString(resolvedInitContainers) + originalPodSpec + .editMetadata() + .removeFromAnnotations(INIT_CONTAINER_ANNOTATION) + .addToAnnotations(INIT_CONTAINER_ANNOTATION, resolvedSerializedInitContainers) + .endMetadata() + .editSpec() + .addNewVolume() + .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) + .withNewConfigMap() + .withName(initContainerConfigMap.getMetadata.getName) + .addNewItem() + .withKey(INIT_CONTAINER_CONFIG_MAP_KEY) + .withPath(INIT_CONTAINER_PROPERTIES_FILE_NAME) + .endItem() + .endConfigMap() + .endVolume() + .addNewVolume() + .withName(DOWNLOAD_JARS_VOLUME_NAME) + .withEmptyDir(new EmptyDirVolumeSource()) + .endVolume() + .addNewVolume() + .withName(DOWNLOAD_FILES_VOLUME_NAME) + .withEmptyDir(new EmptyDirVolumeSource()) + .endVolume() + .addNewVolume() + .withName(INIT_CONTAINER_SECRETS_VOLUME_NAME) + .withNewSecret() + .withSecretName(initContainerSecret.getMetadata.getName) + .endSecret() + .endVolume() + .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainerName)) + .addToVolumeMounts(sharedVolumeMounts: _*) + .addNewEnv() + .withName(ENV_UPLOADED_JARS_DIR) + .withValue(jarsDownloadPath) + .endEnv() + .endContainer() + .endSpec() + } + + override def buildInitContainerSecret(jarsSecret: String, filesSecret: String): Secret = { + val trustStoreBase64 = stagingServiceSslOptions.trustStore.map { trustStoreFile => + require(trustStoreFile.isFile, "Dependency server trustStore provided at" + + trustStoreFile.getAbsolutePath + " does not exist or is not a file.") + (INIT_CONTAINER_TRUSTSTORE_SECRET_KEY, + BaseEncoding.base64().encode(Files.toByteArray(trustStoreFile))) + }.toMap + val jarsSecretBase64 = BaseEncoding.base64().encode(jarsSecret.getBytes(Charsets.UTF_8)) + val filesSecretBase64 = BaseEncoding.base64().encode(filesSecret.getBytes(Charsets.UTF_8)) + val secretData = Map( + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY -> jarsSecretBase64, + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY -> filesSecretBase64) ++ + trustStoreBase64 + val kubernetesSecret = new SecretBuilder() + .withNewMetadata() + .withName(s"$kubernetesAppId-spark-init") + .endMetadata() + .addToData(secretData.asJava) + .build() + kubernetesSecret + } + + override def buildInitContainerConfigMap( + jarsResourceId: String, filesResourceId: String): ConfigMap = { + val initContainerProperties = new Properties() + initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_URI.key, stagingServerUri) + initContainerProperties.setProperty(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION.key, jarsDownloadPath) + initContainerProperties.setProperty(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION.key, filesDownloadPath) + initContainerProperties.setProperty( + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key, jarsResourceId) + initContainerProperties.setProperty( + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key, INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH) + 
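+    // These properties are serialized into the init-container config map and read back by
+    // KubernetesSparkDependencyDownloadInitContainer, which loads them from the mounted
+    // properties file at container startup.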
initContainerProperties.setProperty( + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key, filesResourceId) + initContainerProperties.setProperty( + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key, INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH) + initContainerProperties.setProperty(DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT.key, + s"${downloadTimeoutMinutes}m") + stagingServiceSslOptions.trustStore.foreach { _ => + initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key, + INIT_CONTAINER_TRUSTSTORE_PATH) + } + initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_SSL_ENABLED.key, + stagingServiceSslOptions.enabled.toString) + stagingServiceSslOptions.trustStorePassword.foreach { password => + initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key, password) + } + stagingServiceSslOptions.trustStoreType.foreach { storeType => + initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key, storeType) + } + val propertiesWriter = new StringWriter() + initContainerProperties.store(propertiesWriter, "Init-container properties.") + new ConfigMapBuilder() + .withNewMetadata() + .withName(s"$kubernetesAppId-init-properties") + .endMetadata() + .addToData(INIT_CONTAINER_CONFIG_MAP_KEY, propertiesWriter.toString) + .build() + } + + override def resolveSparkJars(): Seq[String] = resolveLocalFiles(sparkJars, jarsDownloadPath) + + override def resolveSparkFiles(): Seq[String] = resolveLocalFiles(sparkFiles, filesDownloadPath) + + private def resolveLocalFiles( + allFileUriStrings: Seq[String], localDownloadRoot: String): Seq[String] = { + val usedLocalFileNames = mutable.HashSet.empty[String] + val resolvedFiles = mutable.Buffer.empty[String] + for (fileUriString <- allFileUriStrings) { + val fileUri = Utils.resolveURI(fileUriString) + val resolvedFile = Option(fileUri.getScheme).getOrElse("file") match { + case "file" => + // Deduplication logic matches that of CompressionUtils#writeTarGzipToStream + val file = new File(fileUri.getPath) + val extension = Files.getFileExtension(file.getName) + val nameWithoutExtension = Files.getNameWithoutExtension(file.getName) + var resolvedFileName = file.getName + var deduplicationCounter = 1 + while (usedLocalFileNames.contains(resolvedFileName)) { + resolvedFileName = s"$nameWithoutExtension-$deduplicationCounter.$extension" + deduplicationCounter += 1 + } + s"file://$localDownloadRoot/$resolvedFileName" + case _ => fileUriString + } + resolvedFiles += resolvedFile + } + resolvedFiles + } + + private def getTypedResponseResult[T](call: Call[T]): T = { + val response = call.execute() + if (response.code() < 200 || response.code() >= 300) { + throw new SparkException("Unexpected response from dependency server when uploading" + + s" dependencies: ${response.code()}. Error body: " + + Option(response.errorBody()).map(_.string()).getOrElse("N/A")) + } + response.body() + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala new file mode 100644 index 0000000000000..8f09112132b2c --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.rest.kubernetes.v2.RetrofitClientFactoryImpl + +private[spark] trait MountedDependencyManagerProvider { + def getMountedDependencyManager( + kubernetesAppId: String, + stagingServerUri: String, + podLabels: Map[String, String], + podNamespace: String, + sparkJars: Seq[String], + sparkFiles: Seq[String]): MountedDependencyManager +} + +private[spark] class MountedDependencyManagerProviderImpl(sparkConf: SparkConf) + extends MountedDependencyManagerProvider { + override def getMountedDependencyManager( + kubernetesAppId: String, + stagingServerUri: String, + podLabels: Map[String, String], + podNamespace: String, + sparkJars: Seq[String], + sparkFiles: Seq[String]): MountedDependencyManager = { + val resourceStagingServerSslOptions = new SparkSecurityManager(sparkConf) + .getSSLOptions("kubernetes.resourceStagingServer") + new MountedDependencyManagerImpl( + kubernetesAppId, + podLabels, + podNamespace, + stagingServerUri, + sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE), + sparkConf.get(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION), + sparkConf.get(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION), + sparkConf.get(DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT), + sparkJars, + sparkFiles, + resourceStagingServerSslOptions, + RetrofitClientFactoryImpl) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmissionKubernetesClientProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmissionKubernetesClientProvider.scala new file mode 100644 index 0000000000000..af3de6ce85026 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmissionKubernetesClientProvider.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient} + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.internal.Logging + +trait SubmissionKubernetesClientProvider { + def get: KubernetesClient +} + +private[spark] class SubmissionKubernetesClientProviderImpl(sparkConf: SparkConf) + extends SubmissionKubernetesClientProvider with Logging { + + private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) + private val master = resolveK8sMaster(sparkConf.get("spark.master")) + + override def get: KubernetesClient = { + var k8ConfBuilder = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(master) + .withNamespace(namespace) + sparkConf.get(KUBERNETES_SUBMIT_CA_CERT_FILE).foreach { + f => k8ConfBuilder = k8ConfBuilder.withCaCertFile(f) + } + sparkConf.get(KUBERNETES_SUBMIT_CLIENT_KEY_FILE).foreach { + f => k8ConfBuilder = k8ConfBuilder.withClientKeyFile(f) + } + sparkConf.get(KUBERNETES_SUBMIT_CLIENT_CERT_FILE).foreach { + f => k8ConfBuilder = k8ConfBuilder.withClientCertFile(f) + } + sparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { token => + k8ConfBuilder = k8ConfBuilder.withOauthToken(token) + } + val k8ClientConfig = k8ConfBuilder.build + new DefaultKubernetesClient(k8ClientConfig) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala index 048427fa4ec23..ca05fe767146b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala @@ -33,8 +33,8 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.submit.v1.CompressionUtils import org.apache.spark.deploy.rest._ import org.apache.spark.internal.config.OptionalConfigEntry import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala new file mode 100644 index 0000000000000..680d305985cc0 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.File +import java.util.concurrent.TimeUnit + +import com.google.common.base.Charsets +import com.google.common.io.Files +import com.google.common.util.concurrent.SettableFuture +import okhttp3.ResponseBody +import retrofit2.{Call, Callback, Response} + +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.internal.Logging +import org.apache.spark.util.Utils + +private trait WaitableCallback[T] extends Callback[T] { + private val complete = SettableFuture.create[Boolean] + + override final def onFailure(call: Call[T], t: Throwable): Unit = complete.setException(t) + + override final def onResponse(call: Call[T], response: Response[T]): Unit = { + require(response.code() >= 200 && response.code() < 300, Option(response.errorBody()) + .map(_.string()) + .getOrElse(s"Error executing HTTP request, but error body was not provided.")) + handleResponse(response.body()) + complete.set(true) + } + + protected def handleResponse(body: T): Unit + + final def waitForCompletion(time: Long, timeUnit: TimeUnit): Unit = { + complete.get(time, timeUnit) + } +} + +private class DownloadTarGzCallback(downloadDir: File) extends WaitableCallback[ResponseBody] { + + override def handleResponse(responseBody: ResponseBody): Unit = { + Utils.tryWithResource(responseBody.byteStream()) { responseStream => + CompressionUtils.unpackTarStreamToDirectory(responseStream, downloadDir) + } + } +} + +private[spark] class KubernetesSparkDependencyDownloadInitContainer( + sparkConf: SparkConf, retrofitClientFactory: RetrofitClientFactory) extends Logging { + + private val resourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) + .getOrElse(throw new SparkException("No dependency server URI was provided.")) + + private val downloadJarsResourceIdentifier = sparkConf + .get(INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER) + .getOrElse(throw new SparkException("No resource identifier provided for jars.")) + private val downloadJarsSecretLocation = new File( + sparkConf.get(INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION)) + private val downloadFilesResourceIdentifier = sparkConf + .get(INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER) + .getOrElse(throw new SparkException("No resource identifier provided for files.")) + private val downloadFilesSecretLocation = new File( + sparkConf.get(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION)) + require(downloadJarsSecretLocation.isFile, "Application jars download secret provided" + + s" at ${downloadJarsSecretLocation.getAbsolutePath} does not exist or is not a file.") + require(downloadFilesSecretLocation.isFile, "Application files download secret provided" + + s" at ${downloadFilesSecretLocation.getAbsolutePath} does not exist or is not a file.") + + private val jarsDownloadDir = new File(sparkConf.get(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION)) + require(jarsDownloadDir.isDirectory, "Application jars download 
directory provided at" + + s" ${jarsDownloadDir.getAbsolutePath} does not exist or is not a directory.") + + private val filesDownloadDir = new File(sparkConf.get(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION)) + require(filesDownloadDir.isDirectory, "Application files download directory provided at" + + s" ${filesDownloadDir.getAbsolutePath} does not exist or is not a directory.") + private val downloadTimeoutMinutes = sparkConf.get(DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT) + + def run(): Unit = { + val securityManager = new SparkSecurityManager(sparkConf) + val sslOptions = securityManager.getSSLOptions("kubernetes.resourceStagingServer") + val service = retrofitClientFactory.createRetrofitClient( + resourceStagingServerUri, classOf[ResourceStagingServiceRetrofit], sslOptions) + val jarsSecret = Files.toString(downloadJarsSecretLocation, Charsets.UTF_8) + val filesSecret = Files.toString(downloadFilesSecretLocation, Charsets.UTF_8) + val downloadJarsCallback = new DownloadTarGzCallback(jarsDownloadDir) + val downloadFilesCallback = new DownloadTarGzCallback(filesDownloadDir) + service.downloadResources(downloadJarsResourceIdentifier, jarsSecret) + .enqueue(downloadJarsCallback) + service.downloadResources(downloadFilesResourceIdentifier, filesSecret) + .enqueue(downloadFilesCallback) + logInfo("Waiting to download jars...") + downloadJarsCallback.waitForCompletion(downloadTimeoutMinutes, TimeUnit.MINUTES) + logInfo(s"Jars downloaded to ${jarsDownloadDir.getAbsolutePath}") + logInfo("Waiting to download files...") + downloadFilesCallback.waitForCompletion(downloadTimeoutMinutes, TimeUnit.MINUTES) + logInfo(s"Files downloaded to ${filesDownloadDir.getAbsolutePath}") + } +} + +object KubernetesSparkDependencyDownloadInitContainer extends Logging { + def main(args: Array[String]): Unit = { + logInfo("Starting init-container to download Spark application dependencies.") + val sparkConf = if (args.nonEmpty) { + SparkConfPropertiesParser.getSparkConfFromPropertiesFile(new File(args(0))) + } else { + new SparkConf(true) + } + new KubernetesSparkDependencyDownloadInitContainer(sparkConf, RetrofitClientFactoryImpl).run() + logInfo("Finished downloading application dependencies.") + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala index 8ca13da545d5d..4ecb6369ff3b0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala @@ -16,13 +16,11 @@ */ package org.apache.spark.deploy.rest.kubernetes.v2 -import java.io.{File, FileInputStream} -import java.util.Properties +import java.io.File import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.jaxrs.json.JacksonJaxbJsonProvider import com.fasterxml.jackson.module.scala.DefaultScalaModule -import com.google.common.collect.Maps import org.eclipse.jetty.http.HttpVersion import org.eclipse.jetty.server.{HttpConfiguration, HttpConnectionFactory, Server, ServerConnector, SslConnectionFactory} import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} @@ -30,12 +28,10 @@ import org.eclipse.jetty.util.thread.{QueuedThreadPool, ScheduledExecutorSchedul import org.glassfish.jersey.media.multipart.MultiPartFeature import 
org.glassfish.jersey.server.ResourceConfig import org.glassfish.jersey.servlet.ServletContainer -import scala.collection.JavaConverters._ import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.{ConfigReader, SparkConfigProvider} import org.apache.spark.util.Utils private[spark] class ResourceStagingServer( @@ -97,20 +93,10 @@ private[spark] class ResourceStagingServer( object ResourceStagingServer { def main(args: Array[String]): Unit = { - val sparkConf = new SparkConf(true) - if (args.nonEmpty) { - val propertiesFile = new File(args(0)) - if (!propertiesFile.isFile) { - throw new IllegalArgumentException(s"Server properties file given at" + - s" ${propertiesFile.getAbsoluteFile} does not exist or is not a file.") - } - val properties = new Properties - Utils.tryWithResource(new FileInputStream(propertiesFile))(properties.load) - val propertiesMap = Maps.fromProperties(properties) - val configReader = new ConfigReader(new SparkConfigProvider(propertiesMap)) - propertiesMap.asScala.keys.foreach { key => - configReader.get(key).foreach(sparkConf.set(key, _)) - } + val sparkConf = if (args.nonEmpty) { + SparkConfPropertiesParser.getSparkConfFromPropertiesFile(new File(args(0))) + } else { + new SparkConf(true) } val dependenciesRootDir = Utils.createTempDir(namePrefix = "local-application-dependencies") val serviceInstance = new ResourceStagingServiceImpl(dependenciesRootDir) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala index 95cc6ab949d5c..844809dec995c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala @@ -80,6 +80,15 @@ private[spark] trait ResourceStagingService { @Produces(Array(MediaType.APPLICATION_OCTET_STREAM)) @Path("/resources/{resourceId}") def downloadResources( - @PathParam("resourceId") resourceId: String, - @HeaderParam("Authorization") resourceSecret: String): StreamingOutput + @PathParam("resourceId") resourceId: String, + @HeaderParam("Authorization") resourceSecret: String): StreamingOutput + + /** + * Health check. 
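+   * The implementation simply returns "pong"; clients and tests can call this endpoint to
+   * verify that the resource staging server is up and reachable.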
+ */ + @GET + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.TEXT_PLAIN)) + @Path("/ping") + def ping(): String } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala index 732969cd67d89..cf6180fbf53d4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala @@ -88,6 +88,8 @@ private[spark] class ResourceStagingServiceImpl(dependenciesRootDir: File) } } } + + override def ping(): String = "pong" } private case class StagedResources( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala index daf03f764b35a..b1a3cc0676757 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala @@ -37,6 +37,10 @@ private[spark] trait ResourceStagingServiceRetrofit { @Streaming @retrofit2.http.GET("/api/v0/resources/{resourceId}") - def downloadResources(@Path("resourceId") resourceId: String, - @retrofit2.http.Header("Authorization") resourceSecret: String): Call[ResponseBody] + def downloadResources( + @Path("resourceId") resourceId: String, + @retrofit2.http.Header("Authorization") resourceSecret: String): Call[ResponseBody] + + @retrofit2.http.GET("/api/ping") + def ping(): String } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitClientFactory.scala similarity index 85% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitClientFactory.scala index 7416c624e97f6..f906423524944 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitClientFactory.scala @@ -22,21 +22,26 @@ import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule -import okhttp3.OkHttpClient +import okhttp3.{Dispatcher, OkHttpClient} import retrofit2.Retrofit import retrofit2.converter.jackson.JacksonConverterFactory import retrofit2.converter.scalars.ScalarsConverterFactory import org.apache.spark.SSLOptions -import org.apache.spark.util.Utils +import org.apache.spark.util.{ThreadUtils, Utils} -private[spark] object RetrofitUtils { +private[spark] trait RetrofitClientFactory { + def createRetrofitClient[T](baseUrl: String, serviceType: Class[T], sslOptions: SSLOptions): T +} + +private[spark] object 
RetrofitClientFactoryImpl extends RetrofitClientFactory { private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) private val SECURE_RANDOM = new SecureRandom() def createRetrofitClient[T](baseUrl: String, serviceType: Class[T], sslOptions: SSLOptions): T = { - val okHttpClientBuilder = new OkHttpClient.Builder() + val dispatcher = new Dispatcher(ThreadUtils.newDaemonCachedThreadPool(s"http-client-$baseUrl")) + val okHttpClientBuilder = new OkHttpClient.Builder().dispatcher(dispatcher) sslOptions.trustStore.foreach { trustStoreFile => require(trustStoreFile.isFile, s"TrustStore provided at ${trustStoreFile.getAbsolutePath}" + " does not exist, or is not a file.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/SparkConfPropertiesParser.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/SparkConfPropertiesParser.scala new file mode 100644 index 0000000000000..cf9decab127c5 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/SparkConfPropertiesParser.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.{File, FileInputStream} +import java.util.Properties + +import com.google.common.collect.Maps +import scala.collection.JavaConverters.mapAsScalaMapConverter + +import org.apache.spark.SparkConf +import org.apache.spark.internal.config.{ConfigReader, SparkConfigProvider} +import org.apache.spark.util.Utils + +private[spark] object SparkConfPropertiesParser { + + def getSparkConfFromPropertiesFile(propertiesFile: File): SparkConf = { + val sparkConf = new SparkConf(true) + if (!propertiesFile.isFile) { + throw new IllegalArgumentException(s"Server properties file given at" + + s" ${propertiesFile.getAbsoluteFile} does not exist or is not a file.") + } + val properties = new Properties + Utils.tryWithResource(new FileInputStream(propertiesFile))(properties.load) + val propertiesMap = Maps.fromProperties(properties) + val configReader = new ConfigReader(new SparkConfigProvider(propertiesMap)) + propertiesMap.asScala.keys.foreach { key => + configReader.get(key).foreach(sparkConf.set(key, _)) + } + sparkConf + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala new file mode 100644 index 0000000000000..b8c2b0c91bbeb --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.scheduler.cluster.kubernetes + +import java.io.File + +import com.google.common.base.Charsets +import com.google.common.io.Files +import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient} + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ + +private[spark] class DriverPodKubernetesClientProvider(sparkConf: SparkConf, namespace: String) { + private val SERVICE_ACCOUNT_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) + private val SERVICE_ACCOUNT_CA_CERT = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) + private val oauthTokenFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN) + private val caCertFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE) + private val clientKeyFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE) + private val clientCertFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE) + + /** + * Creates a {@link KubernetesClient}, expecting to be from within the context of a pod. 
When + * doing so, service account token files can be picked up from canonical locations. + */ + def get: DefaultKubernetesClient = { + val baseClientConfigBuilder = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(KUBERNETES_MASTER_INTERNAL_URL) + .withNamespace(namespace) + + val configBuilder = oauthTokenFile + .orElse(caCertFile) + .orElse(clientKeyFile) + .orElse(clientCertFile) + .map { _ => + var mountedAuthConfigBuilder = baseClientConfigBuilder + oauthTokenFile.foreach { tokenFilePath => + val tokenFile = new File(tokenFilePath) + mountedAuthConfigBuilder = mountedAuthConfigBuilder + .withOauthToken(Files.toString(tokenFile, Charsets.UTF_8)) + } + caCertFile.foreach { caFile => + mountedAuthConfigBuilder = mountedAuthConfigBuilder.withCaCertFile(caFile) + } + clientKeyFile.foreach { keyFile => + mountedAuthConfigBuilder = mountedAuthConfigBuilder.withClientKeyFile(keyFile) + } + clientCertFile.foreach { certFile => + mountedAuthConfigBuilder = mountedAuthConfigBuilder.withClientCertFile(certFile) + } + mountedAuthConfigBuilder + }.getOrElse { + var serviceAccountConfigBuilder = baseClientConfigBuilder + if (SERVICE_ACCOUNT_CA_CERT.isFile) { + serviceAccountConfigBuilder = serviceAccountConfigBuilder.withCaCertFile( + SERVICE_ACCOUNT_CA_CERT.getAbsolutePath) + } + + if (SERVICE_ACCOUNT_TOKEN.isFile) { + serviceAccountConfigBuilder = serviceAccountConfigBuilder.withOauthToken( + Files.toString(SERVICE_ACCOUNT_TOKEN, Charsets.UTF_8)) + } + serviceAccountConfigBuilder + } + new DefaultKubernetesClient(configBuilder.build) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 130b143c7e92b..15457db7e1459 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -47,11 +47,6 @@ private[spark] class KubernetesClusterSchedulerBackend( private val blockmanagerPort = conf .getInt("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT) - private val kubernetesDriverServiceName = conf - .get(KUBERNETES_DRIVER_SERVICE_NAME) - .getOrElse( - throw new SparkException("Must specify the service name the driver is running with")) - private val kubernetesDriverPodName = conf .get(KUBERNETES_DRIVER_POD_NAME) .getOrElse( @@ -73,8 +68,8 @@ private[spark] class KubernetesClusterSchedulerBackend( private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("kubernetes-executor-requests")) - private val kubernetesClient = new KubernetesClientBuilder(conf, kubernetesNamespace) - .buildFromWithinPod() + private val kubernetesClient = new DriverPodKubernetesClientProvider(conf, kubernetesNamespace) + .get private val driverPod = try { kubernetesClient.pods().inNamespace(kubernetesNamespace). 
@@ -142,11 +137,6 @@ private[spark] class KubernetesClusterSchedulerBackend( } catch { case e: Throwable => logError("Uncaught exception while shutting down controllers.", e) } - try { - kubernetesClient.services().withName(kubernetesDriverServiceName).delete() - } catch { - case e: Throwable => logError("Uncaught exception while shutting down driver service.", e) - } try { kubernetesClient.close() } catch { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala new file mode 100644 index 0000000000000..9e2ab26460412 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala @@ -0,0 +1,328 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.File + +import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, Container, DoneablePod, HasMetadata, Pod, PodBuilder, PodList, Secret, SecretBuilder} +import io.fabric8.kubernetes.client.KubernetesClient +import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource} +import org.hamcrest.{BaseMatcher, Description} +import org.mockito.Matchers.{any, anyVararg, argThat, startsWith, eq => mockitoEq} +import org.mockito.Mockito.when +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.BeforeAndAfter +import org.scalatest.mock.MockitoSugar._ +import scala.collection.JavaConverters._ +import scala.reflect.ClassTag + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.rest.kubernetes.v2.StagedResourceIdentifier +import org.apache.spark.util.Utils + +class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { + + private val MAIN_CLASS = "org.apache.spark.test.Main" + private val APP_ARGS = Array[String]("arg1", "arg2") + private val MAIN_APP_RESOURCE = "local:///app/jars/spark-main.jar" + private val APP_NAME = "spark-test-app" + private val STAGING_SERVER_URI = "http://localhost:9000" + private val SPARK_JARS = Seq( + "local:///app/jars/spark-helper.jar", "file:///var/data/spark-local-helper.jar") + private val RESOLVED_SPARK_JARS = Seq( + "local:///app/jars/spark-helper.jar", + "file:///var/data/spark-downloaded/spark-local-helper.jar") + private val SPARK_FILES = Seq( + "local:///app/files/spark-file.txt", "file:///var/data/spark-local-file.txt") + private val RESOLVED_SPARK_FILES = Seq( + 
"local:///app/files/spark-file.txt", "file:///var/data/spark-downloaded/spark-local-file.txt") + private val DRIVER_EXTRA_CLASSPATH = "/app/jars/extra-jar1.jar:/app/jars/extra-jars2.jar" + private val DRIVER_DOCKER_IMAGE_VALUE = "spark-driver:latest" + private val DRIVER_MEMORY_OVERHEARD_MB = 128L + private val DRIVER_MEMORY_MB = 512L + private val NAMESPACE = "namespace" + private val DOWNLOAD_JARS_RESOURCE_IDENTIFIER = StagedResourceIdentifier("jarsId", "jarsSecret") + private val DOWNLOAD_FILES_RESOURCE_IDENTIFIER = StagedResourceIdentifier( + "filesId", "filesSecret") + private val MOUNTED_FILES_ANNOTATION_KEY = "mountedFiles" + + private var sparkConf: SparkConf = _ + private var submissionKubernetesClientProvider: SubmissionKubernetesClientProvider = _ + private var submissionKubernetesClient: KubernetesClient = _ + private type PODS = MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] + private type RESOURCES = NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable[ + HasMetadata, Boolean] + private var podOperations: PODS = _ + private var resourceListOperations: RESOURCES = _ + private var mountedDependencyManagerProvider: MountedDependencyManagerProvider = _ + private var mountedDependencyManager: MountedDependencyManager = _ + private var captureCreatedPodAnswer: SelfArgumentCapturingAnswer[Pod] = _ + private var captureCreatedResourcesAnswer: AllArgumentsCapturingAnswer[HasMetadata, RESOURCES] = _ + + before { + sparkConf = new SparkConf(true) + .set("spark.app.name", APP_NAME) + .set("spark.master", "k8s://https://localhost:443") + .set(DRIVER_DOCKER_IMAGE, DRIVER_DOCKER_IMAGE_VALUE) + .set(KUBERNETES_DRIVER_MEMORY_OVERHEAD, DRIVER_MEMORY_OVERHEARD_MB) + .set(KUBERNETES_NAMESPACE, NAMESPACE) + .set(org.apache.spark.internal.config.DRIVER_MEMORY, DRIVER_MEMORY_MB) + submissionKubernetesClientProvider = mock[SubmissionKubernetesClientProvider] + submissionKubernetesClient = mock[KubernetesClient] + podOperations = mock[PODS] + resourceListOperations = mock[RESOURCES] + mountedDependencyManagerProvider = mock[MountedDependencyManagerProvider] + mountedDependencyManager = mock[MountedDependencyManager] + when(submissionKubernetesClientProvider.get).thenReturn(submissionKubernetesClient) + when(submissionKubernetesClient.pods()).thenReturn(podOperations) + captureCreatedPodAnswer = new SelfArgumentCapturingAnswer[Pod] + captureCreatedResourcesAnswer = new AllArgumentsCapturingAnswer[HasMetadata, RESOURCES]( + resourceListOperations) + when(podOperations.create(any())).thenAnswer(captureCreatedPodAnswer) + when(submissionKubernetesClient.resourceList(anyVararg[HasMetadata])) + .thenAnswer(captureCreatedResourcesAnswer) + } + + // Tests w/o local dependencies, or behave independently to that configuration. 
+ test("Simple properties and environment set on the driver pod.") { + sparkConf.set(org.apache.spark.internal.config.DRIVER_CLASS_PATH, DRIVER_EXTRA_CLASSPATH) + val createdDriverPod = createAndGetDriverPod() + val maybeDriverContainer = getDriverContainer(createdDriverPod) + maybeDriverContainer.foreach { driverContainer => + assert(driverContainer.getName === DRIVER_CONTAINER_NAME) + assert(driverContainer.getImage === DRIVER_DOCKER_IMAGE_VALUE) + assert(driverContainer.getImagePullPolicy === "IfNotPresent") + val envs = driverContainer.getEnv.asScala.map { env => + (env.getName, env.getValue) + }.toMap + assert(envs(ENV_DRIVER_MEMORY) === s"${DRIVER_MEMORY_MB + DRIVER_MEMORY_OVERHEARD_MB}m") + assert(envs(ENV_DRIVER_MAIN_CLASS) === MAIN_CLASS) + assert(envs(ENV_DRIVER_ARGS) === APP_ARGS.mkString(" ")) + assert(envs(ENV_SUBMIT_EXTRA_CLASSPATH) === DRIVER_EXTRA_CLASSPATH) + } + } + + test("Created pod should apply custom annotations and labels") { + sparkConf.set(KUBERNETES_DRIVER_LABELS, + "label1=label1value,label2=label2value") + sparkConf.set(KUBERNETES_DRIVER_ANNOTATIONS, + "annotation1=annotation1value,annotation2=annotation2value") + val createdDriverPod = createAndGetDriverPod() + val labels = createdDriverPod.getMetadata.getLabels.asScala + assert(labels.size === 4) + // App ID is non-deterministic, but just check if it's set and is prefixed with the app name + val appIdLabel = labels(SPARK_APP_ID_LABEL) + assert(appIdLabel != null && appIdLabel.startsWith(APP_NAME) && appIdLabel != APP_NAME) + val appNameLabel = labels(SPARK_APP_NAME_LABEL) + assert(appNameLabel != null && appNameLabel == APP_NAME) + assert(labels("label1") === "label1value") + assert(labels("label2") === "label2value") + val annotations = createdDriverPod.getMetadata.getAnnotations.asScala + val expectedAnnotations = Map( + "annotation1" -> "annotation1value", "annotation2" -> "annotation2value") + assert(annotations === expectedAnnotations) + } + + test("Driver JVM Options should be set in the environment.") { + sparkConf.set(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS, "-Dopt1=opt1value") + sparkConf.set("spark.logConf", "true") + val createdDriverPod = createAndGetDriverPod() + val maybeDriverContainer = getDriverContainer(createdDriverPod) + maybeDriverContainer.foreach { driverContainer => + val maybeJvmOptionsEnv = driverContainer.getEnv + .asScala + .find(_.getName == ENV_DRIVER_JAVA_OPTS) + assert(maybeJvmOptionsEnv.isDefined) + maybeJvmOptionsEnv.foreach { jvmOptionsEnv => + val jvmOptions = jvmOptionsEnv.getValue.split(" ") + jvmOptions.foreach { opt => assert(opt.startsWith("-D")) } + val optionKeyValues = jvmOptions.map { option => + val withoutDashDPrefix = option.stripPrefix("-D") + val split = withoutDashDPrefix.split('=') + assert(split.length == 2) + (split(0), split(1)) + }.toMap + assert(optionKeyValues("opt1") === "opt1value") + assert(optionKeyValues.contains("spark.app.id")) + assert(optionKeyValues("spark.jars") === MAIN_APP_RESOURCE) + assert(optionKeyValues(KUBERNETES_DRIVER_POD_NAME.key).startsWith(APP_NAME)) + assert(optionKeyValues("spark.app.name") === APP_NAME) + assert(optionKeyValues("spark.logConf") === "true") + } + } + } + + // Tests with local dependencies with the mounted dependency manager. 
+ test("Uploading local dependencies should create Kubernetes secrets and config map") { + val initContainerConfigMap = getInitContainerConfigMap() + val initContainerSecret = getInitContainerSecret() + runWithMountedDependencies(initContainerConfigMap, initContainerSecret) + val driverPod = captureCreatedPodAnswer.capturedArgument + assert(captureCreatedResourcesAnswer.capturedArguments != null) + assert(captureCreatedResourcesAnswer.capturedArguments.size === 2) + assert(captureCreatedResourcesAnswer.capturedArguments.toSet === + Set(initContainerSecret, initContainerConfigMap)) + captureCreatedResourcesAnswer.capturedArguments.foreach { resource => + val driverPodOwnerReferences = resource.getMetadata.getOwnerReferences + assert(driverPodOwnerReferences.size === 1) + val driverPodOwnerReference = driverPodOwnerReferences.asScala.head + assert(driverPodOwnerReference.getName === driverPod.getMetadata.getName) + assert(driverPodOwnerReference.getApiVersion === driverPod.getApiVersion) + assert(driverPodOwnerReference.getUid === driverPod.getMetadata.getUid) + assert(driverPodOwnerReference.getKind === driverPod.getKind) + assert(driverPodOwnerReference.getController) + } + } + + test("Uploading local resources should set classpath environment variables") { + val initContainerConfigMap = getInitContainerConfigMap() + val initContainerSecret = getInitContainerSecret() + runWithMountedDependencies(initContainerConfigMap, initContainerSecret) + val driverPod = captureCreatedPodAnswer.capturedArgument + val maybeDriverContainer = getDriverContainer(driverPod) + maybeDriverContainer.foreach { driverContainer => + val envs = driverContainer.getEnv + .asScala + .map { env => (env.getName, env.getValue) } + .toMap + val classPathEntries = envs(ENV_MOUNTED_CLASSPATH).split(File.pathSeparator).toSet + val expectedClassPathEntries = RESOLVED_SPARK_JARS + .map(Utils.resolveURI) + .map(_.getPath) + .toSet + assert(classPathEntries === expectedClassPathEntries) + } + } + + private def getInitContainerSecret(): Secret = { + new SecretBuilder() + .withNewMetadata().withName(s"$APP_NAME-init-container-secret").endMetadata() + .addToData( + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY, DOWNLOAD_JARS_RESOURCE_IDENTIFIER.resourceSecret) + .addToData(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY, + DOWNLOAD_FILES_RESOURCE_IDENTIFIER.resourceSecret) + .build() + } + + private def getInitContainerConfigMap(): ConfigMap = { + new ConfigMapBuilder() + .withNewMetadata().withName(s"$APP_NAME-init-container-conf").endMetadata() + .addToData("key", "configuration") + .build() + } + + private def runWithMountedDependencies( + initContainerConfigMap: ConfigMap, initContainerSecret: Secret): Unit = { + sparkConf.set(RESOURCE_STAGING_SERVER_URI, STAGING_SERVER_URI) + .setJars(SPARK_JARS) + .set("spark.files", SPARK_FILES.mkString(",")) + val labelsMatcher = new BaseMatcher[Map[String, String]] { + override def matches(maybeLabels: scala.Any) = { + maybeLabels match { + case labels: Map[String, String] => + labels(SPARK_APP_ID_LABEL).startsWith(APP_NAME) && + labels(SPARK_APP_NAME_LABEL) == APP_NAME + case _ => false + } + } + + override def describeTo(description: Description) = { + description.appendText("Checks if the labels contain the app ID and app name.") + } + } + when(mountedDependencyManagerProvider.getMountedDependencyManager( + startsWith(APP_NAME), + mockitoEq(STAGING_SERVER_URI), + argThat(labelsMatcher), + mockitoEq(NAMESPACE), + mockitoEq(SPARK_JARS ++ Seq(MAIN_APP_RESOURCE)), + 
mockitoEq(SPARK_FILES))).thenReturn(mountedDependencyManager) + when(mountedDependencyManager.uploadJars()).thenReturn(DOWNLOAD_JARS_RESOURCE_IDENTIFIER) + when(mountedDependencyManager.uploadFiles()).thenReturn(DOWNLOAD_FILES_RESOURCE_IDENTIFIER) + when(mountedDependencyManager.buildInitContainerSecret( + DOWNLOAD_JARS_RESOURCE_IDENTIFIER.resourceSecret, + DOWNLOAD_FILES_RESOURCE_IDENTIFIER.resourceSecret)) + .thenReturn(initContainerSecret) + when(mountedDependencyManager.buildInitContainerConfigMap( + DOWNLOAD_JARS_RESOURCE_IDENTIFIER.resourceId, DOWNLOAD_FILES_RESOURCE_IDENTIFIER.resourceId)) + .thenReturn(initContainerConfigMap) + when(mountedDependencyManager.resolveSparkJars()).thenReturn(RESOLVED_SPARK_JARS) + when(mountedDependencyManager.resolveSparkFiles()).thenReturn(RESOLVED_SPARK_FILES) + when(mountedDependencyManager.configurePodToMountLocalDependencies( + mockitoEq(DRIVER_CONTAINER_NAME), + mockitoEq(initContainerSecret), + mockitoEq(initContainerConfigMap), + any())).thenAnswer(new Answer[PodBuilder] { + override def answer(invocationOnMock: InvocationOnMock): PodBuilder = { + val basePod = invocationOnMock.getArgumentAt(3, classOf[PodBuilder]) + basePod.editMetadata().addToAnnotations(MOUNTED_FILES_ANNOTATION_KEY, "true").endMetadata() + } + }) + val clientUnderTest = createClient() + clientUnderTest.run() + } + + private def getDriverContainer(driverPod: Pod): Option[Container] = { + val maybeDriverContainer = driverPod.getSpec + .getContainers + .asScala + .find(_.getName == DRIVER_CONTAINER_NAME) + assert(maybeDriverContainer.isDefined) + maybeDriverContainer + } + + private def createAndGetDriverPod(): Pod = { + val clientUnderTest = createClient() + clientUnderTest.run() + val createdDriverPod = captureCreatedPodAnswer.capturedArgument + assert(createdDriverPod != null) + createdDriverPod + } + + private def createClient(): Client = { + new Client( + MAIN_CLASS, + sparkConf, + APP_ARGS, + MAIN_APP_RESOURCE, + submissionKubernetesClientProvider, + mountedDependencyManagerProvider) + } + + private class SelfArgumentCapturingAnswer[T: ClassTag] extends Answer[T] { + var capturedArgument: T = _ + + override def answer(invocationOnMock: InvocationOnMock): T = { + val argumentClass = implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] + val argument = invocationOnMock.getArgumentAt(0, argumentClass) + this.capturedArgument = argument + argument + } + } + + private class AllArgumentsCapturingAnswer[I, T](returnValue: T) extends Answer[T] { + var capturedArguments: Seq[I] = _ + + override def answer(invocationOnMock: InvocationOnMock): T = { + capturedArguments = invocationOnMock.getArguments.map(_.asInstanceOf[I]).toSeq + returnValue + } + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala new file mode 100644 index 0000000000000..321fe1b3fd889 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala @@ -0,0 +1,323 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.{ByteArrayOutputStream, File, StringReader} +import java.util.{Properties, UUID} + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.{ConfigMapBuilder, Container, Pod, PodBuilder, SecretBuilder} +import okhttp3.RequestBody +import okio.Okio +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.mockito.Matchers.any +import org.mockito.Mockito +import org.scalatest.BeforeAndAfter +import org.scalatest.mock.MockitoSugar._ +import retrofit2.{Call, Response} +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory, StagedResourceIdentifier} +import org.apache.spark.util.Utils + +private[spark] class MountedDependencyManagerSuite extends SparkFunSuite with BeforeAndAfter { + import MountedDependencyManagerSuite.createTempFile + + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + private val APP_ID = "app-id" + private val LABELS = Map("label1" -> "label1value", "label2" -> "label2value") + private val NAMESPACE = "namespace" + private val STAGING_SERVER_URI = "http://localhost:8000" + private val INIT_CONTAINER_IMAGE = "spark-driver-init:latest" + private val JARS_DOWNLOAD_PATH = DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION.defaultValue.get + private val FILES_DOWNLOAD_PATH = DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION.defaultValue.get + private val DOWNLOAD_TIMEOUT_MINUTES = 5 + private val LOCAL_JARS = Seq(createTempFile("jar"), createTempFile("jar")) + private val JARS = Seq("hdfs://localhost:9000/jars/jar1.jar", + s"file://${LOCAL_JARS.head}", + LOCAL_JARS(1)) + private val LOCAL_FILES = Seq(createTempFile("txt")) + private val FILES = Seq("hdfs://localhost:9000/files/file1.txt", + LOCAL_FILES.head) + private val TRUSTSTORE_FILE = new File(createTempFile(".jks")) + private val TRUSTSTORE_PASSWORD = "trustStorePassword" + private val TRUSTSTORE_TYPE = "jks" + private val STAGING_SERVER_SSL_OPTIONS = SSLOptions( + enabled = true, + trustStore = Some(TRUSTSTORE_FILE), + trustStorePassword = Some(TRUSTSTORE_PASSWORD), + trustStoreType = Some(TRUSTSTORE_TYPE)) + private val JARS_RESOURCE_ID = "jarsId" + private val JARS_SECRET = "jarsSecret" + private val FILES_RESOURCE_ID = "filesId" + private val FILES_SECRET = "filesSecret" + private var retrofitClientFactory: RetrofitClientFactory = _ + private var retrofitClient: ResourceStagingServiceRetrofit = _ + + private var 
dependencyManagerUnderTest: MountedDependencyManager = _ + + before { + retrofitClientFactory = mock[RetrofitClientFactory] + retrofitClient = mock[ResourceStagingServiceRetrofit] + Mockito.when( + retrofitClientFactory.createRetrofitClient( + STAGING_SERVER_URI, classOf[ResourceStagingServiceRetrofit], STAGING_SERVER_SSL_OPTIONS)) + .thenReturn(retrofitClient) + dependencyManagerUnderTest = new MountedDependencyManagerImpl( + APP_ID, + LABELS, + NAMESPACE, + STAGING_SERVER_URI, + INIT_CONTAINER_IMAGE, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + DOWNLOAD_TIMEOUT_MINUTES, + JARS, + FILES, + STAGING_SERVER_SSL_OPTIONS, + retrofitClientFactory) + } + + test("Uploading jars should contact the staging server with the appropriate parameters") { + val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( + StagedResourceIdentifier("resourceId", "resourceSecret")) + Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) + .thenAnswer(capturingArgumentsAnswer) + dependencyManagerUnderTest.uploadJars() + testUploadSendsCorrectFiles(LOCAL_JARS, capturingArgumentsAnswer) + } + + test("Uploading files should contact the staging server with the appropriate parameters") { + val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( + StagedResourceIdentifier("resourceId", "resourceSecret")) + Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) + .thenAnswer(capturingArgumentsAnswer) + dependencyManagerUnderTest.uploadFiles() + testUploadSendsCorrectFiles(LOCAL_FILES, capturingArgumentsAnswer) + } + + test("Init container secret should contain jars, files, and trustStore") { + val jarsSecretBase64 = BaseEncoding.base64().encode(JARS_SECRET.getBytes(Charsets.UTF_8)) + val filesSecretBase64 = BaseEncoding.base64().encode(FILES_SECRET.getBytes(Charsets.UTF_8)) + val trustStoreBase64 = BaseEncoding.base64().encode(Files.toByteArray(TRUSTSTORE_FILE)) + val secret = dependencyManagerUnderTest.buildInitContainerSecret("jarsSecret", "filesSecret") + assert(secret.getMetadata.getName === s"$APP_ID-spark-init") + val expectedSecrets = Map( + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY -> jarsSecretBase64, + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY -> filesSecretBase64, + INIT_CONTAINER_TRUSTSTORE_SECRET_KEY -> trustStoreBase64) + assert(secret.getData.asScala === expectedSecrets) + } + + test("Init container config map should contain parameters for downloading from staging server") { + val configMap = dependencyManagerUnderTest.buildInitContainerConfigMap( + JARS_RESOURCE_ID, FILES_RESOURCE_ID) + assert(configMap.getMetadata.getName === s"$APP_ID-init-properties") + val propertiesRawString = configMap.getData.get(INIT_CONTAINER_CONFIG_MAP_KEY) + assert(propertiesRawString != null) + val propertiesReader = new StringReader(propertiesRawString) + val properties = new Properties() + properties.load(propertiesReader) + val propertiesMap = properties.stringPropertyNames().asScala.map { prop => + (prop, properties.getProperty(prop)) + }.toMap + val expectedProperties = Map[String, String]( + RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, + DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION.key -> JARS_DOWNLOAD_PATH, + DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION.key -> FILES_DOWNLOAD_PATH, + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> JARS_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH, + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> FILES_RESOURCE_ID, + 
INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH, + DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT.key -> s"${DOWNLOAD_TIMEOUT_MINUTES}m", + RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key -> INIT_CONTAINER_TRUSTSTORE_PATH, + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> "true", + RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key -> TRUSTSTORE_PASSWORD, + RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE) + assert(propertiesMap === expectedProperties) + } + + test("Resolving jars should map local paths to their mounted counterparts") { + val resolvedJars = dependencyManagerUnderTest.resolveSparkJars() + val expectedResolvedJars = Seq( + "hdfs://localhost:9000/jars/jar1.jar", + s"file://$JARS_DOWNLOAD_PATH/${new File(JARS(1)).getName}", + s"file://$JARS_DOWNLOAD_PATH/${new File(JARS(2)).getName}") + assert(resolvedJars === expectedResolvedJars) + } + + test("Resolving files should map local paths to their mounted counterparts") { + val resolvedFiles = dependencyManagerUnderTest.resolveSparkFiles() + val expectedResolvedFiles = Seq( + "hdfs://localhost:9000/files/file1.txt", + s"file://$FILES_DOWNLOAD_PATH/${new File(FILES(1)).getName}") + assert(resolvedFiles === expectedResolvedFiles) + } + + test("Downloading init container should be added to pod") { + val driverPod = configureDriverPod() + val podAnnotations = driverPod.getMetadata.getAnnotations + assert(podAnnotations.size === 1) + val initContainerRawAnnotation = podAnnotations.get(INIT_CONTAINER_ANNOTATION) + val initContainers = OBJECT_MAPPER.readValue( + initContainerRawAnnotation, classOf[Array[Container]]) + assert(initContainers.size === 1) + val initContainer = initContainers.head + assert(initContainer.getName === "spark-driver-init") + assert(initContainer.getImage === INIT_CONTAINER_IMAGE) + assert(initContainer.getImagePullPolicy === "IfNotPresent") + val volumeMounts = initContainer.getVolumeMounts + .asScala + .map(mount => (mount.getName, mount.getMountPath)) + .toMap + val expectedVolumeMounts = Map[String, String]( + DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, + DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH, + INIT_CONTAINER_PROPERTIES_FILE_VOLUME -> INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH, + INIT_CONTAINER_SECRETS_VOLUME_NAME -> INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH) + assert(volumeMounts === expectedVolumeMounts) + } + + test("Driver pod should have added volumes and volume mounts for file downloads") { + val driverPod = configureDriverPod() + val volumes = driverPod.getSpec.getVolumes.asScala.map(volume => (volume.getName, volume)).toMap + val initContainerPropertiesVolume = volumes(INIT_CONTAINER_PROPERTIES_FILE_VOLUME).getConfigMap + assert(initContainerPropertiesVolume != null) + assert(initContainerPropertiesVolume.getName === "config") + assert(initContainerPropertiesVolume.getItems.asScala.exists { keyToPath => + keyToPath.getKey == INIT_CONTAINER_CONFIG_MAP_KEY && + keyToPath.getPath == INIT_CONTAINER_PROPERTIES_FILE_NAME + }) + val jarsVolume = volumes(DOWNLOAD_JARS_VOLUME_NAME) + assert(jarsVolume.getEmptyDir != null) + val filesVolume = volumes(DOWNLOAD_FILES_VOLUME_NAME) + assert(filesVolume.getEmptyDir != null) + val initContainerSecretVolume = volumes(INIT_CONTAINER_SECRETS_VOLUME_NAME) + assert(initContainerSecretVolume.getSecret != null) + assert(initContainerSecretVolume.getSecret.getSecretName === "secret") + val driverContainer = driverPod.getSpec + .getContainers + .asScala + .find(_.getName == "driver-container").get + val 
driverContainerVolumeMounts = driverContainer.getVolumeMounts + .asScala + .map(mount => (mount.getName, mount.getMountPath)) + .toMap + val expectedVolumeMountNamesAndPaths = Map[String, String]( + DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, + DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH) + assert(driverContainerVolumeMounts === expectedVolumeMountNamesAndPaths) + val envs = driverContainer.getEnv + assert(envs.size() === 1) + assert(envs.asScala.head.getName === ENV_UPLOADED_JARS_DIR) + assert(envs.asScala.head.getValue === JARS_DOWNLOAD_PATH) + } + + private def configureDriverPod(): Pod = { + val initContainerSecret = new SecretBuilder() + .withNewMetadata().withName("secret").endMetadata() + .addToData("datakey", "datavalue") + .build() + val initContainerConfigMap = new ConfigMapBuilder() + .withNewMetadata().withName("config").endMetadata() + .addToData("datakey", "datavalue") + .build() + val basePod = new PodBuilder() + .withNewMetadata() + .withName("driver-pod") + .endMetadata() + .withNewSpec() + .addNewContainer() + .withName("driver-container") + .withImage("spark-driver:latest") + .endContainer() + .endSpec() + val adjustedPod = dependencyManagerUnderTest.configurePodToMountLocalDependencies( + "driver-container", + initContainerSecret, + initContainerConfigMap, + basePod).build() + adjustedPod + } + + private def testUploadSendsCorrectFiles( + expectedFiles: Seq[String], + capturingArgumentsAnswer: UploadDependenciesArgumentsCapturingAnswer) = { + val requestLabelsBytes = requestBodyBytes(capturingArgumentsAnswer.podLabelsArg) + val requestLabelsString = new String(requestLabelsBytes, Charsets.UTF_8) + val requestLabelsMap = OBJECT_MAPPER.readValue( + requestLabelsString, classOf[Map[String, String]]) + assert(requestLabelsMap === LABELS) + val requestNamespaceBytes = requestBodyBytes(capturingArgumentsAnswer.podNamespaceArg) + val requestNamespaceString = new String(requestNamespaceBytes, Charsets.UTF_8) + assert(requestNamespaceString === NAMESPACE) + val localJarsTarStream = new ByteArrayOutputStream() + CompressionUtils.writeTarGzipToStream(localJarsTarStream, expectedFiles) + val requestResourceBytes = requestBodyBytes(capturingArgumentsAnswer.podResourcesArg) + assert(requestResourceBytes.sameElements(localJarsTarStream.toByteArray)) + } + + private def requestBodyBytes(requestBody: RequestBody): Array[Byte] = { + Utils.tryWithResource(new ByteArrayOutputStream()) { outputStream => + Utils.tryWithResource(Okio.sink(outputStream)) { sink => + Utils.tryWithResource(Okio.buffer(sink)) { bufferedSink => + requestBody.writeTo(bufferedSink) + } + } + outputStream.toByteArray + } + } +} + +private class UploadDependenciesArgumentsCapturingAnswer(returnValue: StagedResourceIdentifier) + extends Answer[Call[StagedResourceIdentifier]] { + + var podLabelsArg: RequestBody = _ + var podNamespaceArg: RequestBody = _ + var podResourcesArg: RequestBody = _ + var kubernetesCredentialsArg: RequestBody = _ + + override def answer(invocationOnMock: InvocationOnMock): Call[StagedResourceIdentifier] = { + podLabelsArg = invocationOnMock.getArgumentAt(0, classOf[RequestBody]) + podNamespaceArg = invocationOnMock.getArgumentAt(1, classOf[RequestBody]) + podResourcesArg = invocationOnMock.getArgumentAt(2, classOf[RequestBody]) + kubernetesCredentialsArg = invocationOnMock.getArgumentAt(3, classOf[RequestBody]) + val responseCall = mock[Call[StagedResourceIdentifier]] + Mockito.when(responseCall.execute()).thenReturn(Response.success(returnValue)) + responseCall + } +} + +private 
object MountedDependencyManagerSuite { + def createTempFile(extension: String): String = { + val dir = Utils.createTempDir() + val file = new File(dir, s"${UUID.randomUUID().toString}.$extension") + Files.write(UUID.randomUUID().toString, file, Charsets.UTF_8) + file.getAbsolutePath + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala new file mode 100644 index 0000000000000..77eb7f2b9f49c --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.{ByteArrayOutputStream, File} +import java.util.UUID +import javax.ws.rs.core + +import com.google.common.base.Charsets +import com.google.common.io.Files +import okhttp3.{MediaType, ResponseBody} +import org.mockito.Matchers.any +import org.mockito.Mockito.{doAnswer, when} +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.BeforeAndAfter +import org.scalatest.mock.MockitoSugar._ +import retrofit2.{Call, Callback, Response} + +import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.util.Utils + +class KubernetesSparkDependencyDownloadInitContainerSuite + extends SparkFunSuite with BeforeAndAfter { + import KubernetesSparkDependencyDownloadInitContainerSuite.createTempFile + private val STAGING_SERVER_URI = "http://localhost:8000" + private val TRUSTSTORE_FILE = new File(createTempFile(".jks")) + private val TRUSTSTORE_PASSWORD = "trustStorePassword" + private val TRUSTSTORE_TYPE = "jks" + private val STAGING_SERVER_SSL_OPTIONS = SSLOptions( + enabled = true, + trustStore = Some(TRUSTSTORE_FILE), + trustStorePassword = Some(TRUSTSTORE_PASSWORD), + trustStoreType = Some(TRUSTSTORE_TYPE)) + private val JARS = Seq(createTempFile("jar"), createTempFile("jar")) + private val FILES = Seq(createTempFile("txt"), createTempFile("csv")) + private val DOWNLOAD_JARS_SECRET_LOCATION = createTempFile("txt") + private val DOWNLOAD_FILES_SECRET_LOCATION = createTempFile("txt") + private val JARS_RESOURCE_ID = "jarsId" + private val FILES_RESOURCE_ID = "filesId" + + private var sparkConf: SparkConf = _ + private var downloadJarsDir: File = _ + private var downloadFilesDir: File = _ + private var 
downloadJarsSecretValue: String = _ + private var downloadFilesSecretValue: String = _ + private var jarsCompressedBytes: Array[Byte] = _ + private var filesCompressedBytes: Array[Byte] = _ + private var retrofitClientFactory: RetrofitClientFactory = _ + private var retrofitClient: ResourceStagingServiceRetrofit = _ + private var initContainerUnderTest: KubernetesSparkDependencyDownloadInitContainer = _ + + override def beforeAll(): Unit = { + jarsCompressedBytes = compressPathsToBytes(JARS) + filesCompressedBytes = compressPathsToBytes(FILES) + downloadJarsSecretValue = Files.toString( + new File(DOWNLOAD_JARS_SECRET_LOCATION), Charsets.UTF_8) + downloadFilesSecretValue = Files.toString( + new File(DOWNLOAD_FILES_SECRET_LOCATION), Charsets.UTF_8) + } + + before { + downloadJarsDir = Utils.createTempDir() + downloadFilesDir = Utils.createTempDir() + retrofitClientFactory = mock[RetrofitClientFactory] + retrofitClient = mock[ResourceStagingServiceRetrofit] + sparkConf = new SparkConf(true) + .set(RESOURCE_STAGING_SERVER_URI, STAGING_SERVER_URI) + .set(INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER, JARS_RESOURCE_ID) + .set(INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION, DOWNLOAD_JARS_SECRET_LOCATION) + .set(INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER, FILES_RESOURCE_ID) + .set(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION, DOWNLOAD_FILES_SECRET_LOCATION) + .set(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION, downloadJarsDir.getAbsolutePath) + .set(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION, downloadFilesDir.getAbsolutePath) + .set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) + .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE, TRUSTSTORE_FILE.getAbsolutePath) + .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD, TRUSTSTORE_PASSWORD) + .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE, TRUSTSTORE_TYPE) + + when(retrofitClientFactory.createRetrofitClient( + STAGING_SERVER_URI, classOf[ResourceStagingServiceRetrofit], STAGING_SERVER_SSL_OPTIONS)) + .thenReturn(retrofitClient) + initContainerUnderTest = new KubernetesSparkDependencyDownloadInitContainer( + sparkConf, retrofitClientFactory) + } + + after { + downloadJarsDir.delete() + downloadFilesDir.delete() + } + + test("Downloads should unpack response body streams to directories") { + val downloadJarsCall = mock[Call[ResponseBody]] + val downloadFilesCall = mock[Call[ResponseBody]] + when(retrofitClient.downloadResources(JARS_RESOURCE_ID, downloadJarsSecretValue)) + .thenReturn(downloadJarsCall) + when(retrofitClient.downloadResources(FILES_RESOURCE_ID, downloadFilesSecretValue)) + .thenReturn(downloadFilesCall) + val jarsResponseBody = ResponseBody.create( + MediaType.parse(core.MediaType.APPLICATION_OCTET_STREAM), jarsCompressedBytes) + val filesResponseBody = ResponseBody.create( + MediaType.parse(core.MediaType.APPLICATION_OCTET_STREAM), filesCompressedBytes) + doAnswer(new InvokeCallbackAnswer(downloadJarsCall, jarsResponseBody)) + .when(downloadJarsCall) + .enqueue(any()) + doAnswer(new InvokeCallbackAnswer(downloadFilesCall, filesResponseBody)) + .when(downloadFilesCall) + .enqueue(any()) + initContainerUnderTest.run() + checkWrittenFilesAreTheSameAsOriginal(JARS, downloadJarsDir) + checkWrittenFilesAreTheSameAsOriginal(FILES, downloadFilesDir) + } + + private def checkWrittenFilesAreTheSameAsOriginal( + originalFiles: Iterable[String], downloadDir: File): Unit = { + originalFiles.map(new File(_)).foreach { file => + val writtenFile = new File(downloadDir, file.getName) + assert(writtenFile.exists) + val originalJarContents = Seq(Files.toByteArray(file): _*) 
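+      // The copy unpacked by the init container must match the original file byte-for-byte.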
+ val writtenJarContents = Seq(Files.toByteArray(writtenFile): _*) + assert(writtenJarContents === originalJarContents) + } + } + + private def compressPathsToBytes(paths: Iterable[String]): Array[Byte] = { + Utils.tryWithResource(new ByteArrayOutputStream()) { compressedBytes => + CompressionUtils.writeTarGzipToStream (compressedBytes, paths) + compressedBytes.toByteArray + } + } +} + +private object KubernetesSparkDependencyDownloadInitContainerSuite { + def createTempFile(extension: String): String = { + val dir = Utils.createTempDir() + val file = new File(dir, s"${UUID.randomUUID().toString}.$extension") + Files.write(UUID.randomUUID().toString, file, Charsets.UTF_8) + file.getAbsolutePath + } +} + +private class InvokeCallbackAnswer(call: Call[ResponseBody], responseBody: ResponseBody) + extends Answer[Unit] { + override def answer(invocationOnMock: InvocationOnMock): Unit = { + val callback = invocationOnMock.getArgumentAt(0, classOf[Callback[ResponseBody]]) + val response = Response.success(responseBody) + callback.onResponse(call, response) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala index 51c5e43af1124..08be8af30b3bc 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala @@ -77,7 +77,7 @@ class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { private def runUploadAndDownload(sslOptions: SSLOptions): Unit = { val scheme = if (sslOptions.enabled) "https" else "http" - val retrofitService = RetrofitUtils.createRetrofitClient( + val retrofitService = RetrofitClientFactoryImpl.createRetrofitClient( s"$scheme://127.0.0.1:$serverPort/", classOf[ResourceStagingServiceRetrofit], sslOptions) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index e9f88e37a5f89..a10fe8fb58408 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -72,27 +72,14 @@ maven-assembly-plugin - driver-docker-dist + docker-dist pre-integration-test single - src/main/assembly/driver-assembly.xml - - posix - - - - executor-docker-dist - pre-integration-test - - single - - - - src/main/assembly/executor-assembly.xml + src/main/assembly/docker-assembly.xml posix diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml b/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/docker-assembly.xml similarity index 95% rename from resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml rename to resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/docker-assembly.xml index b5fcaa75f049c..2b48d366256fe 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/docker-assembly.xml @@ -15,7 +15,7 @@ ~ limitations under the License. 
--> - driver-docker-dist + docker-dist tar.gz dir @@ -51,9 +51,9 @@ - src/main/docker/driver + src/main/docker/ - + dockerfiles **/* diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml b/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml deleted file mode 100644 index d97ba56562a12..0000000000000 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml +++ /dev/null @@ -1,84 +0,0 @@ - - - executor-docker-dist - - tar.gz - dir - - false - - - - ${project.parent.basedir}/core/src/main/resources/org/apache/spark/ui/static/ - - ui-resources/org/apache/spark/ui/static - - **/* - - - - - ${project.parent.basedir}/sbin/ - - sbin - - **/* - - - - - ${project.parent.basedir}/bin/ - - bin - - **/* - - - - - ${project.parent.basedir}/conf/ - - conf - - **/* - - - - - src/main/docker/executor - - - - **/* - - - - - - jars - true - false - runtime - false - - org.apache.spark:spark-assembly_${scala.binary.version}:pom - org.spark-project.spark:unused - - - - diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-init/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-init/Dockerfile new file mode 100644 index 0000000000000..59029a6c08b4a --- /dev/null +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-init/Dockerfile @@ -0,0 +1,38 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM openjdk:8-alpine + +# If this docker file is being used in the context of building your images from a Spark distribution, the docker build +# command should be invoked from the top level directory of the Spark distribution. E.g.: +# docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . + +RUN apk upgrade --update +RUN apk add --update bash +RUN mkdir -p /opt/spark +RUN touch /opt/spark/RELEASE + +ADD jars /opt/spark/jars +ADD bin /opt/spark/bin +ADD sbin /opt/spark/sbin +ADD conf /opt/spark/conf + +ENV SPARK_HOME /opt/spark + +WORKDIR /opt/spark + +ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.v2.KubernetesSparkDependencyDownloadInitContainer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile new file mode 100644 index 0000000000000..40f9459dc06dc --- /dev/null +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile @@ -0,0 +1,43 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM openjdk:8-alpine + +# If this docker file is being used in the context of building your images from a Spark distribution, the docker build +# command should be invoked from the top level directory of the Spark distribution. E.g.: +# docker build -t spark-driver:latest -f dockerfiles/driver/Dockerfile . + +RUN apk upgrade --update +RUN apk add --update bash +RUN mkdir -p /opt/spark +RUN touch /opt/spark/RELEASE + +ADD jars /opt/spark/jars +ADD examples /opt/spark/examples +ADD bin /opt/spark/bin +ADD sbin /opt/spark/sbin +ADD conf /opt/spark/conf + +ENV SPARK_HOME /opt/spark + +WORKDIR /opt/spark + +CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ + if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + exec ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY $SPARK_DRIVER_CLASS $SPARK_DRIVER_ARGS diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile new file mode 100644 index 0000000000000..15e1ce75815df --- /dev/null +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile @@ -0,0 +1,38 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM openjdk:8-alpine + +# If this docker file is being used in the context of building your images from a Spark distribution, the docker build +# command should be invoked from the top level directory of the Spark distribution. E.g.: +# docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . 
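+# For this image the analogous command would be the following, where the tag name is only
+# illustrative and not defined elsewhere in this change:
+# docker build -t spark-resource-staging-server:latest -f dockerfiles/resource-staging-server/Dockerfile .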
+ +RUN apk upgrade --update +RUN apk add --update bash +RUN mkdir -p /opt/spark +RUN touch /opt/spark/RELEASE + +ADD jars /opt/spark/jars +ADD bin /opt/spark/bin +ADD sbin /opt/spark/sbin +ADD conf /opt/spark/conf + +ENV SPARK_HOME /opt/spark + +WORKDIR /opt/spark + +ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.v2.ResourceStagingServer" ] diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 5418afa25ca85..ac7a549c9b483 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -33,7 +33,11 @@ org.apache.spark spark-kubernetes_${scala.binary.version} ${project.version} - test + + + org.apache.spark + spark-core_${scala.binary.version} + ${project.version} org.apache.spark @@ -66,7 +70,7 @@ spark-docker-minimal-bundle_${scala.binary.version} ${project.version} tar.gz - driver-docker-dist + docker-dist test @@ -147,7 +151,7 @@ - copy-test-spark-jobs-to-docker-driver + copy-test-spark-jobs-to-docker-dist pre-integration-test copy @@ -159,65 +163,20 @@ spark-kubernetes-integration-tests-spark-jobs_${scala.binary.version} ${project.version} jar - ${project.build.directory}/docker/driver/examples/integration-tests-jars + ${project.build.directory}/docker/examples/integration-tests-jars org.apache.spark spark-kubernetes-integration-tests-spark-jobs-helpers_${scala.binary.version} ${project.version} jar - ${project.build.directory}/docker/driver/examples/integration-tests-jars - - - - - - copy-test-spark-jobs-to-docker-executor - pre-integration-test - - copy - - - - - org.apache.spark - spark-kubernetes-integration-tests-spark-jobs_${scala.binary.version} - ${project.version} - jar - ${project.build.directory}/docker/executor/examples/integration-tests-jars - - - org.apache.spark - spark-kubernetes-integration-tests-spark-jobs-helpers_${scala.binary.version} - ${project.version} - jar - ${project.build.directory}/docker/executor/examples/integration-tests-jars - - - - - - unpack-docker-driver-bundle - pre-integration-test - - unpack - - - - - org.apache.spark - spark-docker-minimal-bundle_${scala.binary.version} - ${project.version} - driver-docker-dist - tar.gz - true - ${project.build.directory}/docker/driver + ${project.build.directory}/docker/examples/integration-tests-jars - unpack-docker-executor-bundle + unpack-docker-bundle pre-integration-test unpack @@ -228,10 +187,10 @@ org.apache.spark spark-docker-minimal-bundle_${scala.binary.version} ${project.version} - executor-docker-dist + docker-dist tar.gz true - ${project.build.directory}/docker/executor + ${project.build.directory}/docker/ diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 750e7668b9912..abbf7e4d5ce1b 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -16,119 +16,23 @@ */ package org.apache.spark.deploy.kubernetes.integrationtest -import java.io.File import java.nio.file.Paths -import java.util.UUID -import java.util.concurrent.TimeUnit import com.google.common.base.Charsets 
-import com.google.common.collect.ImmutableList import com.google.common.io.Files -import com.google.common.util.concurrent.SettableFuture -import io.fabric8.kubernetes.api.model.Pod -import io.fabric8.kubernetes.client.{Config, KubernetesClient, KubernetesClientException, Watcher} -import io.fabric8.kubernetes.client.Watcher.Action -import org.scalatest.BeforeAndAfter -import org.scalatest.concurrent.{Eventually, PatienceConfiguration} +import org.scalatest.Suite +import org.scalatest.concurrent.PatienceConfiguration import org.scalatest.time.{Minutes, Seconds, Span} -import scala.collection.JavaConverters._ -import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} -import org.apache.spark.deploy.SparkSubmit -import org.apache.spark.deploy.kubernetes.SSLUtils -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.SparkFunSuite import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube -import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 -import org.apache.spark.deploy.kubernetes.submit.v1.{Client, ExternalSuppliedUrisDriverServiceManager} -import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} -import org.apache.spark.util.Utils -private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { - - private val EXAMPLES_JAR_FILE = Paths.get("target", "integration-tests-spark-jobs") - .toFile - .listFiles()(0) - - private val HELPER_JAR_FILE = Paths.get("target", "integration-tests-spark-jobs-helpers") - .toFile - .listFiles()(0) - private val SUBMITTER_LOCAL_MAIN_APP_RESOURCE = s"file://${EXAMPLES_JAR_FILE.getAbsolutePath}" - private val CONTAINER_LOCAL_MAIN_APP_RESOURCE = s"local:///opt/spark/examples/" + - s"integration-tests-jars/${EXAMPLES_JAR_FILE.getName}" - private val CONTAINER_LOCAL_HELPER_JAR_PATH = s"local:///opt/spark/examples/" + - s"integration-tests-jars/${HELPER_JAR_FILE.getName}" - - private val TEST_EXISTENCE_FILE = Paths.get("test-data", "input.txt").toFile - private val TEST_EXISTENCE_FILE_CONTENTS = Files.toString(TEST_EXISTENCE_FILE, Charsets.UTF_8) - private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) - private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) - private val SPARK_PI_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + - ".integrationtest.jobs.SparkPiWithInfiniteWait" - private val FILE_EXISTENCE_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + - ".integrationtest.jobs.FileExistenceTest" - private val NAMESPACE = UUID.randomUUID().toString.replaceAll("-", "") - private var minikubeKubernetesClient: KubernetesClient = _ - private var clientConfig: Config = _ - private var sparkConf: SparkConf = _ +private[spark] class KubernetesSuite extends SparkFunSuite { override def beforeAll(): Unit = { Minikube.startMinikube() new SparkDockerImageBuilder(Minikube.getDockerEnv).buildSparkDockerImages() - Minikube.getKubernetesClient.namespaces.createNew() - .withNewMetadata() - .withName(NAMESPACE) - .endMetadata() - .done() - minikubeKubernetesClient = Minikube.getKubernetesClient.inNamespace(NAMESPACE) - clientConfig = minikubeKubernetesClient.getConfiguration - } - - before { - Eventually.eventually(TIMEOUT, INTERVAL) { - val podsList = minikubeKubernetesClient.pods().list() - assert(podsList == null - || podsList.getItems == null - || podsList.getItems.isEmpty - ) - val 
servicesList = minikubeKubernetesClient.services().list() - assert(servicesList == null - || servicesList.getItems == null - || servicesList.getItems.isEmpty) - } - sparkConf = new SparkConf(true) - .setMaster(s"k8s://https://${Minikube.getMinikubeIp}:8443") - .set(KUBERNETES_SUBMIT_CA_CERT_FILE, clientConfig.getCaCertFile) - .set(KUBERNETES_SUBMIT_CLIENT_KEY_FILE, clientConfig.getClientKeyFile) - .set(KUBERNETES_SUBMIT_CLIENT_CERT_FILE, clientConfig.getClientCertFile) - .set(KUBERNETES_NAMESPACE, NAMESPACE) - .set(DRIVER_DOCKER_IMAGE, "spark-driver:latest") - .set(EXECUTOR_DOCKER_IMAGE, "spark-executor:latest") - .setJars(Seq(HELPER_JAR_FILE.getAbsolutePath)) - .set("spark.executor.memory", "500m") - .set("spark.executor.cores", "1") - .set("spark.executors.instances", "1") - .set("spark.app.name", "spark-pi") - .set("spark.ui.enabled", "true") - .set("spark.testing", "false") - .set(WAIT_FOR_APP_COMPLETION, false) - } - - after { - val pods = minikubeKubernetesClient.pods().list().getItems.asScala - pods.par.foreach(pod => { - minikubeKubernetesClient - .pods() - .withName(pod.getMetadata.getName) - .withGracePeriod(60) - .delete - }) - // spark-submit sets system properties so we have to clear them - new SparkConf(true) - .getAll.map(_._1) - .filter(_ != "spark.docker.test.persistMinikube") - .foreach { System.clearProperty } } override def afterAll(): Unit = { @@ -137,247 +41,33 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { } } - private def getSparkMetricsService(sparkBaseAppName: String): SparkRestApiV1 = { - val serviceName = minikubeKubernetesClient.services() - .withLabel("spark-app-name", sparkBaseAppName) - .list() - .getItems - .get(0) - .getMetadata - .getName - Minikube.getService[SparkRestApiV1](serviceName, NAMESPACE, "spark-ui-port") - } - - private def expectationsForStaticAllocation(sparkMetricsService: SparkRestApiV1): Unit = { - val apps = Eventually.eventually(TIMEOUT, INTERVAL) { - val result = sparkMetricsService - .getApplications(ImmutableList.of(ApplicationStatus.RUNNING, ApplicationStatus.COMPLETED)) - assert(result.size == 1 - && !result.head.id.equalsIgnoreCase("appid") - && !result.head.id.equalsIgnoreCase("{appId}")) - result - } - Eventually.eventually(TIMEOUT, INTERVAL) { - val result = sparkMetricsService.getExecutors(apps.head.id) - assert(result.size == 2) - assert(result.count(exec => exec.id != "driver") == 1) - result - } - Eventually.eventually(TIMEOUT, INTERVAL) { - val result = sparkMetricsService.getStages( - apps.head.id, Seq(StageStatus.COMPLETE).asJava) - assert(result.size == 1) - result - } - } - - test("Run a simple example") { - new Client( - sparkConf = sparkConf, - mainClass = SPARK_PI_MAIN_CLASS, - mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) + override def nestedSuites: scala.collection.immutable.IndexedSeq[Suite] = { + Vector( + new KubernetesV1Suite, + new KubernetesV2Suite) } +} - test("Run using spark-submit") { - val args = Array( - "--master", s"k8s://https://${Minikube.getMinikubeIp}:8443", - "--deploy-mode", "cluster", - "--kubernetes-namespace", NAMESPACE, - "--name", "spark-pi", - "--executor-memory", "512m", - "--executor-cores", "1", - "--num-executors", "1", - "--jars", HELPER_JAR_FILE.getAbsolutePath, - "--class", SPARK_PI_MAIN_CLASS, - "--conf", "spark.ui.enabled=true", - "--conf", "spark.testing=false", - "--conf", 
s"${KUBERNETES_SUBMIT_CA_CERT_FILE.key}=${clientConfig.getCaCertFile}", - "--conf", s"${KUBERNETES_SUBMIT_CLIENT_KEY_FILE.key}=${clientConfig.getClientKeyFile}", - "--conf", s"${KUBERNETES_SUBMIT_CLIENT_CERT_FILE.key}=${clientConfig.getClientCertFile}", - "--conf", s"${EXECUTOR_DOCKER_IMAGE.key}=spark-executor:latest", - "--conf", s"${DRIVER_DOCKER_IMAGE.key}=spark-driver:latest", - "--conf", s"${WAIT_FOR_APP_COMPLETION.key}=false", - EXAMPLES_JAR_FILE.getAbsolutePath) - SparkSubmit.main(args) - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - } - - test("Run with the examples jar on the docker image") { - sparkConf.setJars(Seq(CONTAINER_LOCAL_HELPER_JAR_PATH)) - new Client( - sparkConf = sparkConf, - mainClass = SPARK_PI_MAIN_CLASS, - mainAppResource = CONTAINER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - } - - test("Run with custom labels and annotations") { - sparkConf.set(KUBERNETES_DRIVER_LABELS, "label1=label1value,label2=label2value") - sparkConf.set(KUBERNETES_DRIVER_ANNOTATIONS, "annotation1=annotation1value," + - "annotation2=annotation2value") - new Client( - sparkConf = sparkConf, - mainClass = SPARK_PI_MAIN_CLASS, - mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val driverPodMetadata = minikubeKubernetesClient - .pods - .withLabel("spark-app-name", "spark-pi") - .list() - .getItems - .get(0) - .getMetadata - val driverPodLabels = driverPodMetadata.getLabels - // We can't match all of the selectors directly since one of the selectors is based on the - // launch time. - assert(driverPodLabels.size === 5, "Unexpected number of pod labels.") - assert(driverPodLabels.get("spark-app-name") === "spark-pi", "Unexpected value for" + - " spark-app-name label.") - assert(driverPodLabels.get("spark-app-id").startsWith("spark-pi"), "Unexpected value for" + - " spark-app-id label (should be prefixed with the app name).") - assert(driverPodLabels.get("label1") === "label1value", "Unexpected value for label1") - assert(driverPodLabels.get("label2") === "label2value", "Unexpected value for label2") - val driverPodAnnotations = driverPodMetadata.getAnnotations - assert(driverPodAnnotations.size === 2, "Unexpected number of pod annotations.") - assert(driverPodAnnotations.get("annotation1") === "annotation1value", - "Unexpected value for annotation1") - assert(driverPodAnnotations.get("annotation2") === "annotation2value", - "Unexpected value for annotation2") - } - - test("Enable SSL on the driver submit server") { - val (keyStoreFile, trustStoreFile) = SSLUtils.generateKeyStoreTrustStorePair( - Minikube.getMinikubeIp, - "changeit", - "changeit", - "changeit") - sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, s"file://${keyStoreFile.getAbsolutePath}") - sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyStorePassword", "changeit") - sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyPassword", "changeit") - sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE, - s"file://${trustStoreFile.getAbsolutePath}") - sparkConf.set("spark.ssl.kubernetes.driversubmitserver.trustStorePassword", "changeit") - sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) - new Client( - sparkConf = sparkConf, - mainClass = SPARK_PI_MAIN_CLASS, - mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - } - - 
test("Enable SSL on the driver submit server using PEM files") { - val (keyPem, certPem) = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) - sparkConf.set(DRIVER_SUBMIT_SSL_KEY_PEM, s"file://${keyPem.getAbsolutePath}") - sparkConf.set(DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM, s"file://${certPem.getAbsolutePath}") - sparkConf.set(DRIVER_SUBMIT_SSL_SERVER_CERT_PEM, s"file://${certPem.getAbsolutePath}") - sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) - new Client( - sparkConf = sparkConf, - mainClass = SPARK_PI_MAIN_CLASS, - mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - } - - test("Added files should exist on the driver.") { - sparkConf.set("spark.files", TEST_EXISTENCE_FILE.getAbsolutePath) - sparkConf.setAppName("spark-file-existence-test") - val podCompletedFuture = SettableFuture.create[Boolean] - val watch = new Watcher[Pod] { - override def eventReceived(action: Action, pod: Pod): Unit = { - val containerStatuses = pod.getStatus.getContainerStatuses.asScala - val allSuccessful = containerStatuses.nonEmpty && containerStatuses - .forall(status => { - status.getState.getTerminated != null && status.getState.getTerminated.getExitCode == 0 - }) - if (allSuccessful) { - podCompletedFuture.set(true) - } else { - val failedContainers = containerStatuses.filter(container => { - container.getState.getTerminated != null && - container.getState.getTerminated.getExitCode != 0 - }) - if (failedContainers.nonEmpty) { - podCompletedFuture.setException(new SparkException( - "One or more containers in the driver failed with a nonzero exit code.")) - } - } - } - - override def onClose(e: KubernetesClientException): Unit = { - logWarning("Watch closed", e) - } - } - Utils.tryWithResource(minikubeKubernetesClient - .pods - .withLabel("spark-app-name", "spark-file-existence-test") - .watch(watch)) { _ => - new Client( - sparkConf = sparkConf, - mainClass = FILE_EXISTENCE_MAIN_CLASS, - mainAppResource = CONTAINER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array(TEST_EXISTENCE_FILE.getName, TEST_EXISTENCE_FILE_CONTENTS)).run() - assert(podCompletedFuture.get(60, TimeUnit.SECONDS), "Failed to run driver pod") - val driverPod = minikubeKubernetesClient - .pods - .withLabel("spark-app-name", "spark-file-existence-test") - .list() - .getItems - .get(0) - val podLog = minikubeKubernetesClient - .pods - .withName(driverPod.getMetadata.getName) - .getLog - assert(podLog.contains(s"File found at /opt/spark/${TEST_EXISTENCE_FILE.getName}" + - s" with correct contents."), "Job did not find the file as expected.") - } - } +private[spark] object KubernetesSuite { + val EXAMPLES_JAR_FILE = Paths.get("target", "integration-tests-spark-jobs") + .toFile + .listFiles()(0) - test("Use external URI provider") { - val externalUriProviderWatch = new ExternalUriProviderWatch(minikubeKubernetesClient) - Utils.tryWithResource(minikubeKubernetesClient.services() - .withLabel("spark-app-name", "spark-pi") - .watch(externalUriProviderWatch)) { _ => - sparkConf.set(DRIVER_SERVICE_MANAGER_TYPE, ExternalSuppliedUrisDriverServiceManager.TYPE) - new Client( - sparkConf = sparkConf, - mainClass = SPARK_PI_MAIN_CLASS, - mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - assert(externalUriProviderWatch.annotationSet.get) - val driverService = minikubeKubernetesClient - .services() - .withLabel("spark-app-name", "spark-pi") - .list() - 
.getItems - .asScala(0) - assert(driverService.getMetadata.getAnnotations.containsKey(ANNOTATION_PROVIDE_EXTERNAL_URI), - "External URI request annotation was not set on the driver service.") - // Unfortunately we can't check the correctness of the actual value of the URI, as it depends - // on the driver submission port set on the driver service but we remove that port from the - // service once the submission is complete. - assert(driverService.getMetadata.getAnnotations.containsKey(ANNOTATION_RESOLVED_EXTERNAL_URI), - "Resolved URI annotation not set on driver service.") - } - } + val HELPER_JAR_FILE = Paths.get("target", "integration-tests-spark-jobs-helpers") + .toFile + .listFiles()(0) + val SUBMITTER_LOCAL_MAIN_APP_RESOURCE = s"file://${EXAMPLES_JAR_FILE.getAbsolutePath}" + val CONTAINER_LOCAL_MAIN_APP_RESOURCE = s"local:///opt/spark/examples/" + + s"integration-tests-jars/${EXAMPLES_JAR_FILE.getName}" + val CONTAINER_LOCAL_HELPER_JAR_PATH = s"local:///opt/spark/examples/" + + s"integration-tests-jars/${HELPER_JAR_FILE.getName}" - test("Mount the Kubernetes credentials onto the driver pod") { - sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, clientConfig.getCaCertFile) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, clientConfig.getClientKeyFile) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_CERT_FILE, clientConfig.getClientCertFile) - new Client( - sparkConf = sparkConf, - mainClass = SPARK_PI_MAIN_CLASS, - mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - } + val TEST_EXISTENCE_FILE = Paths.get("test-data", "input.txt").toFile + val TEST_EXISTENCE_FILE_CONTENTS = Files.toString(TEST_EXISTENCE_FILE, Charsets.UTF_8) + val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) + val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) + val SPARK_PI_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + + ".integrationtest.jobs.SparkPiWithInfiniteWait" + val FILE_EXISTENCE_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + + ".integrationtest.jobs.FileExistenceTest" } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala new file mode 100644 index 0000000000000..53e02f9e479c1 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.integrationtest + +import java.util.UUID + +import org.scalatest.concurrent.Eventually +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube + +private[spark] class KubernetesTestComponents { + + val namespace = UUID.randomUUID().toString.replaceAll("-", "") + val kubernetesClient = Minikube.getKubernetesClient.inNamespace(namespace) + val clientConfig = kubernetesClient.getConfiguration + + def createNamespace(): Unit = { + Minikube.getKubernetesClient.namespaces.createNew() + .withNewMetadata() + .withName(namespace) + .endMetadata() + .done() + } + + def deleteNamespace(): Unit = { + Minikube.getKubernetesClient.namespaces.withName(namespace).delete() + Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { + val namespaceList = Minikube.getKubernetesClient + .namespaces() + .list() + .getItems() + .asScala + require(!namespaceList.exists(_.getMetadata.getName == namespace)) + } + } + + def newSparkConf(): SparkConf = { + new SparkConf(true) + .setMaster(s"k8s://https://${Minikube.getMinikubeIp}:8443") + .set(KUBERNETES_SUBMIT_CA_CERT_FILE, clientConfig.getCaCertFile) + .set(KUBERNETES_SUBMIT_CLIENT_KEY_FILE, clientConfig.getClientKeyFile) + .set(KUBERNETES_SUBMIT_CLIENT_CERT_FILE, clientConfig.getClientCertFile) + .set(KUBERNETES_NAMESPACE, namespace) + .set(DRIVER_DOCKER_IMAGE, "spark-driver:latest") + .set(EXECUTOR_DOCKER_IMAGE, "spark-executor:latest") + .setJars(Seq(KubernetesSuite.HELPER_JAR_FILE.getAbsolutePath)) + .set("spark.executor.memory", "500m") + .set("spark.executor.cores", "1") + .set("spark.executors.instances", "1") + .set("spark.app.name", "spark-pi") + .set("spark.ui.enabled", "true") + .set("spark.testing", "false") + .set(WAIT_FOR_APP_COMPLETION, false) + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala new file mode 100644 index 0000000000000..a4e3353032b71 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala @@ -0,0 +1,306 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.integrationtest + +import java.util.concurrent.TimeUnit + +import com.google.common.collect.ImmutableList +import com.google.common.util.concurrent.SettableFuture +import io.fabric8.kubernetes.api.model.Pod +import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action +import org.scalatest.{BeforeAndAfter, DoNotDiscover} +import org.scalatest.concurrent.Eventually +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.SSLUtils +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 +import org.apache.spark.deploy.kubernetes.submit.v1.{Client, ExternalSuppliedUrisDriverServiceManager} +import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} +import org.apache.spark.util.Utils + +@DoNotDiscover +private[spark] class KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter { + + private var kubernetesTestComponents: KubernetesTestComponents = _ + private var sparkConf: SparkConf = _ + + override def beforeAll(): Unit = { + kubernetesTestComponents = new KubernetesTestComponents() + kubernetesTestComponents.createNamespace() + } + + override def afterAll(): Unit = { + kubernetesTestComponents.deleteNamespace() + } + + before { + Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { + val podsList = kubernetesTestComponents.kubernetesClient.pods().list() + assert(podsList == null + || podsList.getItems == null + || podsList.getItems.isEmpty + ) + val servicesList = kubernetesTestComponents.kubernetesClient.services().list() + assert(servicesList == null + || servicesList.getItems == null + || servicesList.getItems.isEmpty) + } + sparkConf = kubernetesTestComponents.newSparkConf() + } + + after { + val pods = kubernetesTestComponents.kubernetesClient.pods().list().getItems.asScala + pods.par.foreach(pod => { + kubernetesTestComponents.kubernetesClient.pods() + .withName(pod.getMetadata.getName) + .withGracePeriod(60) + .delete + }) + } + + private def getSparkMetricsService(sparkBaseAppName: String): SparkRestApiV1 = { + val serviceName = kubernetesTestComponents.kubernetesClient.services() + .withLabel("spark-app-name", sparkBaseAppName) + .list() + .getItems + .get(0) + .getMetadata + .getName + Minikube.getService[SparkRestApiV1](serviceName, + kubernetesTestComponents.namespace, "spark-ui-port") + } + + private def expectationsForStaticAllocation(sparkMetricsService: SparkRestApiV1): Unit = { + val apps = Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { + val result = sparkMetricsService + .getApplications(ImmutableList.of(ApplicationStatus.RUNNING, ApplicationStatus.COMPLETED)) + assert(result.size == 1 + && !result.head.id.equalsIgnoreCase("appid") + && !result.head.id.equalsIgnoreCase("{appId}")) + result + } + Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { + val result = sparkMetricsService.getExecutors(apps.head.id) + assert(result.size == 2) + assert(result.count(exec => exec.id != "driver") == 1) + result + } + Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { + val result = sparkMetricsService.getStages( + apps.head.id, 
Seq(StageStatus.COMPLETE).asJava) + assert(result.size == 1) + result + } + } + + test("Run a simple example") { + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + val sparkMetricsService = getSparkMetricsService("spark-pi") + expectationsForStaticAllocation(sparkMetricsService) + } + + test("Run with the examples jar on the docker image") { + sparkConf.setJars(Seq(KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + val sparkMetricsService = getSparkMetricsService("spark-pi") + expectationsForStaticAllocation(sparkMetricsService) + } + + test("Run with custom labels and annotations") { + sparkConf.set(KUBERNETES_DRIVER_LABELS, "label1=label1value,label2=label2value") + sparkConf.set(KUBERNETES_DRIVER_ANNOTATIONS, "annotation1=annotation1value," + + "annotation2=annotation2value") + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + val driverPodMetadata = kubernetesTestComponents.kubernetesClient + .pods + .withLabel("spark-app-name", "spark-pi") + .list() + .getItems + .get(0) + .getMetadata + val driverPodLabels = driverPodMetadata.getLabels + // We can't match all of the selectors directly since one of the selectors is based on the + // launch time. + assert(driverPodLabels.size === 5, "Unexpected number of pod labels.") + assert(driverPodLabels.get("spark-app-name") === "spark-pi", "Unexpected value for" + + " spark-app-name label.") + assert(driverPodLabels.get("spark-app-id").startsWith("spark-pi"), "Unexpected value for" + + " spark-app-id label (should be prefixed with the app name).") + assert(driverPodLabels.get("label1") === "label1value", "Unexpected value for label1") + assert(driverPodLabels.get("label2") === "label2value", "Unexpected value for label2") + val driverPodAnnotations = driverPodMetadata.getAnnotations + assert(driverPodAnnotations.size === 2, "Unexpected number of pod annotations.") + assert(driverPodAnnotations.get("annotation1") === "annotation1value", + "Unexpected value for annotation1") + assert(driverPodAnnotations.get("annotation2") === "annotation2value", + "Unexpected value for annotation2") + } + + test("Enable SSL on the driver submit server") { + val (keyStoreFile, trustStoreFile) = SSLUtils.generateKeyStoreTrustStorePair( + Minikube.getMinikubeIp, + "changeit", + "changeit", + "changeit") + sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, s"file://${keyStoreFile.getAbsolutePath}") + sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyStorePassword", "changeit") + sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyPassword", "changeit") + sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE, + s"file://${trustStoreFile.getAbsolutePath}") + sparkConf.set("spark.ssl.kubernetes.driversubmitserver.trustStorePassword", "changeit") + sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + } + + test("Enable SSL on the driver submit server using PEM files") { 
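+    // Same flow as the keystore-based test above, but the driver submit server's TLS key and
+    // certificate are supplied as PEM files rather than through a Java keystore.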
+ val (keyPem, certPem) = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) + sparkConf.set(DRIVER_SUBMIT_SSL_KEY_PEM, s"file://${keyPem.getAbsolutePath}") + sparkConf.set(DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM, s"file://${certPem.getAbsolutePath}") + sparkConf.set(DRIVER_SUBMIT_SSL_SERVER_CERT_PEM, s"file://${certPem.getAbsolutePath}") + sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + } + + test("Added files should exist on the driver.") { + sparkConf.set("spark.files", KubernetesSuite.TEST_EXISTENCE_FILE.getAbsolutePath) + sparkConf.setAppName("spark-file-existence-test") + val podCompletedFuture = SettableFuture.create[Boolean] + val watch = new Watcher[Pod] { + override def eventReceived(action: Action, pod: Pod): Unit = { + val containerStatuses = pod.getStatus.getContainerStatuses.asScala + val allSuccessful = containerStatuses.nonEmpty && containerStatuses + .forall(status => { + status.getState.getTerminated != null && status.getState.getTerminated.getExitCode == 0 + }) + if (allSuccessful) { + podCompletedFuture.set(true) + } else { + val failedContainers = containerStatuses.filter(container => { + container.getState.getTerminated != null && + container.getState.getTerminated.getExitCode != 0 + }) + if (failedContainers.nonEmpty) { + podCompletedFuture.setException(new SparkException( + "One or more containers in the driver failed with a nonzero exit code.")) + } + } + } + + override def onClose(e: KubernetesClientException): Unit = { + logWarning("Watch closed", e) + } + } + Utils.tryWithResource(kubernetesTestComponents.kubernetesClient + .pods + .withLabel("spark-app-name", "spark-file-existence-test") + .watch(watch)) { _ => + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.FILE_EXISTENCE_MAIN_CLASS, + mainAppResource = KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array(KubernetesSuite.TEST_EXISTENCE_FILE.getName, + KubernetesSuite.TEST_EXISTENCE_FILE_CONTENTS)).run() + assert(podCompletedFuture.get(60, TimeUnit.SECONDS), "Failed to run driver pod") + val driverPod = kubernetesTestComponents.kubernetesClient + .pods + .withLabel("spark-app-name", "spark-file-existence-test") + .list() + .getItems + .get(0) + val podLog = kubernetesTestComponents.kubernetesClient + .pods + .withName(driverPod.getMetadata.getName) + .getLog + assert(podLog.contains(s"File found at" + + s" /opt/spark/${KubernetesSuite.TEST_EXISTENCE_FILE.getName} with correct contents."), + "Job did not find the file as expected.") + } + } + + test("Use external URI provider") { + val externalUriProviderWatch = + new ExternalUriProviderWatch(kubernetesTestComponents.kubernetesClient) + Utils.tryWithResource(kubernetesTestComponents.kubernetesClient.services() + .withLabel("spark-app-name", "spark-pi") + .watch(externalUriProviderWatch)) { _ => + sparkConf.set(DRIVER_SERVICE_MANAGER_TYPE, ExternalSuppliedUrisDriverServiceManager.TYPE) + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + val sparkMetricsService = getSparkMetricsService("spark-pi") + expectationsForStaticAllocation(sparkMetricsService) + assert(externalUriProviderWatch.annotationSet.get) + val driverService = kubernetesTestComponents.kubernetesClient + .services() + 
.withLabel("spark-app-name", "spark-pi") + .list() + .getItems + .asScala(0) + assert(driverService.getMetadata.getAnnotations.containsKey(ANNOTATION_PROVIDE_EXTERNAL_URI), + "External URI request annotation was not set on the driver service.") + // Unfortunately we can't check the correctness of the actual value of the URI, as it depends + // on the driver submission port set on the driver service but we remove that port from the + // service once the submission is complete. + assert(driverService.getMetadata.getAnnotations.containsKey(ANNOTATION_RESOLVED_EXTERNAL_URI), + "Resolved URI annotation not set on driver service.") + } + } + + test("Mount the Kubernetes credentials onto the driver pod") { + sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, + kubernetesTestComponents.clientConfig.getCaCertFile) + sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, + kubernetesTestComponents.clientConfig.getClientKeyFile) + sparkConf.set(KUBERNETES_DRIVER_CLIENT_CERT_FILE, + kubernetesTestComponents.clientConfig.getClientCertFile) + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + val sparkMetricsService = getSparkMetricsService("spark-pi") + expectationsForStaticAllocation(sparkMetricsService) + } + +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala new file mode 100644 index 0000000000000..0d74067334028 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.integrationtest + +import java.util.UUID + +import org.scalatest.{BeforeAndAfter, DoNotDiscover} +import org.scalatest.concurrent.Eventually + +import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.SSLUtils +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.kubernetes.submit.v2.{MountedDependencyManagerProviderImpl, SubmissionKubernetesClientProviderImpl} + +@DoNotDiscover +private[spark] class KubernetesV2Suite extends SparkFunSuite with BeforeAndAfter { + + private val APP_LOCATOR_LABEL = UUID.randomUUID().toString.replaceAll("-", "") + private var kubernetesTestComponents: KubernetesTestComponents = _ + private var sparkConf: SparkConf = _ + private var resourceStagingServerLauncher: ResourceStagingServerLauncher = _ + + override def beforeAll(): Unit = { + kubernetesTestComponents = new KubernetesTestComponents + resourceStagingServerLauncher = new ResourceStagingServerLauncher( + kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) + } + + before { + sparkConf = kubernetesTestComponents.newSparkConf() + .set(INIT_CONTAINER_DOCKER_IMAGE, s"spark-driver-init:latest") + .set(DRIVER_DOCKER_IMAGE, s"spark-driver-v2:latest") + .set(KUBERNETES_DRIVER_LABELS, s"spark-app-locator=$APP_LOCATOR_LABEL") + kubernetesTestComponents.createNamespace() + } + + after { + kubernetesTestComponents.deleteNamespace() + } + + test("Use submission v2.") { + launchStagingServer(SSLOptions()) + runSparkAppAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Enable SSL on the submission server") { + val (keyStore, trustStore) = SSLUtils.generateKeyStoreTrustStorePair( + ipAddress = Minikube.getMinikubeIp, + keyStorePassword = "keyStore", + keyPassword = "key", + trustStorePassword = "trustStore") + sparkConf.set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", keyStore.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.trustStore", trustStore.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", "keyStore") + .set("spark.ssl.kubernetes.resourceStagingServer.keyPassword", "key") + .set("spark.ssl.kubernetes.resourceStagingServer.trustStorePassword", "trustStore") + launchStagingServer(SSLOptions( + enabled = true, + keyStore = Some(keyStore), + trustStore = Some(trustStore), + keyStorePassword = Some("keyStore"), + keyPassword = Some("key"), + trustStorePassword = Some("trustStore"))) + runSparkAppAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Use container-local resources without the resource staging server") { + sparkConf.setJars(Seq( + KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, + KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) + runSparkAppAndVerifyCompletion(KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE) + } + + private def launchStagingServer(resourceStagingServerSslOptions: SSLOptions): Unit = { + val resourceStagingServerPort = resourceStagingServerLauncher.launchStagingServer( + resourceStagingServerSslOptions) + val resourceStagingServerUriScheme = if (resourceStagingServerSslOptions.enabled) { + "https" + } else { + "http" + } + sparkConf.set(RESOURCE_STAGING_SERVER_URI, + s"$resourceStagingServerUriScheme://${Minikube.getMinikubeIp}:$resourceStagingServerPort") + } + + 
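+  // Submits the given main application resource through the v2 submission client, locates the
+  // driver pod via the spark-app-locator label, and then waits for its log to contain the
+  // "Pi is roughly 3" line emitted by the SparkPi test job.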
private def runSparkAppAndVerifyCompletion(appResource: String): Unit = { + val client = new org.apache.spark.deploy.kubernetes.submit.v2.Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + appArgs = Array.empty[String], + mainAppResource = appResource, + kubernetesClientProvider = + new SubmissionKubernetesClientProviderImpl(sparkConf), + mountedDependencyManagerProvider = + new MountedDependencyManagerProviderImpl(sparkConf)) + client.run() + val driverPod = kubernetesTestComponents.kubernetesClient + .pods() + .withLabel("spark-app-locator", APP_LOCATOR_LABEL) + .list() + .getItems + .get(0) + Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { + assert(kubernetesTestComponents.kubernetesClient + .pods() + .withName(driverPod.getMetadata.getName) + .getLog + .contains("Pi is roughly 3"), "The application did not compute the value of pi.") + } + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala new file mode 100644 index 0000000000000..ca549fa27d630 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest + +import java.io.StringWriter +import java.util.Properties +import java.util.concurrent.TimeUnit + +import com.google.common.io.{BaseEncoding, Files} +import com.google.common.util.concurrent.SettableFuture +import io.fabric8.kubernetes.api.model.{ConfigMapBuilder, Endpoints, HasMetadata, HTTPGetActionBuilder, KeyToPathBuilder, Pod, PodBuilder, SecretBuilder, ServiceBuilder} +import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action +import io.fabric8.kubernetes.client.internal.readiness.Readiness +import scala.collection.JavaConverters._ + +import org.apache.spark.SSLOptions +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.submit.v2.ContainerNameEqualityPredicate +import org.apache.spark.util.Utils + +/** + * Launches a pod that runs the resource staging server, exposing it over a NodePort. 
+ */ +private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesClient) { + + private val KEYSTORE_DIR = "/mnt/secrets/spark-staging" + private val KEYSTORE_FILE = s"$KEYSTORE_DIR/keyStore" + private val PROPERTIES_FILE_NAME = "staging-server.properties" + private val PROPERTIES_DIR = "/var/data/spark-staging-server" + private val PROPERTIES_FILE_PATH = s"$PROPERTIES_DIR/$PROPERTIES_FILE_NAME" + private var activeResources = Seq.empty[HasMetadata] + + // Returns the NodePort the staging server is listening on + def launchStagingServer(sslOptions: SSLOptions): Int = { + val stagingServerProperties = new Properties() + val stagingServerSecret = sslOptions.keyStore.map { keyStore => + val keyStoreBytes = Files.toByteArray(keyStore) + val keyStoreBase64 = BaseEncoding.base64().encode(keyStoreBytes) + new SecretBuilder() + .withNewMetadata() + .withName("resource-staging-server-keystore") + .endMetadata() + .addToData("keyStore", keyStoreBase64) + .build() + } + stagingServerProperties.setProperty( + RESOURCE_STAGING_SERVER_SSL_ENABLED.key, sslOptions.enabled.toString) + sslOptions.keyStorePassword.foreach { password => + stagingServerProperties.setProperty( + "spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", password) + } + sslOptions.keyPassword.foreach { password => + stagingServerProperties.setProperty( + "spark.ssl.kubernetes.resourceStagingServer.keyPassword", password) + } + stagingServerSecret.foreach { _ => + stagingServerProperties.setProperty( + "spark.ssl.kubernetes.resourceStagingServer.keyStore", KEYSTORE_FILE) + } + val propertiesWriter = new StringWriter() + stagingServerProperties.store(propertiesWriter, "Resource staging server properties.") + val stagingServerConfigMap = new ConfigMapBuilder() + .withNewMetadata() + .withName(s"staging-server-properties") + .endMetadata() + .addToData("staging-server", propertiesWriter.toString) + .build() + val probePingHttpGet = new HTTPGetActionBuilder() + .withScheme(if (sslOptions.enabled) "HTTPS" else "HTTP") + .withPath("/api/v0/ping") + .withNewPort(RESOURCE_STAGING_SERVER_PORT.defaultValue.get) + .build() + val basePod = new PodBuilder() + .withNewMetadata() + .withName("resource-staging-server") + .addToLabels("resource-staging-server", "staging-server") + .endMetadata() + .withNewSpec() + .addNewVolume() + .withName("staging-server-properties") + .withNewConfigMap() + .withName(stagingServerConfigMap.getMetadata.getName) + .withItems( + new KeyToPathBuilder() + .withKey("staging-server") + .withPath(PROPERTIES_FILE_NAME) + .build()) + .endConfigMap() + .endVolume() + .addNewContainer() + .withName("staging-server-container") + .withImage("spark-resource-staging-server:latest") + .withImagePullPolicy("IfNotPresent") + .withNewReadinessProbe() + .withHttpGet(probePingHttpGet) + .endReadinessProbe() + .addNewVolumeMount() + .withName("staging-server-properties") + .withMountPath(PROPERTIES_DIR) + .endVolumeMount() + .addToArgs(PROPERTIES_FILE_PATH) + .endContainer() + .endSpec() + val withMountedKeyStorePod = stagingServerSecret.map { secret => + basePod.editSpec() + .addNewVolume() + .withName("keystore-volume") + .withNewSecret() + .withSecretName(secret.getMetadata.getName) + .endSecret() + .endVolume() + .editMatchingContainer(new ContainerNameEqualityPredicate("staging-server-container")) + .addNewVolumeMount() + .withName("keystore-volume") + .withMountPath(KEYSTORE_DIR) + .endVolumeMount() + .endContainer() + .endSpec() + }.getOrElse(basePod).build() + val stagingServerService = new 
ServiceBuilder() + .withNewMetadata() + .withName("resource-staging-server") + .endMetadata() + .withNewSpec() + .withType("NodePort") + .addToSelector("resource-staging-server", "staging-server") + .addNewPort() + .withName("staging-server-port") + .withPort(RESOURCE_STAGING_SERVER_PORT.defaultValue.get) + .withNewTargetPort(RESOURCE_STAGING_SERVER_PORT.defaultValue.get) + .endPort() + .endSpec() + .build() + val stagingServerPodReadyWatcher = new ReadinessWatcher[Pod] + val serviceReadyWatcher = new ReadinessWatcher[Endpoints] + val allResources = Seq( + stagingServerService, + stagingServerConfigMap, + withMountedKeyStorePod) ++ + stagingServerSecret.toSeq + Utils.tryWithResource(kubernetesClient.pods() + .withName(withMountedKeyStorePod.getMetadata.getName) + .watch(stagingServerPodReadyWatcher)) { _ => + Utils.tryWithResource(kubernetesClient.endpoints() + .withName(stagingServerService.getMetadata.getName) + .watch(serviceReadyWatcher)) { _ => + activeResources = kubernetesClient.resourceList(allResources: _*) + .createOrReplace() + .asScala + stagingServerPodReadyWatcher.waitUntilReady() + serviceReadyWatcher.waitUntilReady() + } + } + kubernetesClient.services().withName(stagingServerService.getMetadata.getName).get() + .getSpec + .getPorts + .get(0) + .getNodePort + } + + def tearDownStagingServer(): Unit = { + kubernetesClient.resourceList(activeResources: _*).delete() + activeResources = Seq.empty[HasMetadata] + } + + private class ReadinessWatcher[T <: HasMetadata] extends Watcher[T] { + + private val signal = SettableFuture.create[Boolean] + + override def eventReceived(action: Action, resource: T): Unit = { + if ((action == Action.MODIFIED || action == Action.ADDED) && + Readiness.isReady(resource)) { + signal.set(true) + } + } + + override def onClose(cause: KubernetesClientException): Unit = {} + + def waitUntilReady(): Boolean = signal.get(30, TimeUnit.SECONDS) + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala index 1aa6a7b7e70c2..d807c4d81009b 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala @@ -19,14 +19,20 @@ package org.apache.spark.deploy.kubernetes.integrationtest.docker import java.net.URI import java.nio.file.Paths -import com.fasterxml.jackson.jaxrs.json.JacksonJaxbJsonProvider -import com.spotify.docker.client.{DefaultDockerClient, DockerCertificates} +import com.spotify.docker.client.{DefaultDockerClient, DockerCertificates, LoggingBuildHandler} import org.apache.http.client.utils.URIBuilder import org.scalatest.concurrent.{Eventually, PatienceConfiguration} import org.scalatest.time.{Minutes, Seconds, Span} private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, String]) { + private val DOCKER_BUILD_PATH = Paths.get("target", "docker") + // Dockerfile paths must be relative to the build path. 
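+  // They are passed to dockerClient.build() along with DOCKER_BUILD_PATH, which serves as the
+  // Docker build context (see buildImage below).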
+ private val DRIVER_V1_DOCKER_FILE = "dockerfiles/driver/Dockerfile" + private val DRIVER_V2_DOCKER_FILE = "dockerfiles/driver-v2/Dockerfile" + private val EXECUTOR_DOCKER_FILE = "dockerfiles/executor/Dockerfile" + private val DRIVER_INIT_DOCKER_FILE = "dockerfiles/driver-init/Dockerfile" + private val STAGING_SERVER_DOCKER_FILE = "dockerfiles/resource-staging-server/Dockerfile" private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) private val dockerHost = dockerEnv.getOrElse("DOCKER_HOST", @@ -52,7 +58,18 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, def buildSparkDockerImages(): Unit = { Eventually.eventually(TIMEOUT, INTERVAL) { dockerClient.ping() } - dockerClient.build(Paths.get("target", "docker", "driver"), "spark-driver") - dockerClient.build(Paths.get("target", "docker", "executor"), "spark-executor") + buildImage("spark-driver", DRIVER_V1_DOCKER_FILE) + buildImage("spark-executor", EXECUTOR_DOCKER_FILE) + buildImage("spark-driver-v2", DRIVER_V2_DOCKER_FILE) + buildImage("spark-resource-staging-server", STAGING_SERVER_DOCKER_FILE) + buildImage("spark-driver-init", DRIVER_INIT_DOCKER_FILE) + } + + private def buildImage(name: String, dockerFile: String): Unit = { + dockerClient.build( + DOCKER_BUILD_PATH, + name, + dockerFile, + new LoggingBuildHandler()) } } From 6b489c2fb07e99066aa0cd1bddb923a3339371b3 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 1 May 2017 18:54:46 -0700 Subject: [PATCH 096/156] Enable testing against GCE clusters (#243) * Part 1: making test code cluster-agnostic * Final checked * Move all test code into KubernetesTestComponents * Addressed comments * Fixed doc * Restructure the test backends (#248) * Restructured the test backends * Address comments * var -> val * Comments * removed deadcode --- resource-managers/kubernetes/README.md | 8 +++ .../ExternalUriProviderWatch.scala | 2 +- .../integrationtest/KubernetesSuite.scala | 15 ++--- .../KubernetesTestComponents.scala | 54 +++++++++++----- .../integrationtest/KubernetesV1Suite.scala | 24 +++++-- .../integrationtest/KubernetesV2Suite.scala | 22 +++++-- .../integrationtest/ProcessUtils.scala | 55 ++++++++++++++++ .../backend/GCE/GCETestBackend.scala | 40 ++++++++++++ .../backend/IntegrationTestBackend.scala | 39 ++++++++++++ .../{ => backend}/minikube/Minikube.scala | 63 ++----------------- .../minikube/MinikubeTestBackend.scala | 47 ++++++++++++++ .../integrationtest/constants.scala | 22 +++++++ 12 files changed, 299 insertions(+), 92 deletions(-) create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/GCE/GCETestBackend.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/IntegrationTestBackend.scala rename resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/{ => backend}/minikube/Minikube.scala (64%) create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala create mode 100644 
resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md index d70c38fdc64d5..fd1ad29eb795d 100644 --- a/resource-managers/kubernetes/README.md +++ b/resource-managers/kubernetes/README.md @@ -61,6 +61,14 @@ build/mvn integration-test \ -pl resource-managers/kubernetes/integration-tests -am ``` +# Running against an arbitrary cluster + +In order to run against any cluster, use the following: +build/mvn integration-test \ + -Pkubernetes -Pkubernetes-integration-tests \ + -pl resource-managers/kubernetes/integration-tests -am + -DextraScalaTestArgs="-Dspark.kubernetes.test.master=k8s://https:// -Dspark.docker.test.driverImage= -Dspark.docker.test.executorImage=" + # Preserve the Minikube VM The integration tests make use of [Minikube](https://github.com/kubernetes/minikube), which fires up a virtual machine diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala index 3199a8c385f95..f402d240bfc33 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala @@ -24,7 +24,7 @@ import io.fabric8.kubernetes.client.Watcher.Action import scala.collection.JavaConverters._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube import org.apache.spark.internal.Logging /** diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index abbf7e4d5ce1b..bd5ff7a005d46 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -25,26 +25,23 @@ import org.scalatest.concurrent.PatienceConfiguration import org.scalatest.time.{Minutes, Seconds, Span} import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder -import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.backend.{IntegrationTestBackend, IntegrationTestBackendFactory} private[spark] class KubernetesSuite extends SparkFunSuite { + private val testBackend: IntegrationTestBackend = IntegrationTestBackendFactory.getTestBackend() override def beforeAll(): Unit = { - Minikube.startMinikube() - new SparkDockerImageBuilder(Minikube.getDockerEnv).buildSparkDockerImages() + testBackend.initialize() } override def afterAll(): Unit = { - if (!System.getProperty("spark.docker.test.persistMinikube", "false").toBoolean) { - Minikube.deleteMinikube() - } + 
testBackend.cleanUp() } override def nestedSuites: scala.collection.immutable.IndexedSeq[Suite] = { Vector( - new KubernetesV1Suite, - new KubernetesV2Suite) + new KubernetesV1Suite(testBackend), + new KubernetesV2Suite(testBackend)) } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala index 53e02f9e479c1..8cdacee655c05 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala @@ -17,22 +17,27 @@ package org.apache.spark.deploy.kubernetes.integrationtest import java.util.UUID +import javax.net.ssl.X509TrustManager -import org.scalatest.concurrent.Eventually import scala.collection.JavaConverters._ +import scala.reflect.ClassTag + +import io.fabric8.kubernetes.client.DefaultKubernetesClient +import io.fabric8.kubernetes.client.internal.SSLUtils +import org.scalatest.concurrent.Eventually import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.rest.kubernetes.v1.HttpClientUtil -private[spark] class KubernetesTestComponents { +private[spark] class KubernetesTestComponents(defaultClient: DefaultKubernetesClient) { val namespace = UUID.randomUUID().toString.replaceAll("-", "") - val kubernetesClient = Minikube.getKubernetesClient.inNamespace(namespace) + val kubernetesClient = defaultClient.inNamespace(namespace) val clientConfig = kubernetesClient.getConfiguration def createNamespace(): Unit = { - Minikube.getKubernetesClient.namespaces.createNew() + defaultClient.namespaces.createNew() .withNewMetadata() .withName(namespace) .endMetadata() @@ -40,9 +45,9 @@ private[spark] class KubernetesTestComponents { } def deleteNamespace(): Unit = { - Minikube.getKubernetesClient.namespaces.withName(namespace).delete() + defaultClient.namespaces.withName(namespace).delete() Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - val namespaceList = Minikube.getKubernetesClient + val namespaceList = defaultClient .namespaces() .list() .getItems() @@ -53,13 +58,12 @@ private[spark] class KubernetesTestComponents { def newSparkConf(): SparkConf = { new SparkConf(true) - .setMaster(s"k8s://https://${Minikube.getMinikubeIp}:8443") - .set(KUBERNETES_SUBMIT_CA_CERT_FILE, clientConfig.getCaCertFile) - .set(KUBERNETES_SUBMIT_CLIENT_KEY_FILE, clientConfig.getClientKeyFile) - .set(KUBERNETES_SUBMIT_CLIENT_CERT_FILE, clientConfig.getClientCertFile) + .setMaster(s"k8s://${kubernetesClient.getMasterUrl}") .set(KUBERNETES_NAMESPACE, namespace) - .set(DRIVER_DOCKER_IMAGE, "spark-driver:latest") - .set(EXECUTOR_DOCKER_IMAGE, "spark-executor:latest") + .set(DRIVER_DOCKER_IMAGE, + System.getProperty("spark.docker.test.driverImage", "spark-driver:latest")) + .set(EXECUTOR_DOCKER_IMAGE, + System.getProperty("spark.docker.test.executorImage", "spark-executor:latest")) .setJars(Seq(KubernetesSuite.HELPER_JAR_FILE.getAbsolutePath)) .set("spark.executor.memory", "500m") .set("spark.executor.cores", "1") @@ -69,4 +73,26 @@ private[spark] class KubernetesTestComponents { 
.set("spark.testing", "false") .set(WAIT_FOR_APP_COMPLETION, false) } -} + + def getService[T: ClassTag]( + serviceName: String, + namespace: String, + servicePortName: String, + servicePath: String = ""): T = synchronized { + val kubernetesMaster = s"${defaultClient.getMasterUrl}" + + val url = s"${ + Array[String]( + s"${kubernetesClient.getMasterUrl}", + "api", "v1", "proxy", + "namespaces", namespace, + "services", serviceName).mkString("/") + }" + + s":$servicePortName$servicePath" + val userHome = System.getProperty("user.home") + val kubernetesConf = kubernetesClient.getConfiguration + val sslContext = SSLUtils.sslContext(kubernetesConf) + val trustManager = SSLUtils.trustManagers(kubernetesConf)(0).asInstanceOf[X509TrustManager] + HttpClientUtil.createClient[T](Set(url), 5, sslContext.getSocketFactory, trustManager) + } +} \ No newline at end of file diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala index a4e3353032b71..4cbd074547915 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala @@ -18,6 +18,8 @@ package org.apache.spark.deploy.kubernetes.integrationtest import java.util.concurrent.TimeUnit +import scala.collection.JavaConverters._ + import com.google.common.collect.ImmutableList import com.google.common.util.concurrent.SettableFuture import io.fabric8.kubernetes.api.model.Pod @@ -25,26 +27,28 @@ import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import org.scalatest.{BeforeAndAfter, DoNotDiscover} import org.scalatest.concurrent.Eventually -import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} import org.apache.spark.deploy.kubernetes.SSLUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend +import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.constants.{GCE_TEST_BACKEND, MINIKUBE_TEST_BACKEND} import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 import org.apache.spark.deploy.kubernetes.submit.v1.{Client, ExternalSuppliedUrisDriverServiceManager} import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} import org.apache.spark.util.Utils @DoNotDiscover -private[spark] class KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter { +private[spark] class KubernetesV1Suite(testBackend: IntegrationTestBackend) + extends SparkFunSuite with BeforeAndAfter { private var kubernetesTestComponents: KubernetesTestComponents = _ private var sparkConf: SparkConf = _ override def beforeAll(): Unit = { - kubernetesTestComponents = new KubernetesTestComponents() + kubernetesTestComponents = new KubernetesTestComponents(testBackend.getKubernetesClient) kubernetesTestComponents.createNamespace() } @@ -85,7 +89,7 @@ private[spark] class 
KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter .get(0) .getMetadata .getName - Minikube.getService[SparkRestApiV1](serviceName, + kubernetesTestComponents.getService[SparkRestApiV1](serviceName, kubernetesTestComponents.namespace, "spark-ui-port") } @@ -168,6 +172,8 @@ private[spark] class KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter } test("Enable SSL on the driver submit server") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val (keyStoreFile, trustStoreFile) = SSLUtils.generateKeyStoreTrustStorePair( Minikube.getMinikubeIp, "changeit", @@ -188,6 +194,8 @@ private[spark] class KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter } test("Enable SSL on the driver submit server using PEM files") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val (keyPem, certPem) = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) sparkConf.set(DRIVER_SUBMIT_SSL_KEY_PEM, s"file://${keyPem.getAbsolutePath}") sparkConf.set(DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM, s"file://${certPem.getAbsolutePath}") @@ -201,6 +209,8 @@ private[spark] class KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter } test("Added files should exist on the driver.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + sparkConf.set("spark.files", KubernetesSuite.TEST_EXISTENCE_FILE.getAbsolutePath) sparkConf.setAppName("spark-file-existence-test") val podCompletedFuture = SettableFuture.create[Boolean] @@ -257,6 +267,8 @@ private[spark] class KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter } test("Use external URI provider") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val externalUriProviderWatch = new ExternalUriProviderWatch(kubernetesTestComponents.kubernetesClient) Utils.tryWithResource(kubernetesTestComponents.kubernetesClient.services() @@ -288,6 +300,8 @@ private[spark] class KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter } test("Mount the Kubernetes credentials onto the driver pod") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, kubernetesTestComponents.clientConfig.getCaCertFile) sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala index 0d74067334028..8fa7cbd52ee83 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala @@ -21,14 +21,17 @@ import java.util.UUID import org.scalatest.{BeforeAndAfter, DoNotDiscover} import org.scalatest.concurrent.Eventually -import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} +import org.apache.spark._ import org.apache.spark.deploy.kubernetes.SSLUtils import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend +import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND import 
org.apache.spark.deploy.kubernetes.submit.v2.{MountedDependencyManagerProviderImpl, SubmissionKubernetesClientProviderImpl} @DoNotDiscover -private[spark] class KubernetesV2Suite extends SparkFunSuite with BeforeAndAfter { +private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) + extends SparkFunSuite with BeforeAndAfter { private val APP_LOCATOR_LABEL = UUID.randomUUID().toString.replaceAll("-", "") private var kubernetesTestComponents: KubernetesTestComponents = _ @@ -36,7 +39,7 @@ private[spark] class KubernetesV2Suite extends SparkFunSuite with BeforeAndAfter private var resourceStagingServerLauncher: ResourceStagingServerLauncher = _ override def beforeAll(): Unit = { - kubernetesTestComponents = new KubernetesTestComponents + kubernetesTestComponents = new KubernetesTestComponents(testBackend.getKubernetesClient) resourceStagingServerLauncher = new ResourceStagingServerLauncher( kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) } @@ -54,11 +57,15 @@ private[spark] class KubernetesV2Suite extends SparkFunSuite with BeforeAndAfter } test("Use submission v2.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + launchStagingServer(SSLOptions()) runSparkAppAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) } test("Enable SSL on the submission server") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val (keyStore, trustStore) = SSLUtils.generateKeyStoreTrustStorePair( ipAddress = Minikube.getMinikubeIp, keyStorePassword = "keyStore", @@ -81,6 +88,8 @@ private[spark] class KubernetesV2Suite extends SparkFunSuite with BeforeAndAfter } test("Use container-local resources without the resource staging server") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + sparkConf.setJars(Seq( KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) @@ -88,6 +97,8 @@ private[spark] class KubernetesV2Suite extends SparkFunSuite with BeforeAndAfter } private def launchStagingServer(resourceStagingServerSslOptions: SSLOptions): Unit = { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val resourceStagingServerPort = resourceStagingServerLauncher.launchStagingServer( resourceStagingServerSslOptions) val resourceStagingServerUriScheme = if (resourceStagingServerSslOptions.enabled) { @@ -96,7 +107,8 @@ private[spark] class KubernetesV2Suite extends SparkFunSuite with BeforeAndAfter "http" } sparkConf.set(RESOURCE_STAGING_SERVER_URI, - s"$resourceStagingServerUriScheme://${Minikube.getMinikubeIp}:$resourceStagingServerPort") + s"$resourceStagingServerUriScheme://" + + s"${Minikube.getMinikubeIp}:$resourceStagingServerPort") } private def runSparkAppAndVerifyCompletion(appResource: String): Unit = { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala new file mode 100644 index 0000000000000..d0bfac3085487 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.deploy.kubernetes.integrationtest
+
+import java.io.{BufferedReader, InputStreamReader}
+import java.util.concurrent.TimeUnit
+
+import scala.collection.mutable.ArrayBuffer
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.util.Utils
+
+object ProcessUtils extends Logging {
+  /**
+   * executeProcess is used to run a command and return the output if it
+   * completes within timeout seconds.
+   */
+  def executeProcess(fullCommand: Array[String], timeout: Long): Seq[String] = {
+    val pb = new ProcessBuilder().command(fullCommand: _*)
+    pb.redirectErrorStream(true)
+    val proc = pb.start()
+    val outputLines = new ArrayBuffer[String]
+
+    Utils.tryWithResource(new InputStreamReader(proc.getInputStream)) { procOutput =>
+      Utils.tryWithResource(new BufferedReader(procOutput)) { (bufferedOutput: BufferedReader) =>
+        var line: String = null
+        do {
+          line = bufferedOutput.readLine()
+          if (line != null) {
+            logInfo(line)
+            outputLines += line
+          }
+        } while (line != null)
+      }
+    }
+    assert(proc.waitFor(timeout, TimeUnit.SECONDS),
+      s"Timed out while executing ${fullCommand.mkString(" ")}")
+    assert(proc.exitValue == 0, s"Failed to execute ${fullCommand.mkString(" ")}")
+    outputLines.toSeq
+  }
+}
diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/GCE/GCETestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/GCE/GCETestBackend.scala
new file mode 100644
index 0000000000000..1ef096be4af02
--- /dev/null
+++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/GCE/GCETestBackend.scala
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.spark.deploy.kubernetes.integrationtest.backend.GCE + +import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} + +import org.apache.spark.deploy.kubernetes.config.resolveK8sMaster +import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend +import org.apache.spark.deploy.kubernetes.integrationtest.constants.GCE_TEST_BACKEND + +private[spark] class GCETestBackend(val master: String) extends IntegrationTestBackend { + private var defaultClient: DefaultKubernetesClient = _ + + override def initialize(): Unit = { + var k8ConfBuilder = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(resolveK8sMaster(master)) + defaultClient = new DefaultKubernetesClient(k8ConfBuilder.build) + } + + override def getKubernetesClient(): DefaultKubernetesClient = { + defaultClient + } + + override def name(): String = GCE_TEST_BACKEND +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/IntegrationTestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/IntegrationTestBackend.scala new file mode 100644 index 0000000000000..c5bc923dd51a6 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/IntegrationTestBackend.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.kubernetes.integrationtest.backend + +import io.fabric8.kubernetes.client.DefaultKubernetesClient + +import org.apache.spark.deploy.kubernetes.integrationtest.backend.GCE.GCETestBackend +import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.{Minikube, MinikubeTestBackend} +import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder + +private[spark] trait IntegrationTestBackend { + def name(): String + def initialize(): Unit + def getKubernetesClient(): DefaultKubernetesClient + def cleanUp(): Unit = {} +} + +private[spark] object IntegrationTestBackendFactory { + def getTestBackend(): IntegrationTestBackend = { + Option(System.getProperty("spark.kubernetes.test.master")) + .map(new GCETestBackend(_)) + .getOrElse(new MinikubeTestBackend()) + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/Minikube.scala similarity index 64% rename from resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala rename to resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/Minikube.scala index 81491be944d3e..7c4b344e8f72b 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/Minikube.scala @@ -14,20 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.integrationtest.minikube +package org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube -import java.io.{BufferedReader, InputStreamReader} import java.nio.file.Paths -import java.util.concurrent.TimeUnit -import java.util.regex.Pattern -import javax.net.ssl.X509TrustManager import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} -import io.fabric8.kubernetes.client.internal.SSLUtils -import scala.collection.mutable.ArrayBuffer -import scala.reflect.ClassTag -import org.apache.spark.deploy.rest.kubernetes.v1.HttpClientUtil +import org.apache.spark.deploy.kubernetes.integrationtest.ProcessUtils import org.apache.spark.internal.Logging import org.apache.spark.util.Utils @@ -93,7 +86,7 @@ private[spark] object Minikube extends Logging { } def getKubernetesClient: DefaultKubernetesClient = synchronized { - val kubernetesMaster = s"https://$getMinikubeIp:8443" + val kubernetesMaster = s"https://${getMinikubeIp}:8443" val userHome = System.getProperty("user.home") val kubernetesConf = new ConfigBuilder() .withApiVersion("v1") @@ -105,32 +98,6 @@ private[spark] object Minikube extends Logging { new DefaultKubernetesClient(kubernetesConf) } - def getService[T: ClassTag]( - serviceName: String, - namespace: String, - servicePortName: String, - servicePath: String = ""): T = synchronized { - val kubernetesMaster = s"https://$getMinikubeIp:8443" - val url = s"${ - Array[String]( - kubernetesMaster, - "api", "v1", "proxy", - "namespaces", namespace, - "services", serviceName).mkString("/")}" + - s":$servicePortName$servicePath" - val userHome = System.getProperty("user.home") - val kubernetesConf = new ConfigBuilder() - .withApiVersion("v1") - .withMasterUrl(kubernetesMaster) - .withCaCertFile(Paths.get(userHome, ".minikube", "ca.crt").toFile.getAbsolutePath) - .withClientCertFile(Paths.get(userHome, ".minikube", "apiserver.crt").toFile.getAbsolutePath) - .withClientKeyFile(Paths.get(userHome, ".minikube", "apiserver.key").toFile.getAbsolutePath) - .build() - val sslContext = SSLUtils.sslContext(kubernetesConf) - val trustManager = SSLUtils.trustManagers(kubernetesConf)(0).asInstanceOf[X509TrustManager] - HttpClientUtil.createClient[T](Set(url), 5, sslContext.getSocketFactory, trustManager) - } - def executeMinikubeSsh(command: String): Unit = { executeMinikube("ssh", command) } @@ -141,28 +108,8 @@ private[spark] object Minikube extends Logging { throw new IllegalStateException("Failed to make the Minikube binary executable.") } } - val fullCommand = Array(MINIKUBE_EXECUTABLE_DEST.getAbsolutePath, action) ++ args - val pb = new ProcessBuilder().command(fullCommand: _*) - pb.redirectErrorStream(true) - val proc = pb.start() - val outputLines = new ArrayBuffer[String] - - Utils.tryWithResource(new InputStreamReader(proc.getInputStream)) { procOutput => - Utils.tryWithResource(new BufferedReader(procOutput)) { (bufferedOutput: BufferedReader) => - var line: String = null - do { - line = bufferedOutput.readLine() - if (line != null) { - logInfo(line) - outputLines += line - } - } while (line != null) - } - } - assert(proc.waitFor(MINIKUBE_STARTUP_TIMEOUT_SECONDS, TimeUnit.SECONDS), - s"Timed out while executing $action on minikube.") - assert(proc.exitValue == 0, s"Failed to execute minikube $action ${args.mkString(" ")}") - outputLines.toSeq + ProcessUtils.executeProcess(Array(MINIKUBE_EXECUTABLE_DEST.getAbsolutePath, action) ++ args, + MINIKUBE_STARTUP_TIMEOUT_SECONDS) } } diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala new file mode 100644 index 0000000000000..6e0049b813719 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube + +import io.fabric8.kubernetes.client.DefaultKubernetesClient + +import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend +import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND +import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder + +private[spark] class MinikubeTestBackend extends IntegrationTestBackend { + private var defaultClient: DefaultKubernetesClient = _ + + override def initialize(): Unit = { + Minikube.startMinikube() + new SparkDockerImageBuilder(Minikube.getDockerEnv).buildSparkDockerImages() + defaultClient = Minikube.getKubernetesClient + } + + override def getKubernetesClient(): DefaultKubernetesClient = { + defaultClient + } + + override def cleanUp(): Unit = { + if (!System.getProperty("spark.docker.test.persistMinikube", "false").toBoolean) { + Minikube.deleteMinikube() + } + } + + override def name(): String = MINIKUBE_TEST_BACKEND + + +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala new file mode 100644 index 0000000000000..8207198b529d2 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest + +package object constants { + val MINIKUBE_TEST_BACKEND = "minikube" + val GCE_TEST_BACKEND = "gce" +} \ No newline at end of file From 0e1cb4077cdb4aa6a54cbe172561c3d6deb965e4 Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Tue, 2 May 2017 16:52:22 -0700 Subject: [PATCH 097/156] Update running-on-kubernetes.md (#259) --- docs/running-on-kubernetes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 794099638f80c..66ea381e306a5 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -9,6 +9,7 @@ currently limited and not well-tested. This should not be used in production env ## Prerequisites * You must have a running Kubernetes cluster with access configured to it using [kubectl](https://kubernetes.io/docs/user-guide/prereqs/). If you do not already have a working Kubernetes cluster, you may setup a test cluster on your local machine using [minikube](https://kubernetes.io/docs/getting-started-guides/minikube/). + * We recommend that minikube be updated to the most recent version (0.18.0 at the time of this documentation), as some earlier versions may not start up the kubernetes cluster with all the necessary components. * You must have appropriate permissions to create and list [pods](https://kubernetes.io/docs/user-guide/pods/), [nodes](https://kubernetes.io/docs/admin/node/) and [services](https://kubernetes.io/docs/user-guide/services/) in your cluster. You can verify that you can list these resources by running `kubectl get nodes`, `kubectl get pods` and `kubectl get svc` which should give you a list of nodes, pods and services (if any) respectively. * You must have a spark distribution with Kubernetes support. This may be obtained from the [release tarball](https://github.com/apache-spark-on-k8s/spark/releases) or by [building Spark with Kubernetes support](../resource-managers/kubernetes/README.md#building-spark-with-kubernetes-support). From ba151c01ed5f1b670bd92977fba1ba683ed7de5e Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Wed, 3 May 2017 22:11:49 +0000 Subject: [PATCH 098/156] Build with sbt and fix scalastyle checks. 
(#241) --- project/SparkBuild.scala | 8 +++++--- .../org/apache/spark/deploy/kubernetes/SSLUtils.scala | 2 +- .../spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala | 2 +- .../v2/ResourceStagingServerSslOptionsProviderSuite.scala | 3 ++- .../kubernetes/docker-minimal-bundle/pom.xml | 2 +- .../integration-tests-spark-jobs-helpers/pom.xml | 3 +++ .../kubernetes/integration-tests-spark-jobs/pom.xml | 3 +++ resource-managers/kubernetes/integration-tests/pom.xml | 3 +++ 8 files changed, 19 insertions(+), 7 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index e3fbe0379fb7b..01e7e445713ac 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -57,9 +57,11 @@ object BuildCommons { ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects val optionallyEnabledProjects@Seq(mesos, yarn, java8Tests, sparkGangliaLgpl, - streamingKinesisAsl, dockerIntegrationTests) = - Seq("mesos", "yarn", "java8-tests", "ganglia-lgpl", "streaming-kinesis-asl", - "docker-integration-tests").map(ProjectRef(buildLocation, _)) + streamingKinesisAsl, dockerIntegrationTests, kubernetes, _*) = + Seq("mesos", "yarn", "java8-tests", "ganglia-lgpl", "streaming-kinesis-asl", "docker-integration-tests", + "kubernetes", "kubernetes-integration-tests", "kubernetes-integration-tests-spark-jobs", + "kubernetes-integration-tests-spark-jobs-helpers", "kubernetes-docker-minimal-bundle" + ).map(ProjectRef(buildLocation, _)) val assemblyProjects@Seq(networkYarn, streamingFlumeAssembly, streamingKafkaAssembly, streamingKafka010Assembly, streamingKinesisAslAssembly) = Seq("network-yarn", "streaming-flume-assembly", "streaming-kafka-0-8-assembly", "streaming-kafka-0-10-assembly", "streaming-kinesis-asl-assembly") diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala index dacb017d8a513..0cb056dcf5493 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala @@ -19,8 +19,8 @@ package org.apache.spark.deploy.kubernetes import java.io.{File, FileOutputStream, OutputStreamWriter} import java.math.BigInteger import java.nio.file.Files -import java.security.cert.X509Certificate import java.security.{KeyPair, KeyPairGenerator, KeyStore, SecureRandom} +import java.security.cert.X509Certificate import java.util.{Calendar, Random} import javax.security.auth.x500.X500Principal diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala index 9e2ab26460412..e6536fbaa6941 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala @@ -22,7 +22,7 @@ import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, Container, import io.fabric8.kubernetes.client.KubernetesClient import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource} import org.hamcrest.{BaseMatcher, Description} -import org.mockito.Matchers.{any, anyVararg, argThat, 
startsWith, eq => mockitoEq} +import org.mockito.Matchers.{any, anyVararg, argThat, eq => mockitoEq, startsWith} import org.mockito.Mockito.when import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala index 290b46a537bf3..10aced9000bf8 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala @@ -96,7 +96,8 @@ class ResourceStagingServerSslOptionsProviderSuite extends SparkFunSuite with Be .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", keyStoreFile.getAbsolutePath) .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePasswordFile", keyStorePasswordFile.getAbsolutePath) - .set("spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile", keyPasswordFile.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile", + keyPasswordFile.getAbsolutePath) val sslOptions = sslOptionsProvider.getSslOptions assert(sslOptions.keyStorePassword === Some("keyStorePassword"), "Incorrect keyStore password or it was not set.") diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index a10fe8fb58408..c66b87ac0952d 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -31,7 +31,7 @@ pom - docker-minimal-bundle + kubernetes-docker-minimal-bundle none pre-integration-test diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml index b9c29b26eb648..581bf9453f2f2 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml @@ -27,6 +27,9 @@ spark-kubernetes-integration-tests-spark-jobs-helpers_2.11 jar Spark Project Kubernetes Integration Tests Spark Jobs Helpers + + kubernetes-integration-tests-spark-jobs-helpers + diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml index 16dd0c9322c13..9639811479ff5 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml @@ -27,6 +27,9 @@ spark-kubernetes-integration-tests-spark-jobs_2.11 jar Spark Project Kubernetes Integration Tests Spark Jobs + + kubernetes-integration-tests-spark-jobs + diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index ac7a549c9b483..c94893cbce410 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -25,6 +25,9 @@ spark-kubernetes-integration-tests_2.11 + + kubernetes-integration-tests + jar Spark Project Kubernetes Integration Tests From 4ac0de130dccd9c639431a08a4fcfe85aad1a3a1 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Wed, 
3 May 2017 15:33:14 -0700 Subject: [PATCH 099/156] Updating images in doc (#219) --- docs/running-on-kubernetes.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 66ea381e306a5..5377d61d35b2f 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -25,11 +25,11 @@ If you wish to use pre-built docker images, you may use the images published in ComponentImage Spark Driver Image - kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-rc1 + kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 Spark Executor Image - kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-rc1 + kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 @@ -45,7 +45,7 @@ For example, if the registry host is `registry-host` and the registry is listeni docker build -t registry-host:5000/spark-executor:latest -f dockerfiles/executor/Dockerfile . docker push registry-host:5000/spark-driver:latest docker push registry-host:5000/spark-executor:latest - + ## Submitting Applications to Kubernetes Kubernetes applications can be executed via `spark-submit`. For example, to compute the value of pi, assuming the images @@ -58,8 +58,8 @@ are set up as described above: --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-rc1 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-rc1 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ examples/jars/spark_examples_2.11-2.2.0.jar The Spark master, specified either via passing the `--master` command line argument to `spark-submit` or by setting @@ -79,7 +79,7 @@ In the above example, the specific Kubernetes cluster can be used with spark sub Note that applications can currently only be executed in cluster mode, where the driver and its executors are running on the cluster. - + ### Specifying input files Spark supports specifying JAR paths that are either on the submitting host's disk, or are located on the disk of the @@ -109,8 +109,8 @@ If our local proxy were listening on port 8001, we would have our submission loo --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-rc1 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-rc1 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ examples/jars/spark_examples_2.11-2.2.0.jar Communication between Spark and Kubernetes clusters is performed using the fabric8 kubernetes-client library. 
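For reference, the fabric8 client mentioned above is constructed along the same lines as in the integration-test backends earlier in this series. The following is a minimal sketch only; the master URL and namespace are placeholders, and a real cluster will usually also need the CA and client-certificate options shown in the Minikube backend above, or credentials picked up from a local kubeconfig.

    import scala.collection.JavaConverters._
    import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient}

    // Placeholder API server address and namespace; substitute values for your cluster.
    val config = new ConfigBuilder()
      .withApiVersion("v1")
      .withMasterUrl("https://203.0.113.10:8443")
      .build()
    val client = new DefaultKubernetesClient(config)
    try {
      // List the pods visible in the target namespace, as the submission client does internally.
      client.pods().inNamespace("default").list().getItems.asScala
        .foreach(pod => println(pod.getMetadata.getName))
    } finally {
      client.close()
    }

The same builder pattern, extended with Minikube's certificate files, is what the integration tests use to obtain their client.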
From 8ccb305b9102a374a15eaeef41f6afe86a636a88 Mon Sep 17 00:00:00 2001 From: Johannes Scheuermann Date: Fri, 5 May 2017 20:25:24 +0200 Subject: [PATCH 100/156] Correct readme links (#266) --- docs/running-on-kubernetes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 5377d61d35b2f..02933c28bbc66 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -189,7 +189,7 @@ from the other deployment modes. See the [configuration page](configuration.html The namespace that will be used for running the driver and executor pods. When using spark-submit in cluster mode, this can also be passed to spark-submit via the - --kubernetes-namespace command line argument. + --kubernetes-namespace command line argument. The namespace must already exist. From 0a8080a1b621d310213b213fc0bf7c61a61067ee Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Tue, 9 May 2017 14:12:23 -0700 Subject: [PATCH 101/156] edit readme with a working build example command (#254) --- resource-managers/kubernetes/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md index fd1ad29eb795d..734c29947b6d9 100644 --- a/resource-managers/kubernetes/README.md +++ b/resource-managers/kubernetes/README.md @@ -14,10 +14,10 @@ important matters to keep in mind when developing this feature. # Building Spark with Kubernetes Support -To build Spark with Kubernetes support, use the `kubernetes` profile when invoking Maven. For example, to simply compile -the Kubernetes core implementation module along with its dependencies: +To build Spark with Kubernetes support, use the `kubernetes` profile when invoking Maven. - build/mvn compile -Pkubernetes -pl resource-managers/kubernetes/core -am -DskipTests + git checkout branch-2.1-kubernetes + build/mvn package -Pkubernetes -DskipTests To build a distribution of Spark with Kubernetes support, use the `dev/make-distribution.sh` script, and add the `kubernetes` profile as part of the build arguments. 
Any other build arguments can be specified as one would expect when From 26f747ebc14a10812d76038f79a98788fba28486 Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Tue, 9 May 2017 23:20:48 -0700 Subject: [PATCH 102/156] Fix watcher conditional logic (#269) --- .../apache/spark/deploy/kubernetes/submit/v1/Client.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index e1cfac8feba37..65e47ddca4bfe 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -538,7 +538,9 @@ private[spark] class Client( private class DriverEndpointsReadyWatcher(resolvedDriverEndpoints: SettableFuture[Endpoints]) extends Watcher[Endpoints] { override def eventReceived(action: Action, endpoints: Endpoints): Unit = { - if ((action == Action.ADDED) || (action == Action.MODIFIED) + if ((action == Action.ADDED || action == Action.MODIFIED) + && (endpoints != null) + && (endpoints.getSubsets != null) && endpoints.getSubsets.asScala.nonEmpty && endpoints.getSubsets.asScala.exists(_.getAddresses.asScala.nonEmpty) && !resolvedDriverEndpoints.isDone) { @@ -554,7 +556,7 @@ private[spark] class Client( private class DriverServiceReadyWatcher(resolvedDriverService: SettableFuture[Service]) extends Watcher[Service] { override def eventReceived(action: Action, service: Service): Unit = { - if ((action == Action.ADDED) || (action == Action.MODIFIED) + if ((action == Action.ADDED || action == Action.MODIFIED) && !resolvedDriverService.isDone) { resolvedDriverService.set(service) } From 546f09ce497aceab8c13daf53f99773977507836 Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Wed, 10 May 2017 00:47:39 -0700 Subject: [PATCH 103/156] Dispatch tasks to right executors that have tasks' input HDFS data (#216) * Dispatch tasks to right executors that have tasks' input HDFS data on local disks * Fix style issues * Clean up unnecessary fields * Clean up a misleading method name * Address review comments * Fix import ordering * Delete executor pods in watcher * Fix the driver hang by unblocking the main thread * Fix import order * Clear runningExecutorPods * Fix incorrect merge * Address review comments * Clean up imports --- .../spark/scheduler/TaskSetManager.scala | 2 +- .../kubernetes/KubernetesClientBuilder.scala | 16 +++- .../kubernetes/KubernetesClusterManager.scala | 3 +- .../KubernetesClusterSchedulerBackend.scala | 73 +++++++++++++++++-- .../KubernetesTaskSchedulerImpl.scala | 27 +++++++ .../kubernetes/KubernetesTaskSetManager.scala | 63 ++++++++++++++++ 6 files changed, 172 insertions(+), 12 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSchedulerImpl.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index b766e4148e496..30df8862c3589 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ 
b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -221,7 +221,7 @@ private[spark] class TaskSetManager( * Return the pending tasks list for a given host, or an empty list if * there is no map entry for that host */ - private def getPendingTasksForHost(host: String): ArrayBuffer[Int] = { + protected def getPendingTasksForHost(host: String): ArrayBuffer[Int] = { pendingTasksForHost.getOrElse(host, ArrayBuffer()) } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala index 6725992aae978..31c6eda77d058 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala @@ -21,10 +21,13 @@ import java.io.File import com.google.common.base.Charsets import com.google.common.io.Files import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient} +import io.fabric8.kubernetes.client.utils.HttpClientUtils +import okhttp3.Dispatcher import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.util.ThreadUtils private[spark] class KubernetesClientBuilder(sparkConf: SparkConf, namespace: String) { private val SERVICE_ACCOUNT_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) @@ -78,6 +81,17 @@ private[spark] class KubernetesClientBuilder(sparkConf: SparkConf, namespace: St } serviceAccountConfigBuilder } - new DefaultKubernetesClient(configBuilder.build) + // Disable the ping thread that is not daemon, in order to allow + // the driver main thread to shut down upon errors. Otherwise, the driver + // will hang indefinitely. + val config = configBuilder + .withWebsocketPingInterval(0) + .build() + val httpClient = HttpClientUtils.createHttpClient(config).newBuilder() + // Use a Dispatcher with a custom executor service that creates daemon threads. The default + // executor service used by Dispatcher creates non-daemon threads. 
+ .dispatcher(new Dispatcher(ThreadUtils.newDaemonCachedThreadPool("spark-on-k8s"))) + .build() + new DefaultKubernetesClient(httpClient, config) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala index 36f7149a832c3..70098f1f46ac0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala @@ -24,7 +24,7 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager { override def canCreate(masterURL: String): Boolean = masterURL.startsWith("k8s") override def createTaskScheduler(sc: SparkContext, masterURL: String): TaskScheduler = { - val scheduler = new TaskSchedulerImpl(sc) + val scheduler = new KubernetesTaskSchedulerImpl(sc) sc.taskScheduler = scheduler scheduler } @@ -37,6 +37,5 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager { override def initialize(scheduler: TaskScheduler, backend: SchedulerBackend): Unit = { scheduler.asInstanceOf[TaskSchedulerImpl].initialize(backend) } - } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 15457db7e1459..a2294a6766980 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -16,13 +16,18 @@ */ package org.apache.spark.scheduler.cluster.kubernetes -import java.util.concurrent.atomic.{AtomicInteger, AtomicLong} +import java.io.Closeable +import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference} -import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, - EnvVarSourceBuilder, Pod, QuantityBuilder} import scala.collection.JavaConverters._ +import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future} +import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, + EnvVarSourceBuilder, Pod, QuantityBuilder} +import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action + import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ @@ -38,8 +43,11 @@ private[spark] class KubernetesClusterSchedulerBackend( import KubernetesClusterSchedulerBackend._ - private val EXECUTOR_MODIFICATION_LOCK = new Object - private val runningExecutorPods = new scala.collection.mutable.HashMap[String, Pod] + private val RUNNING_EXECUTOR_PODS_LOCK = new Object + private val runningExecutorPods = new mutable.HashMap[String, Pod] // Indexed by executor IDs. + + private val EXECUTOR_PODS_BY_IPS_LOCK = new Object + private val executorPodsByIPs = new mutable.HashMap[String, Pod] // Indexed by executor IP addrs. 
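  // Illustrative sketch only (not part of the upstream diff), with a hypothetical `executorIP`:
  // KubernetesTaskSetManager, added later in this patch, calls
  //   backend.getExecutorPodByIP(executorIP).map(_.getSpec.getNodeName)
  // to translate an executor pod's IP into the Kubernetes node hosting it, so that HDFS block
  // locations keyed by cluster node name or node IP can be matched for task locality.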
private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) @@ -87,6 +95,7 @@ private[spark] class KubernetesClusterSchedulerBackend( super.minRegisteredRatio } + private val executorWatchResource = new AtomicReference[Closeable] protected var totalExpectedExecutors = new AtomicInteger(0) private val driverUrl = RpcEndpointAddress( @@ -119,6 +128,8 @@ private[spark] class KubernetesClusterSchedulerBackend( override def start(): Unit = { super.start() + executorWatchResource.set(kubernetesClient.pods().withLabel(SPARK_APP_ID_LABEL, applicationId()) + .watch(new ExecutorPodsWatcher())) if (!Utils.isDynamicAllocationEnabled(sc.conf)) { doRequestTotalExecutors(initialExecutors) } @@ -133,11 +144,22 @@ private[spark] class KubernetesClusterSchedulerBackend( // When using Utils.tryLogNonFatalError some of the code fails but without any logs or // indication as to why. try { - runningExecutorPods.values.foreach(kubernetesClient.pods().delete(_)) + RUNNING_EXECUTOR_PODS_LOCK.synchronized { + runningExecutorPods.values.foreach(kubernetesClient.pods().delete(_)) + runningExecutorPods.clear() + } + EXECUTOR_PODS_BY_IPS_LOCK.synchronized { + executorPodsByIPs.clear() + } + val resource = executorWatchResource.getAndSet(null) + if (resource != null) { + resource.close() + } } catch { case e: Throwable => logError("Uncaught exception while shutting down controllers.", e) } try { + logInfo("Closing kubernetes client") kubernetesClient.close() } catch { case e: Throwable => logError("Uncaught exception closing Kubernetes client.", e) @@ -231,7 +253,7 @@ private[spark] class KubernetesClusterSchedulerBackend( } override def doRequestTotalExecutors(requestedTotal: Int): Future[Boolean] = Future[Boolean] { - EXECUTOR_MODIFICATION_LOCK.synchronized { + RUNNING_EXECUTOR_PODS_LOCK.synchronized { if (requestedTotal > totalExpectedExecutors.get) { logInfo(s"Requesting ${requestedTotal - totalExpectedExecutors.get}" + s" additional executors, expecting total $requestedTotal and currently" + @@ -246,7 +268,7 @@ private[spark] class KubernetesClusterSchedulerBackend( } override def doKillExecutors(executorIds: Seq[String]): Future[Boolean] = Future[Boolean] { - EXECUTOR_MODIFICATION_LOCK.synchronized { + RUNNING_EXECUTOR_PODS_LOCK.synchronized { for (executor <- executorIds) { runningExecutorPods.remove(executor) match { case Some(pod) => kubernetesClient.pods().delete(pod) @@ -256,6 +278,41 @@ private[spark] class KubernetesClusterSchedulerBackend( } true } + + def getExecutorPodByIP(podIP: String): Option[Pod] = { + EXECUTOR_PODS_BY_IPS_LOCK.synchronized { + executorPodsByIPs.get(podIP) + } + } + + private class ExecutorPodsWatcher extends Watcher[Pod] { + + override def eventReceived(action: Action, pod: Pod): Unit = { + if (action == Action.MODIFIED && pod.getStatus.getPhase == "Running" + && pod.getMetadata.getDeletionTimestamp == null) { + val podIP = pod.getStatus.getPodIP + val clusterNodeName = pod.getSpec.getNodeName + logDebug(s"Executor pod $pod ready, launched at $clusterNodeName as IP $podIP.") + EXECUTOR_PODS_BY_IPS_LOCK.synchronized { + executorPodsByIPs += ((podIP, pod)) + } + } else if ((action == Action.MODIFIED && pod.getMetadata.getDeletionTimestamp != null) || + action == Action.DELETED || action == Action.ERROR) { + val podName = pod.getMetadata.getName + val podIP = pod.getStatus.getPodIP + logDebug(s"Executor pod $podName at IP $podIP was at $action.") + if (podIP != null) { + EXECUTOR_PODS_BY_IPS_LOCK.synchronized { + 
executorPodsByIPs -= podIP + } + } + } + } + + override def onClose(cause: KubernetesClientException): Unit = { + logDebug("Executor pod watch closed.", cause) + } + } } private object KubernetesClusterSchedulerBackend { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSchedulerImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSchedulerImpl.scala new file mode 100644 index 0000000000000..a5e126480b83d --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSchedulerImpl.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.scheduler.cluster.kubernetes + +import org.apache.spark.SparkContext +import org.apache.spark.scheduler.{TaskSchedulerImpl, TaskSet, TaskSetManager} + +private[spark] class KubernetesTaskSchedulerImpl(sc: SparkContext) extends TaskSchedulerImpl(sc) { + + override def createTaskSetManager(taskSet: TaskSet, maxTaskFailures: Int): TaskSetManager = { + new KubernetesTaskSetManager(this, taskSet, maxTaskFailures) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala new file mode 100644 index 0000000000000..5cea95be382f0 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.scheduler.cluster.kubernetes + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.scheduler.{TaskSchedulerImpl, TaskSet, TaskSetManager} + +private[spark] class KubernetesTaskSetManager( + sched: TaskSchedulerImpl, + taskSet: TaskSet, + maxTaskFailures: Int) extends TaskSetManager(sched, taskSet, maxTaskFailures) { + + /** + * Overrides the lookup to use not only the executor pod IP, but also the cluster node + * name and host IP address that the pod is running on. The base class may have populated + * the lookup target map with HDFS datanode locations if this task set reads HDFS data. + * Those datanode locations are based on cluster node names or host IP addresses. Using + * only executor pod IPs may not match them. + */ + override def getPendingTasksForHost(executorIP: String): ArrayBuffer[Int] = { + val pendingTasksExecutorIP = super.getPendingTasksForHost(executorIP) + if (pendingTasksExecutorIP.nonEmpty) { + pendingTasksExecutorIP + } else { + val backend = sched.backend.asInstanceOf[KubernetesClusterSchedulerBackend] + val pod = backend.getExecutorPodByIP(executorIP) + if (pod.nonEmpty) { + val clusterNodeName = pod.get.getSpec.getNodeName + val pendingTasksClusterNodeName = super.getPendingTasksForHost(clusterNodeName) + if (pendingTasksClusterNodeName.nonEmpty) { + logDebug(s"Got preferred task list $pendingTasksClusterNodeName for executor host " + + s"$executorIP using cluster node name $clusterNodeName") + pendingTasksClusterNodeName + } else { + val clusterNodeIP = pod.get.getStatus.getHostIP + val pendingTasksClusterNodeIP = super.getPendingTasksForHost(clusterNodeIP) + if (pendingTasksClusterNodeIP.nonEmpty) { + logDebug(s"Got preferred task list $pendingTasksClusterNodeIP for executor host " + + s"$executorIP using cluster node IP $clusterNodeIP") + } + pendingTasksClusterNodeIP + } + } else { + pendingTasksExecutorIP // Empty + } + } + } +} From eb45ae558130ef0db6da53503cc71589c8dca267 Mon Sep 17 00:00:00 2001 From: Ye Yin Date: Wed, 17 May 2017 00:10:45 +0800 Subject: [PATCH 104/156] Add parameter for driver pod name (#258) * Add parameter for driver pod name * Mark KUBERNETES_DRIVER_POD_NAME not being internal. Update docment. * Add test case for driver pod name * Diff driver pod name with appid * replace 'spark.kubernetes.driver.pod.name` with KUBERNETES_DRIVER_POD_NAME * Update readme to complete item --- docs/running-on-kubernetes.md | 7 +++++++ .../apache/spark/deploy/kubernetes/config.scala | 1 - .../deploy/kubernetes/submit/v1/Client.scala | 12 +++++++----- .../deploy/kubernetes/submit/v2/Client.scala | 6 ++++-- .../integrationtest/KubernetesV1Suite.scala | 16 ++++++++++++++++ 5 files changed, 34 insertions(+), 8 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 02933c28bbc66..be410f18b5cfc 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -350,6 +350,13 @@ from the other deployment modes. See the [configuration page](configuration.html resource. + + spark.kubernetes.driver.pod.name + (none) + + Name of the driver pod. If not set, the driver pod name is set to "spark.app.name" suffixed by the current timestamp to avoid name conflicts. 
+ + spark.kubernetes.submission.waitAppCompletion true diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 1c8b6798bbdd5..e379b40e376fc 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -267,7 +267,6 @@ package object config extends Logging { private[spark] val KUBERNETES_DRIVER_POD_NAME = ConfigBuilder("spark.kubernetes.driver.pod.name") .doc("Name of the driver pod.") - .internal() .stringConf .createOptional diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index 65e47ddca4bfe..a4dfe90f71a8a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -51,6 +51,8 @@ private[spark] class Client( private val appName = sparkConf.getOption("spark.app.name") .getOrElse("spark") private val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) + .getOrElse(kubernetesAppId) private val secretName = s"$SUBMISSION_APP_SECRET_PREFIX-$kubernetesAppId" private val secretDirectory = s"$DRIVER_CONTAINER_SUBMISSION_SECRETS_BASE_DIR/$kubernetesAppId" private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) @@ -150,7 +152,7 @@ private[spark] class Client( loggingInterval) Utils.tryWithResource(kubernetesClient .pods() - .withName(kubernetesAppId) + .withName(kubernetesDriverPodName) .watch(loggingWatch)) { _ => val resourceCleanShutdownHook = ShutdownHookManager.addShutdownHook(() => kubernetesResourceCleaner.deleteAllRegisteredResourcesFromKubernetes(kubernetesClient)) @@ -247,7 +249,7 @@ private[spark] class Client( logWarning(s"Warning: Provided app id in spark.app.id as $id will be" + s" overridden as $kubernetesAppId") } - sparkConf.set(KUBERNETES_DRIVER_POD_NAME, kubernetesAppId) + sparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) sparkConf.set(KUBERNETES_DRIVER_SERVICE_NAME, driverService.getMetadata.getName) sparkConf.set("spark.app.id", kubernetesAppId) sparkConf.setIfMissing("spark.app.name", appName) @@ -314,7 +316,7 @@ private[spark] class Client( val podWatcher = new DriverPodReadyWatcher(podReadyFuture) Utils.tryWithResource(kubernetesClient .pods() - .withName(kubernetesAppId) + .withName(kubernetesDriverPodName) .watch(podWatcher)) { _ => Utils.tryWithResource(kubernetesClient .services() @@ -445,7 +447,7 @@ private[spark] class Client( .build() val driverPod = kubernetesClient.pods().createNew() .withNewMetadata() - .withName(kubernetesAppId) + .withName(kubernetesDriverPodName) .withLabels(driverKubernetesSelectors.asJava) .withAnnotations(customAnnotations.asJava) .endMetadata() @@ -571,7 +573,7 @@ private[spark] class Client( kubernetesClient: KubernetesClient, e: Throwable): String = { val driverPod = try { - kubernetesClient.pods().withName(kubernetesAppId).get() + kubernetesClient.pods().withName(kubernetesDriverPodName).get() } catch { case throwable: Throwable => 
logError(s"Timed out while waiting $driverSubmitTimeoutSecs seconds for the" + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala index 69dbfd041bb86..a70c93942ffb5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala @@ -58,6 +58,8 @@ private[spark] class Client( private val appName = sparkConf.getOption("spark.app.name") .getOrElse("spark") private val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) + .getOrElse(kubernetesAppId) private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) private val maybeStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) @@ -123,7 +125,7 @@ private[spark] class Client( .build() val basePod = new PodBuilder() .withNewMetadata() - .withName(kubernetesAppId) + .withName(kubernetesDriverPodName) .addToLabels(allLabels.asJava) .addToAnnotations(parsedCustomAnnotations.asJava) .endMetadata() @@ -176,7 +178,7 @@ private[spark] class Client( if (resolvedFiles.nonEmpty) { resolvedSparkConf.set("spark.files", resolvedFiles.mkString(",")) } - resolvedSparkConf.set(KUBERNETES_DRIVER_POD_NAME, kubernetesAppId) + resolvedSparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) resolvedSparkConf.set("spark.app.id", kubernetesAppId) // We don't need this anymore since we just set the JVM options on the environment resolvedSparkConf.remove(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala index 4cbd074547915..f09339a9c3e08 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala @@ -171,6 +171,22 @@ private[spark] class KubernetesV1Suite(testBackend: IntegrationTestBackend) "Unexpected value for annotation2") } + test("Run with driver pod name") { + sparkConf.set(KUBERNETES_DRIVER_POD_NAME, "spark-pi") + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + val driverPodMetadata = kubernetesTestComponents.kubernetesClient + .pods() + .withName("spark-pi") + .get() + .getMetadata() + val driverName = driverPodMetadata.getName + assert(driverName === "spark-pi", "Unexpected driver pod name.") + } + test("Enable SSL on the driver submit server") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) From e9da54952ce95f192b5ed9168ec4e9b74a5b6ca5 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Wed, 17 May 2017 09:44:22 -0700 Subject: [PATCH 105/156] Dynamic allocation (#272) * dynamic allocation: shuffle service docker, 
yaml and test fixture * dynamic allocation: changes to spark-core * dynamic allocation: tests * dynamic allocation: docs * dynamic allocation: kubernetes allocator and executor accounting * dynamic allocation: shuffle service, node caching --- conf/kubernetes-shuffle-service.yaml | 53 +++++ .../CoarseGrainedExecutorBackend.scala | 2 +- .../cluster/CoarseGrainedClusterMessage.scala | 2 +- .../CoarseGrainedSchedulerBackend.scala | 2 +- .../apache/spark/storage/BlockManager.scala | 10 +- docs/running-on-kubernetes.md | 66 +++++- resource-managers/kubernetes/README.md | 6 +- .../kubernetes/ConfigurationUtils.scala | 41 ++++ .../spark/deploy/kubernetes/config.scala | 45 ++++ .../spark/deploy/kubernetes/constants.scala | 1 + .../KubernetesClusterSchedulerBackend.scala | 224 ++++++++++++++---- .../cluster/kubernetes/ShufflePodCache.scala | 91 +++++++ .../main/docker/shuffle-service/Dockerfile | 39 +++ .../integrationtest/jobs/GroupByTest.scala | 54 +++++ .../integrationtest/KubernetesSuite.scala | 4 + .../integrationtest/KubernetesV2Suite.scala | 99 +++++++- .../docker/SparkDockerImageBuilder.scala | 2 + 17 files changed, 682 insertions(+), 59 deletions(-) create mode 100644 conf/kubernetes-shuffle-service.yaml create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala create mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile create mode 100644 resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/GroupByTest.scala diff --git a/conf/kubernetes-shuffle-service.yaml b/conf/kubernetes-shuffle-service.yaml new file mode 100644 index 0000000000000..3aeb1f54f301c --- /dev/null +++ b/conf/kubernetes-shuffle-service.yaml @@ -0,0 +1,53 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: + labels: + app: spark-shuffle-service + spark-version: 2.1.0 + name: shuffle +spec: + template: + metadata: + labels: + app: spark-shuffle-service + spark-version: 2.1.0 + spec: + volumes: + - name: temp-volume + hostPath: + path: '/var/tmp' # change this path according to your cluster configuration. + containers: + - name: shuffle + # This is an official image that is built + # from the dockerfiles/shuffle directory + # in the spark distribution. + image: kubespark/spark-shuffle:v2.1.0-kubernetes-0.1.0-alpha.3 + volumeMounts: + - mountPath: '/tmp' + name: temp-volume + # more volumes can be mounted here. 
+ # The spark job must be configured to use these + # mounts using the configuration: + # spark.kubernetes.shuffle.dir=,,... + resources: + requests: + cpu: "1" + limits: + cpu: "1" \ No newline at end of file diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index 92a27902c6696..f0e13aa6bf109 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -200,7 +200,7 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { new SecurityManager(executorConf), clientMode = true) val driver = fetcher.setupEndpointRefByURI(driverUrl) - val cfg = driver.askWithRetry[SparkAppConfig](RetrieveSparkAppConfig) + val cfg = driver.askWithRetry[SparkAppConfig](RetrieveSparkAppConfig(executorId)) val props = cfg.sparkProperties ++ Seq[(String, String)](("spark.app.id", appId)) fetcher.shutdown() diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index 0a4f19d76073e..2406999f9ee92 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -28,7 +28,7 @@ private[spark] sealed trait CoarseGrainedClusterMessage extends Serializable private[spark] object CoarseGrainedClusterMessages { - case object RetrieveSparkAppConfig extends CoarseGrainedClusterMessage + case class RetrieveSparkAppConfig(executorId: String) extends CoarseGrainedClusterMessage case class SparkAppConfig( sparkProperties: Seq[(String, String)], diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index 3452487e72e88..89e59353de845 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -206,7 +206,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp removeExecutor(executorId, reason) context.reply(true) - case RetrieveSparkAppConfig => + case RetrieveSparkAppConfig(executorId) => val reply = SparkAppConfig(sparkProperties, SparkEnv.get.securityManager.getIOEncryptionKey()) context.reply(reply) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 04521c9159eac..18f7d135acdd2 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -184,8 +184,14 @@ private[spark] class BlockManager( blockManagerId = if (idFromMaster != null) idFromMaster else id shuffleServerId = if (externalShuffleServiceEnabled) { - logInfo(s"external shuffle service port = $externalShuffleServicePort") - BlockManagerId(executorId, blockTransferService.hostName, externalShuffleServicePort) + val shuffleServerHostName = if (blockManagerId.isDriver) { + blockTransferService.hostName + } else { + conf.get("spark.shuffle.service.host", blockTransferService.hostName) + } + logInfo(s"external shuffle service host = $shuffleServerHostName, 
" + + s"port = $externalShuffleServicePort") + BlockManagerId(executorId, shuffleServerHostName, externalShuffleServicePort) } else { blockManagerId } diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index be410f18b5cfc..5b7bb6cc612c5 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -189,7 +189,7 @@ from the other deployment modes. See the [configuration page](configuration.html The namespace that will be used for running the driver and executor pods. When using spark-submit in cluster mode, this can also be passed to spark-submit via the - --kubernetes-namespace command line argument. The namespace must already exist. + --kubernetes-namespace command line argument. @@ -208,6 +208,37 @@ from the other deployment modes. See the [configuration page](configuration.html Docker tag format. + + spark.kubernetes.shuffle.namespace + default + + Namespace in which the shuffle service pods are present. The shuffle service must be + created in the cluster prior to attempts to use it. + + + + spark.kubernetes.shuffle.labels + (none) + + Labels that will be used to look up shuffle service pods. This should be a comma-separated list of label key-value pairs, + where each label is in the format key=value. The labels chosen must be such that + they match exactly one shuffle service pod on each node that executors are launched. + + + + spark.kubernetes.allocation.batch.size + 5 + + Number of pods to launch at once in each round of executor pod allocation. + + + + spark.kubernetes.allocation.batch.delay + 1 + + Number of seconds to wait between each round of executor pod allocation. + + spark.kubernetes.authenticate.submission.caCertFile (none) @@ -389,10 +420,41 @@ from the other deployment modes. See the [configuration page](configuration.html +## Dynamic Executor Scaling + +Spark on Kubernetes supports Dynamic Allocation with cluster mode. This mode requires running +an external shuffle service. This is typically a [daemonset](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) +with a provisioned [hostpath](https://kubernetes.io/docs/concepts/storage/volumes/#hostpath) volume. +This shuffle service may be shared by executors belonging to different SparkJobs. Using Spark with dynamic allocation +on Kubernetes assumes that a cluster administrator has set up one or more shuffle-service daemonsets in the cluster. + +A sample configuration file is provided in `conf/kubernetes-shuffle-service.yaml` which can be customized as needed +for a particular cluster. It is important to note that `spec.template.metadata.labels` are setup appropriately for the shuffle +service because there may be multiple shuffle service instances running in a cluster. The labels give us a way to target a particular +shuffle service. + +For example, if the shuffle service we want to use is in the default namespace, and +has pods with labels `app=spark-shuffle-service` and `spark-version=2.1.0`, we can +use those tags to target that particular shuffle service at job launch time. 
In order to run a job with dynamic allocation enabled, +the command may then look like the following: + + bin/spark-submit \ + --deploy-mode cluster \ + --class org.apache.spark.examples.GroupByTest \ + --master k8s://: \ + --kubernetes-namespace default \ + --conf spark.app.name=group-by-test \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:latest \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:latest \ + --conf spark.dynamicAllocation.enabled=true \ + --conf spark.shuffle.service.enabled=true \ + --conf spark.kubernetes.shuffle.namespace=default \ + --conf spark.kubernetes.shuffle.labels="app=spark-shuffle-service,spark-version=2.1.0" \ + examples/jars/spark_examples_2.11-2.2.0.jar 10 400000 2 + ## Current Limitations Running Spark on Kubernetes is currently an experimental feature. Some restrictions on the current implementation that should be lifted in the future include: -* Applications can only use a fixed number of executors. Dynamic allocation is not supported. * Applications can only run in cluster mode. * Only Scala and Java applications can be run. diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md index 734c29947b6d9..fd1ad29eb795d 100644 --- a/resource-managers/kubernetes/README.md +++ b/resource-managers/kubernetes/README.md @@ -14,10 +14,10 @@ important matters to keep in mind when developing this feature. # Building Spark with Kubernetes Support -To build Spark with Kubernetes support, use the `kubernetes` profile when invoking Maven. +To build Spark with Kubernetes support, use the `kubernetes` profile when invoking Maven. For example, to simply compile +the Kubernetes core implementation module along with its dependencies: - git checkout branch-2.1-kubernetes - build/mvn package -Pkubernetes -DskipTests + build/mvn compile -Pkubernetes -pl resource-managers/kubernetes/core -am -DskipTests To build a distribution of Spark with Kubernetes support, use the `dev/make-distribution.sh` script, and add the `kubernetes` profile as part of the build arguments. Any other build arguments can be specified as one would expect when diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala new file mode 100644 index 0000000000000..f3bd598556019 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.kubernetes + +import org.apache.spark.SparkException + +object ConfigurationUtils { + def parseKeyValuePairs( + maybeKeyValues: Option[String], + configKey: String, + keyValueType: String): Map[String, String] = { + + maybeKeyValues.map(keyValues => { + keyValues.split(",").map(_.trim).filterNot(_.isEmpty).map(keyValue => { + keyValue.split("=", 2).toSeq match { + case Seq(k, v) => + (k, v) + case _ => + throw new SparkException(s"Custom $keyValueType set by $configKey must be a" + + s" comma-separated list of key-value pairs, with format =." + + s" Got value: $keyValue. All values: $keyValues") + } + }).toMap + }).getOrElse(Map.empty[String, String]) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index e379b40e376fc..09b2d38cb8e38 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -157,6 +157,13 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val SPARK_SHUFFLE_SERVICE_HOST = + ConfigBuilder("spark.shuffle.service.host") + .doc("Host for Spark Shuffle Service") + .internal() + .stringConf + .createOptional + // Note that while we set a default for this when we start up the // scheduler, the specific default value is dynamically determined // based on the executor memory. @@ -270,6 +277,44 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val KUBERNETES_SHUFFLE_NAMESPACE = + ConfigBuilder("spark.kubernetes.shuffle.namespace") + .doc("Namespace of the shuffle service") + .stringConf + .createWithDefault("default") + + private[spark] val KUBERNETES_SHUFFLE_SVC_IP = + ConfigBuilder("spark.kubernetes.shuffle.ip") + .doc("This setting is for debugging only. Setting this " + + "allows overriding the IP that the executor thinks its colocated " + + "shuffle service is on") + .stringConf + .createOptional + + private[spark] val KUBERNETES_SHUFFLE_LABELS = + ConfigBuilder("spark.kubernetes.shuffle.labels") + .doc("Labels to identify the shuffle service") + .stringConf + .createOptional + + private[spark] val KUBERNETES_SHUFFLE_DIR = + ConfigBuilder("spark.kubernetes.shuffle.dir") + .doc("Path to the shared shuffle directories.") + .stringConf + .createOptional + + private[spark] val KUBERNETES_ALLOCATION_BATCH_SIZE = + ConfigBuilder("spark.kubernetes.allocation.batch.size") + .doc("Number of pods to launch at once in each round of dynamic allocation. ") + .intConf + .createWithDefault(5) + + private[spark] val KUBERNETES_ALLOCATION_BATCH_DELAY = + ConfigBuilder("spark.kubernetes.allocation.batch.delay") + .doc("Number of seconds to wait between each round of executor allocation. 
") + .longConf + .createWithDefault(1) + private[spark] val DRIVER_SERVICE_MANAGER_TYPE = ConfigBuilder("spark.kubernetes.driver.serviceManagerType") .doc("A tag indicating which class to use for creating the Kubernetes service and" + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index f82cb88b4c622..27e47eb61933f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -113,4 +113,5 @@ package object constants { s"$INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH/$INIT_CONTAINER_PROPERTIES_FILE_NAME" private[spark] val DOWNLOAD_JARS_VOLUME_NAME = "download-jars" private[spark] val DOWNLOAD_FILES_VOLUME_NAME = "download-files" + private[spark] val DEFAULT_SHUFFLE_MOUNT_NAME = "shuffle" } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index a2294a6766980..669a073b1fab6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -17,22 +17,25 @@ package org.apache.spark.scheduler.cluster.kubernetes import java.io.Closeable +import java.util.concurrent.TimeUnit import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference} import scala.collection.JavaConverters._ import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future} -import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, - EnvVarSourceBuilder, Pod, QuantityBuilder} +import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action +import org.apache.commons.io.FilenameUtils -import org.apache.spark.{SparkContext, SparkException} +import org.apache.spark.{SparkContext, SparkEnv, SparkException} +import org.apache.spark.deploy.kubernetes.ConfigurationUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.rpc.RpcEndpointAddress +import org.apache.spark.rpc.{RpcCallContext, RpcEndpointAddress, RpcEnv} import org.apache.spark.scheduler.TaskSchedulerImpl +import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.{RetrieveSparkAppConfig, SparkAppConfig} import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.{ThreadUtils, Utils} @@ -49,6 +52,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private val EXECUTOR_PODS_BY_IPS_LOCK = new Object private val executorPodsByIPs = new mutable.HashMap[String, Pod] // Indexed by executor IP addrs. 
+ private var shufflePodCache: Option[ShufflePodCache] = None private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) private val executorPort = conf.getInt("spark.executor.port", DEFAULT_STATIC_PORT) @@ -88,6 +92,28 @@ private[spark] class KubernetesClusterSchedulerBackend( throw new SparkException(s"Executor cannot find driver pod", throwable) } + private val shuffleServiceConfig: Option[ShuffleServiceConfig] = + if (Utils.isDynamicAllocationEnabled(sc.conf)) { + val shuffleNamespace = conf.get(KUBERNETES_SHUFFLE_NAMESPACE) + val parsedShuffleLabels = ConfigurationUtils.parseKeyValuePairs( + conf.get(KUBERNETES_SHUFFLE_LABELS), KUBERNETES_SHUFFLE_LABELS.key, + "shuffle-labels") + if (parsedShuffleLabels.size == 0) { + throw new SparkException(s"Dynamic allocation enabled " + + s"but no ${KUBERNETES_SHUFFLE_LABELS.key} specified") + } + + val shuffleDirs = conf.get(KUBERNETES_SHUFFLE_DIR).map { + _.split(",") + }.getOrElse(Utils.getConfiguredLocalDirs(conf)) + Some( + ShuffleServiceConfig(shuffleNamespace, + parsedShuffleLabels, + shuffleDirs)) + } else { + None + } + override val minRegisteredRatio = if (conf.getOption("spark.scheduler.minRegisteredResourcesRatio").isEmpty) { 0.8 @@ -105,6 +131,38 @@ private[spark] class KubernetesClusterSchedulerBackend( private val initialExecutors = getInitialTargetExecutorNumber(1) + private val podAllocationInterval = conf.get(KUBERNETES_ALLOCATION_BATCH_DELAY) + require(podAllocationInterval > 0, s"Allocation batch delay " + + s"${KUBERNETES_ALLOCATION_BATCH_DELAY} " + + s"is ${podAllocationInterval}, should be a positive integer") + + private val podAllocationSize = conf.get(KUBERNETES_ALLOCATION_BATCH_SIZE) + require(podAllocationSize > 0, s"Allocation batch size " + + s"${KUBERNETES_ALLOCATION_BATCH_SIZE} " + + s"is ${podAllocationSize}, should be a positive integer") + + private val allocator = ThreadUtils + .newDaemonSingleThreadScheduledExecutor("kubernetes-pod-allocator") + + private val allocatorRunnable: Runnable = new Runnable { + override def run(): Unit = { + if (totalRegisteredExecutors.get() < runningExecutorPods.size) { + logDebug("Waiting for pending executors before scaling") + } else if (totalExpectedExecutors.get() <= runningExecutorPods.size) { + logDebug("Maximum allowed executor limit reached. 
Not scaling up further.") + } else { + RUNNING_EXECUTOR_PODS_LOCK.synchronized { + for (i <- 0 until math.min( + totalExpectedExecutors.get - runningExecutorPods.size, podAllocationSize)) { + runningExecutorPods += allocateNewExecutorPod() + logInfo( + s"Requesting a new executor, total executors is now ${runningExecutorPods.size}") + } + } + } + } + } + private def getInitialTargetExecutorNumber(defaultNumExecutors: Int = 1): Int = { if (Utils.isDynamicAllocationEnabled(conf)) { val minNumExecutors = conf.getInt("spark.dynamicAllocation.minExecutors", 0) @@ -118,6 +176,7 @@ private[spark] class KubernetesClusterSchedulerBackend( } else { conf.getInt("spark.executor.instances", defaultNumExecutors) } + } override def applicationId(): String = conf.get("spark.app.id", super.applicationId()) @@ -130,12 +189,25 @@ private[spark] class KubernetesClusterSchedulerBackend( super.start() executorWatchResource.set(kubernetesClient.pods().withLabel(SPARK_APP_ID_LABEL, applicationId()) .watch(new ExecutorPodsWatcher())) + + allocator.scheduleWithFixedDelay( + allocatorRunnable, 0, podAllocationInterval, TimeUnit.SECONDS) + if (!Utils.isDynamicAllocationEnabled(sc.conf)) { doRequestTotalExecutors(initialExecutors) + } else { + shufflePodCache = shuffleServiceConfig + .map { config => new ShufflePodCache( + kubernetesClient, config.shuffleNamespace, config.shuffleLabels) } + shufflePodCache.foreach(_.start()) } } override def stop(): Unit = { + // stop allocation of new resources and caches. + allocator.shutdown() + shufflePodCache.foreach(_.stop()) + // send stop message to executors so they shut down cleanly super.stop() @@ -214,37 +286,60 @@ private[spark] class KubernetesClusterSchedulerBackend( .withContainerPort(port._2) .build() }) + + val basePodBuilder = new PodBuilder() + .withNewMetadata() + .withName(name) + .withLabels(selectors) + .withOwnerReferences() + .addNewOwnerReference() + .withController(true) + .withApiVersion(driverPod.getApiVersion) + .withKind(driverPod.getKind) + .withName(driverPod.getMetadata.getName) + .withUid(driverPod.getMetadata.getUid) + .endOwnerReference() + .endMetadata() + .withNewSpec() + .withHostname(hostname) + .addNewContainer() + .withName(s"executor") + .withImage(executorDockerImage) + .withImagePullPolicy("IfNotPresent") + .withNewResources() + .addToRequests("memory", executorMemoryQuantity) + .addToLimits("memory", executorMemoryLimitQuantity) + .addToRequests("cpu", executorCpuQuantity) + .addToLimits("cpu", executorCpuQuantity) + .endResources() + .withEnv(requiredEnv.asJava) + .withPorts(requiredPorts.asJava) + .endContainer() + .endSpec() + + val resolvedPodBuilder = shuffleServiceConfig + .map { config => + config.shuffleDirs.foldLeft(basePodBuilder) { (builder, dir) => + builder + .editSpec() + .addNewVolume() + .withName(FilenameUtils.getBaseName(dir)) + .withNewHostPath() + .withPath(dir) + .endHostPath() + .endVolume() + .editFirstContainer() + .addNewVolumeMount() + .withName(FilenameUtils.getBaseName(dir)) + .withMountPath(dir) + .endVolumeMount() + .endContainer() + .endSpec() + } + }.getOrElse(basePodBuilder) + try { - (executorId, kubernetesClient.pods().createNew() - .withNewMetadata() - .withName(name) - .withLabels(selectors) - .withOwnerReferences() - .addNewOwnerReference() - .withController(true) - .withApiVersion(driverPod.getApiVersion) - .withKind(driverPod.getKind) - .withName(driverPod.getMetadata.getName) - .withUid(driverPod.getMetadata.getUid) - .endOwnerReference() - .endMetadata() - .withNewSpec() - .withHostname(hostname) 
- .addNewContainer() - .withName(s"executor") - .withImage(executorDockerImage) - .withImagePullPolicy("IfNotPresent") - .withNewResources() - .addToRequests("memory", executorMemoryQuantity) - .addToLimits("memory", executorMemoryLimitQuantity) - .addToRequests("cpu", executorCpuQuantity) - .addToLimits("cpu", executorCpuQuantity) - .endResources() - .withEnv(requiredEnv.asJava) - .withPorts(requiredPorts.asJava) - .endContainer() - .endSpec() - .done()) + (executorId, kubernetesClient.pods().create(resolvedPodBuilder.build())) } catch { case throwable: Throwable => logError("Failed to allocate executor pod.", throwable) @@ -252,18 +347,13 @@ private[spark] class KubernetesClusterSchedulerBackend( } } + override def createDriverEndpoint( + properties: Seq[(String, String)]): DriverEndpoint = { + new KubernetesDriverEndpoint(rpcEnv, properties) + } + override def doRequestTotalExecutors(requestedTotal: Int): Future[Boolean] = Future[Boolean] { - RUNNING_EXECUTOR_PODS_LOCK.synchronized { - if (requestedTotal > totalExpectedExecutors.get) { - logInfo(s"Requesting ${requestedTotal - totalExpectedExecutors.get}" - + s" additional executors, expecting total $requestedTotal and currently" + - s" expected ${totalExpectedExecutors.get}") - for (i <- 0 until (requestedTotal - totalExpectedExecutors.get)) { - runningExecutorPods += allocateNewExecutorPod() - } - } - totalExpectedExecutors.set(requestedTotal) - } + totalExpectedExecutors.set(requestedTotal) true } @@ -313,6 +403,50 @@ private[spark] class KubernetesClusterSchedulerBackend( logDebug("Executor pod watch closed.", cause) } } + + private class KubernetesDriverEndpoint( + rpcEnv: RpcEnv, + sparkProperties: Seq[(String, String)]) + extends DriverEndpoint(rpcEnv, sparkProperties) { + override def receiveAndReply( + context: RpcCallContext): PartialFunction[Any, Unit] = { + new PartialFunction[Any, Unit]() { + override def isDefinedAt(msg: Any): Boolean = { + msg match { + case RetrieveSparkAppConfig(executorId) => + Utils.isDynamicAllocationEnabled(sc.conf) + case _ => false + } + } + + override def apply(msg: Any): Unit = { + msg match { + case RetrieveSparkAppConfig(executorId) => + RUNNING_EXECUTOR_PODS_LOCK.synchronized { + var resolvedProperties = sparkProperties + val runningExecutorPod = kubernetesClient + .pods() + .withName(runningExecutorPods(executorId).getMetadata.getName) + .get() + val nodeName = runningExecutorPod.getSpec.getNodeName + val shufflePodIp = shufflePodCache.get.getShufflePodForExecutor(nodeName) + resolvedProperties = resolvedProperties ++ Seq( + (SPARK_SHUFFLE_SERVICE_HOST.key, shufflePodIp)) + + val reply = SparkAppConfig( + resolvedProperties, + SparkEnv.get.securityManager.getIOEncryptionKey()) + context.reply(reply) + } + } + } + }.orElse(super.receiveAndReply(context)) + } + } + + case class ShuffleServiceConfig(shuffleNamespace: String, + shuffleLabels: Map[String, String], + shuffleDirs: Seq[String]) } private object KubernetesClusterSchedulerBackend { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala new file mode 100644 index 0000000000000..53b4e745ce7c7 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler.cluster.kubernetes + +import scala.collection.JavaConverters._ + +import io.fabric8.kubernetes.api.model.Pod +import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watch, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action +import io.fabric8.kubernetes.client.internal.readiness.Readiness + +import org.apache.spark.SparkException +import org.apache.spark.internal.Logging + +private[spark] class ShufflePodCache ( + client: KubernetesClient, + dsNamespace: String, + dsLabels: Map[String, String]) extends Logging { + + private var shufflePodCache = scala.collection.mutable.Map[String, String]() + private var watcher: Watch = _ + + def start(): Unit = { + // seed the initial cache. + val pods = client.pods().withLabels(dsLabels.asJava).list() + pods.getItems.asScala.foreach { + pod => + if (Readiness.isReady(pod)) { + addShufflePodToCache(pod) + } else { + logWarning(s"Found unready shuffle pod ${pod.getMetadata.getName} " + + s"on node ${pod.getSpec.getNodeName}") + } + } + + watcher = client + .pods() + .withLabels(dsLabels.asJava) + .watch(new Watcher[Pod] { + override def eventReceived(action: Watcher.Action, p: Pod): Unit = { + action match { + case Action.DELETED | Action.ERROR => + shufflePodCache.remove(p.getSpec.getNodeName) + case Action.ADDED | Action.MODIFIED if Readiness.isReady(p) => + addShufflePodToCache(p) + } + } + override def onClose(e: KubernetesClientException): Unit = {} + }) + } + + private def addShufflePodToCache(pod: Pod): Unit = { + if (shufflePodCache.contains(pod.getSpec.getNodeName)) { + val registeredPodName = shufflePodCache.get(pod.getSpec.getNodeName).get + logError(s"Ambiguous specification of shuffle service pod. " + + s"Found multiple matching pods: ${pod.getMetadata.getName}, " + + s"${registeredPodName} on ${pod.getSpec.getNodeName}") + + throw new SparkException(s"Ambiguous specification of shuffle service pod. 
" + + s"Found multiple matching pods: ${pod.getMetadata.getName}, " + + s"${registeredPodName} on ${pod.getSpec.getNodeName}") + } else { + shufflePodCache(pod.getSpec.getNodeName) = pod.getStatus.getPodIP + } + } + + def stop(): Unit = { + watcher.close() + } + + def getShufflePodForExecutor(executorNode: String): String = { + shufflePodCache.get(executorNode) + .getOrElse(throw new SparkException(s"Unable to find shuffle pod on node $executorNode")) + } +} + diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile new file mode 100644 index 0000000000000..630d3408519ac --- /dev/null +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile @@ -0,0 +1,39 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM openjdk:8-alpine + +# If this docker file is being used in the context of building your images from a Spark distribution, the docker build +# command should be invoked from the top level directory of the Spark distribution. E.g.: +# docker build -t spark-shuffle:latest -f dockerfiles/shuffle/Dockerfile . + +RUN apk upgrade --update +RUN apk add --update bash +RUN mkdir -p /opt/spark +RUN touch /opt/spark/RELEASE + +ADD jars /opt/spark/jars +ADD examples /opt/spark/examples +ADD bin /opt/spark/bin +ADD sbin /opt/spark/sbin +ADD conf /opt/spark/conf + +ENV SPARK_HOME /opt/spark + +WORKDIR /opt/spark + +CMD ["/bin/sh","-c","/opt/spark/bin/spark-class org.apache.spark.deploy.ExternalShuffleService 1"] \ No newline at end of file diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/GroupByTest.scala b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/GroupByTest.scala new file mode 100644 index 0000000000000..fe47d42485b24 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/GroupByTest.scala @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest.jobs + +import java.util.Random + +import org.apache.spark.sql.SparkSession + +object GroupByTest { + def main(args: Array[String]) { + val spark = SparkSession + .builder + .appName("GroupBy Test") + .getOrCreate() + + val numMappers = if (args.length > 0) args(0).toInt else 5 + val numKVPairs = if (args.length > 1) args(1).toInt else 200000 + val valSize = if (args.length > 2) args(2).toInt else 2 + val numReducers = if (args.length > 3) args(3).toInt else numMappers + + val pairs1 = spark.sparkContext.parallelize(0 until numMappers, numMappers).flatMap { p => + val ranGen = new Random + val arr1 = new Array[(Int, Array[Byte])](numKVPairs) + for (i <- 0 until numKVPairs) { + val byteArr = new Array[Byte](valSize) + ranGen.nextBytes(byteArr) + arr1(i) = (ranGen.nextInt(Int.MaxValue), byteArr) + } + arr1 + }.cache() + // Enforce that everything has been calculated and in cache + pairs1.count() + + // scalastyle:off println + println("The Result is", pairs1.groupByKey(numReducers).count()) + // scalastyle:on println + spark.stop() + } +} + diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index bd5ff7a005d46..56fcf692b8ff7 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -67,4 +67,8 @@ private[spark] object KubernetesSuite { ".integrationtest.jobs.SparkPiWithInfiniteWait" val FILE_EXISTENCE_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + ".integrationtest.jobs.FileExistenceTest" + val GROUP_BY_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + + ".integrationtest.jobs.GroupByTest" + + case class ShuffleNotReadyException() extends Exception } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala index 8fa7cbd52ee83..ae02de7937c6a 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala @@ -18,6 +18,10 @@ package org.apache.spark.deploy.kubernetes.integrationtest import java.util.UUID +import scala.collection.JavaConverters._ + +import com.google.common.collect.ImmutableList +import io.fabric8.kubernetes.client.internal.readiness.Readiness import org.scalatest.{BeforeAndAfter, DoNotDiscover} import org.scalatest.concurrent.Eventually @@ -27,7 +31,10 @@ import 
org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND +import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 +import org.apache.spark.deploy.kubernetes.submit.v1.Client import org.apache.spark.deploy.kubernetes.submit.v2.{MountedDependencyManagerProviderImpl, SubmissionKubernetesClientProviderImpl} +import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} @DoNotDiscover private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) @@ -60,7 +67,7 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) assume(testBackend.name == MINIKUBE_TEST_BACKEND) launchStagingServer(SSLOptions()) - runSparkAppAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) } test("Enable SSL on the submission server") { @@ -84,7 +91,7 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) keyStorePassword = Some("keyStore"), keyPassword = Some("key"), trustStorePassword = Some("trustStore"))) - runSparkAppAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) } test("Use container-local resources without the resource staging server") { @@ -93,7 +100,22 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) sparkConf.setJars(Seq( KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) - runSparkAppAndVerifyCompletion(KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE) + runSparkPiAndVerifyCompletion(KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Dynamic executor scaling basic test") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + launchStagingServer(SSLOptions()) + createShuffleServiceDaemonSet() + + sparkConf.setJars(Seq(KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) + sparkConf.set("spark.dynamicAllocation.enabled", "true") + sparkConf.set("spark.shuffle.service.enabled", "true") + sparkConf.set("spark.kubernetes.shuffle.labels", "app=spark-shuffle-service") + sparkConf.set("spark.kubernetes.shuffle.namespace", kubernetesTestComponents.namespace) + sparkConf.set("spark.app.name", "group-by-test") + runSparkGroupByTestAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) } private def launchStagingServer(resourceStagingServerSslOptions: SSLOptions): Unit = { @@ -111,7 +133,7 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) s"${Minikube.getMinikubeIp}:$resourceStagingServerPort") } - private def runSparkAppAndVerifyCompletion(appResource: String): Unit = { + private def runSparkPiAndVerifyCompletion(appResource: String): Unit = { val client = new org.apache.spark.deploy.kubernetes.submit.v2.Client( sparkConf = sparkConf, mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, @@ -136,4 +158,73 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) .contains("Pi is roughly 3"), "The application did not compute the value of pi.") } } + + private def runSparkGroupByTestAndVerifyCompletion(appResource: String): Unit = { + val client = new org.apache.spark.deploy.kubernetes.submit.v2.Client( + sparkConf = sparkConf, 
+ mainClass = KubernetesSuite.GROUP_BY_MAIN_CLASS, + appArgs = Array.empty[String], + mainAppResource = appResource, + kubernetesClientProvider = + new SubmissionKubernetesClientProviderImpl(sparkConf), + mountedDependencyManagerProvider = + new MountedDependencyManagerProviderImpl(sparkConf)) + client.run() + val driverPod = kubernetesTestComponents.kubernetesClient + .pods() + .withLabel("spark-app-locator", APP_LOCATOR_LABEL) + .list() + .getItems + .get(0) + Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { + assert(kubernetesTestComponents.kubernetesClient + .pods() + .withName(driverPod.getMetadata.getName) + .getLog + .contains("The Result is"), "The application did not complete.") + } + } + + private def createShuffleServiceDaemonSet(): Unit = { + val ds = kubernetesTestComponents.kubernetesClient.extensions().daemonSets() + .createNew() + .withNewMetadata() + .withName("shuffle") + .endMetadata() + .withNewSpec() + .withNewTemplate() + .withNewMetadata() + .withLabels(Map("app" -> "spark-shuffle-service").asJava) + .endMetadata() + .withNewSpec() + .addNewVolume() + .withName("shuffle-dir") + .withNewHostPath() + .withPath("/tmp") + .endHostPath() + .endVolume() + .addNewContainer() + .withName("shuffle") + .withImage("spark-shuffle:latest") + .withImagePullPolicy("IfNotPresent") + .addNewVolumeMount() + .withName("shuffle-dir") + .withMountPath("/tmp") + .endVolumeMount() + .endContainer() + .endSpec() + .endTemplate() + .endSpec() + .done() + + // wait for daemonset to become available. + Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { + val pods = kubernetesTestComponents.kubernetesClient.pods() + .withLabel("app", "spark-shuffle-service").list().getItems() + + if (pods.size() == 0 || Readiness.isReady(pods.get(0))) { + throw KubernetesSuite.ShuffleNotReadyException() + } + } + } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala index d807c4d81009b..52b8c7d7359a6 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala @@ -31,6 +31,7 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, private val DRIVER_V1_DOCKER_FILE = "dockerfiles/driver/Dockerfile" private val DRIVER_V2_DOCKER_FILE = "dockerfiles/driver-v2/Dockerfile" private val EXECUTOR_DOCKER_FILE = "dockerfiles/executor/Dockerfile" + private val SHUFFLE_SERVICE_DOCKER_FILE = "dockerfiles/shuffle-service/Dockerfile" private val DRIVER_INIT_DOCKER_FILE = "dockerfiles/driver-init/Dockerfile" private val STAGING_SERVER_DOCKER_FILE = "dockerfiles/resource-staging-server/Dockerfile" private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) @@ -60,6 +61,7 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, Eventually.eventually(TIMEOUT, INTERVAL) { dockerClient.ping() } buildImage("spark-driver", DRIVER_V1_DOCKER_FILE) buildImage("spark-executor", EXECUTOR_DOCKER_FILE) + buildImage("spark-shuffle", SHUFFLE_SERVICE_DOCKER_FILE) buildImage("spark-driver-v2", DRIVER_V2_DOCKER_FILE) 
buildImage("spark-resource-staging-server", STAGING_SERVER_DOCKER_FILE) buildImage("spark-driver-init", DRIVER_INIT_DOCKER_FILE) From f005268e7809b75b3d29726946fab9aa127cd45a Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 17 May 2017 11:55:23 -0700 Subject: [PATCH 106/156] Download remotely-located resources on driver and executor startup via init-container (#251) * Download remotely-located resources on driver startup. Use init-container in executors. * FIx owner reference slightly * Clean up config * Don't rely too heavily on conventions that can change * Fix flaky test * Tidy up file resolver * Whitespace arrangement * Indentation change * Fix more indentation * Consolidate init container component providers * Minor method signature and comment changes * Rename class for consistency * Resolve conflicts * Fix flaky test * Add some tests and some refactoring. * Make naming consistent for Staged -> Submitted * Add unit test for the submission client. * Refine expectations * Rename variables and fix typos * Address more comments. Remove redundant SingleKeyConfigMap. * Minor test adjustments. * add another test * Fix conflicts. --- ...nerResourceStagingServerSecretPlugin.scala | 62 ++ .../SparkPodInitContainerBootstrap.scala | 103 ++++ .../spark/deploy/kubernetes/config.scala | 117 +++- .../spark/deploy/kubernetes/constants.scala | 37 +- .../submit}/KubernetesFileUtils.scala | 5 +- .../deploy/kubernetes/submit/v1/Client.scala | 3 +- ...DriverSubmitSslConfigurationProvider.scala | 3 +- .../deploy/kubernetes/submit/v2/Client.scala | 188 +++--- .../v2/ContainerLocalizedFilesResolver.scala | 68 +++ ...riverInitContainerComponentsProvider.scala | 155 +++++ .../ExecutorInitContainerConfiguration.scala | 47 ++ .../submit/v2/InitContainerUtil.scala | 49 ++ .../v2/MountedDependencyManagerImpl.scala | 324 ----------- .../v2/MountedDependencyManagerProvider.scala | 58 -- ...opertiesConfigMapFromScalaMapBuilder.scala | 48 ++ .../SparkInitContainerConfigMapBuilder.scala | 69 +++ ...dDependencyInitContainerConfigPlugin.scala | 69 +++ .../v2/SubmittedDependencySecretBuilder.scala | 66 +++ .../v2/SubmittedDependencyUploaderImpl.scala | 116 ++++ .../submit/v2/SubmittedResources.scala} | 17 +- .../v1/KubernetesSparkRestServer.scala | 1 + ...SparkDependencyDownloadInitContainer.scala | 181 ++++-- .../v2/ResourceStagingService.scala | 3 +- .../v2/ResourceStagingServiceImpl.scala | 5 +- .../v2/ResourceStagingServiceRetrofit.scala | 4 +- .../kubernetes/KubernetesClusterManager.scala | 49 +- .../KubernetesClusterSchedulerBackend.scala | 35 +- .../SparkPodInitContainerBootstrapSuite.scala | 164 ++++++ ...dencyInitContainerVolumesPluginSuite.scala | 60 ++ .../kubernetes/submit/v2/ClientV2Suite.scala | 542 +++++++++--------- ...ContainerLocalizedFilesResolverSuite.scala | 69 +++ ...cutorInitContainerConfigurationSuite.scala | 56 ++ .../v2/MountedDependencyManagerSuite.scala | 323 ----------- ...rkInitContainerConfigMapBuilderSuite.scala | 101 ++++ ...ndencyInitContainerConfigPluginSuite.scala | 83 +++ ...ubmittedDependencySecretBuilderSuite.scala | 83 +++ .../v2/SubmittedDependencyUploaderSuite.scala | 177 ++++++ ...DependencyDownloadInitContainerSuite.scala | 70 ++- .../src/main/docker/executor/Dockerfile | 5 +- .../kubernetes/integration-tests/pom.xml | 22 + .../integration-test-asset-server/Dockerfile | 21 + .../integrationtest/KubernetesV2Suite.scala | 57 +- .../ResourceStagingServerLauncher.scala | 30 +- .../SparkReadinessWatcher.scala | 41 ++ .../StaticAssetServerLauncher.scala | 64 +++ 
.../docker/SparkDockerImageBuilder.scala | 3 + 46 files changed, 2620 insertions(+), 1233 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/{rest/kubernetes/v1 => kubernetes/submit}/KubernetesFileUtils.scala (88%) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolver.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfiguration.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/InitContainerUtil.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/PropertiesConfigMapFromScalaMapBuilder.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilder.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/{rest/kubernetes/v2/StagedResourceIdentifier.scala => kubernetes/submit/v2/SubmittedResources.scala} (51%) create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolverSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfigurationSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilderSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala create mode 100644 
resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderSuite.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/main/docker/integration-test-asset-server/Dockerfile create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/SparkReadinessWatcher.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/StaticAssetServerLauncher.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala new file mode 100644 index 0000000000000..45b881a8a3737 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder, Secret} + +import org.apache.spark.deploy.kubernetes.constants._ + +private[spark] trait InitContainerResourceStagingServerSecretPlugin { + + /** + * Configure the init-container to mount the secret files that allow it to retrieve dependencies + * from a resource staging server. + */ + def mountResourceStagingServerSecretIntoInitContainer( + initContainer: ContainerBuilder): ContainerBuilder + + /** + * Configure the pod to attach a Secret volume which hosts secret files allowing the + * init-container to retrieve dependencies from the resource staging server. 
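+   * The volume added here is named INIT_CONTAINER_SECRET_VOLUME_NAME so that it lines up with
+   * the volume mount created by mountResourceStagingServerSecretIntoInitContainer.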
+ */ + def addResourceStagingServerSecretVolumeToPod(basePod: PodBuilder): PodBuilder +} + +private[spark] class InitContainerResourceStagingServerSecretPluginImpl( + initContainerSecretName: String, + initContainerSecretMountPath: String) + extends InitContainerResourceStagingServerSecretPlugin { + + override def mountResourceStagingServerSecretIntoInitContainer( + initContainer: ContainerBuilder): ContainerBuilder = { + initContainer.addNewVolumeMount() + .withName(INIT_CONTAINER_SECRET_VOLUME_NAME) + .withMountPath(initContainerSecretMountPath) + .endVolumeMount() + } + + override def addResourceStagingServerSecretVolumeToPod(basePod: PodBuilder): PodBuilder = { + basePod.editSpec() + .addNewVolume() + .withName(INIT_CONTAINER_SECRET_VOLUME_NAME) + .withNewSecret() + .withSecretName(initContainerSecretName) + .endSecret() + .endVolume() + .endSpec() + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala new file mode 100644 index 0000000000000..227420db4636d --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, EmptyDirVolumeSource, PodBuilder, VolumeMount, VolumeMountBuilder} + +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.v2.{ContainerNameEqualityPredicate, InitContainerUtil} + +private[spark] trait SparkPodInitContainerBootstrap { + /** + * Bootstraps an init-container that downloads dependencies to be used by a main container. + * Note that this primarily assumes that the init-container's configuration is being provided + * by a ConfigMap that was installed by some other component; that is, the implementation + * here makes no assumptions about how the init-container is specifically configured. For + * example, this class is unaware if the init-container is fetching remote dependencies or if + * it is fetching dependencies from a resource staging server. 
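+   * Implementations append the init-container to the pod, add the shared jars/files download
+   * volumes, and mount those volumes into the container identified by mainContainerName.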
+ */ + def bootstrapInitContainerAndVolumes( + mainContainerName: String, originalPodSpec: PodBuilder): PodBuilder +} + +private[spark] class SparkPodInitContainerBootstrapImpl( + initContainerImage: String, + jarsDownloadPath: String, + filesDownloadPath: String, + downloadTimeoutMinutes: Long, + initContainerConfigMapName: String, + initContainerConfigMapKey: String, + resourceStagingServerSecretPlugin: Option[InitContainerResourceStagingServerSecretPlugin]) + extends SparkPodInitContainerBootstrap { + + override def bootstrapInitContainerAndVolumes( + mainContainerName: String, + originalPodSpec: PodBuilder): PodBuilder = { + val sharedVolumeMounts = Seq[VolumeMount]( + new VolumeMountBuilder() + .withName(INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME) + .withMountPath(jarsDownloadPath) + .build(), + new VolumeMountBuilder() + .withName(INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME) + .withMountPath(filesDownloadPath) + .build()) + + val initContainer = new ContainerBuilder() + .withName(s"spark-init") + .withImage(initContainerImage) + .withImagePullPolicy("IfNotPresent") + .addNewVolumeMount() + .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) + .withMountPath(INIT_CONTAINER_PROPERTIES_FILE_DIR) + .endVolumeMount() + .addToVolumeMounts(sharedVolumeMounts: _*) + .addToArgs(INIT_CONTAINER_PROPERTIES_FILE_PATH) + val resolvedInitContainer = resourceStagingServerSecretPlugin.map { plugin => + plugin.mountResourceStagingServerSecretIntoInitContainer(initContainer) + }.getOrElse(initContainer).build() + val podWithBasicVolumes = InitContainerUtil.appendInitContainer( + originalPodSpec, resolvedInitContainer) + .editSpec() + .addNewVolume() + .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) + .withNewConfigMap() + .withName(initContainerConfigMapName) + .addNewItem() + .withKey(initContainerConfigMapKey) + .withPath(INIT_CONTAINER_PROPERTIES_FILE_NAME) + .endItem() + .endConfigMap() + .endVolume() + .addNewVolume() + .withName(INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME) + .withEmptyDir(new EmptyDirVolumeSource()) + .endVolume() + .addNewVolume() + .withName(INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME) + .withEmptyDir(new EmptyDirVolumeSource()) + .endVolume() + .editMatchingContainer(new ContainerNameEqualityPredicate(mainContainerName)) + .addToVolumeMounts(sharedVolumeMounts: _*) + .endContainer() + .endSpec() + resourceStagingServerSecretPlugin.map { plugin => + plugin.addResourceStagingServerSecretVolumeToPod(podWithBasicVolumes) + }.getOrElse(podWithBasicVolumes) + } + +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 09b2d38cb8e38..f0a39fe359227 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -349,42 +349,43 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val RESOURCE_STAGING_SERVER_SSL_NAMESPACE = "kubernetes.resourceStagingServer" private[spark] val RESOURCE_STAGING_SERVER_CERT_PEM = - ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.serverCertPem") + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.serverCertPem") .doc("Certificate PEM file to use when having the Kubernetes dependency server" + " listen on TLS.") .stringConf .createOptional private[spark] val 
RESOURCE_STAGING_SERVER_KEYSTORE_PASSWORD_FILE = - ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.keyStorePasswordFile") + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.keyStorePasswordFile") .doc("File containing the keystore password for the Kubernetes dependency server.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_KEYSTORE_KEY_PASSWORD_FILE = - ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile") + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.keyPasswordFile") .doc("File containing the key password for the Kubernetes dependency server.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_SSL_ENABLED = - ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.enabled") + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.enabled") .doc("Whether or not to use SSL when communicating with the dependency server.") .booleanConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE = - ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.trustStore") + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.trustStore") .doc("File containing the trustStore to communicate with the Kubernetes dependency server.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD = - ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.trustStorePassword") + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.trustStorePassword") .doc("Password for the trustStore for talking to the dependency server.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE = - ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.trustStoreType") + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.trustStoreType") .doc("Type of trustStore for communicating with the dependency server.") .stringConf .createOptional @@ -397,64 +398,120 @@ package object config extends Logging { .createOptional private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER = - ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadJarsResourceIdentifier") + ConfigBuilder("spark.kubernetes.initcontainer.downloadJarsResourceIdentifier") .doc("Identifier for the jars tarball that was uploaded to the staging service.") .internal() .stringConf .createOptional private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION = - ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadJarsSecretLocation") + ConfigBuilder("spark.kubernetes.initcontainer.downloadJarsSecretLocation") .doc("Location of the application secret to use when the init-container contacts the" + " resource staging server to download jars.") .internal() .stringConf - .createWithDefault(INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH) + .createWithDefault(s"$INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH/" + + s"$INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY") private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER = - ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadFilesResourceIdentifier") + ConfigBuilder("spark.kubernetes.initcontainer.downloadFilesResourceIdentifier") .doc("Identifier for the files tarball that was uploaded to the staging service.") .internal() .stringConf .createOptional private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION = - ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadFilesSecretLocation") + 
ConfigBuilder("spark.kubernetes.initcontainer.downloadFilesSecretLocation") .doc("Location of the application secret to use when the init-container contacts the" + " resource staging server to download files.") .internal() .stringConf - .createWithDefault(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH) + .createWithDefault( + s"$INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH/$INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY") + + private[spark] val INIT_CONTAINER_REMOTE_JARS = + ConfigBuilder("spark.kubernetes.initcontainer.remoteJars") + .doc("Comma-separated list of jar URIs to download in the init-container. This is" + + " calculated from spark.jars.") + .internal() + .stringConf + .createOptional + + private[spark] val INIT_CONTAINER_REMOTE_FILES = + ConfigBuilder("spark.kubernetes.initcontainer.remoteFiles") + .doc("Comma-separated list of file URIs to download in the init-container. This is" + + " calculated from spark.files.") + .internal() + .stringConf + .createOptional private[spark] val INIT_CONTAINER_DOCKER_IMAGE = - ConfigBuilder("spark.kubernetes.driver.initcontainer.docker.image") - .doc("Image for the driver's init-container that downloads mounted dependencies.") + ConfigBuilder("spark.kubernetes.initcontainer.docker.image") + .doc("Image for the driver and executor's init-container that downloads dependencies.") .stringConf - .createWithDefault(s"spark-driver-init:$sparkVersion") + .createWithDefault(s"spark-init:$sparkVersion") - private[spark] val DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION = - ConfigBuilder("spark.kubernetes.driver.mountdependencies.jarsDownloadDir") - .doc("Location to download local jars to in the driver. When using spark-submit, this" + - " directory must be empty and will be mounted as an empty directory volume on the" + - " driver pod.") + private[spark] val INIT_CONTAINER_JARS_DOWNLOAD_LOCATION = + ConfigBuilder("spark.kubernetes.mountdependencies.jarsDownloadDir") + .doc("Location to download jars to in the driver and executors. When using" + + " spark-submit, this directory must be empty and will be mounted as an empty directory" + + " volume on the driver and executor pod.") .stringConf - .createWithDefault("/var/spark-data/spark-local-jars") + .createWithDefault("/var/spark-data/spark-submitted-jars") - private[spark] val DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION = - ConfigBuilder("spark.kubernetes.driver.mountdependencies.filesDownloadDir") - .doc("Location to download local files to in the driver. When using spark-submit, this" + - " directory must be empty and will be mounted as an empty directory volume on the" + - " driver pod.") + private[spark] val INIT_CONTAINER_FILES_DOWNLOAD_LOCATION = + ConfigBuilder("spark.kubernetes.mountdependencies.filesDownloadDir") + .doc("Location to download files to in the driver and executors. 
When using" + + " spark-submit, this directory must be empty and will be mounted as an empty directory" + + " volume on the driver and executor pods.") .stringConf - .createWithDefault("/var/spark-data/spark-local-files") + .createWithDefault("/var/spark-data/spark-submitted-files") - private[spark] val DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT = + private[spark] val INIT_CONTAINER_MOUNT_TIMEOUT = ConfigBuilder("spark.kubernetes.mountdependencies.mountTimeout") .doc("Timeout before aborting the attempt to download and unpack local dependencies from" + - " the dependency staging server when initializing the driver pod.") + " remote locations and the resource staging server when initializing the driver and" + + " executor pods.") .timeConf(TimeUnit.MINUTES) .createWithDefault(5) + private[spark] val EXECUTOR_INIT_CONTAINER_CONFIG_MAP = + ConfigBuilder("spark.kubernetes.initcontainer.executor.configmapname") + .doc("Name of the config map to use in the init-container that retrieves submitted files" + + " for the executor.") + .internal() + .stringConf + .createOptional + + private[spark] val EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY = + ConfigBuilder("spark.kubernetes.initcontainer.executor.configmapkey") + .doc("Key for the entry in the init container config map for submitted files that" + + " corresponds to the properties for this init-container.") + .internal() + .stringConf + .createOptional + + private[spark] val EXECUTOR_INIT_CONTAINER_SECRET = + ConfigBuilder("spark.kubernetes.initcontainer.executor.stagingServerSecret.name") + .doc("Name of the secret to mount into the init-container that retrieves submitted files.") + .internal() + .stringConf + .createOptional + + private[spark] val EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR = + ConfigBuilder("spark.kubernetes.initcontainer.executor.stagingServerSecret.mountDir") + .doc("Directory to mount the resource staging server secrets into for the executor" + + " init-containers. This must be exactly the same as the directory that the submission" + + " client mounted the secret into because the config map's properties specify the" + + " secret location as to be the same between the driver init-container and the executor" + + " init-container. 
Thus the submission client will always set this and the driver will" + + " never rely on a constant or convention, in order to protect against cases where the" + + " submission client has a different version from the driver itself, and hence might" + + " have different constants loaded in constants.scala.") + .internal() + .stringConf + .createOptional + private[spark] def resolveK8sMaster(rawMasterString: String): String = { if (!rawMasterString.startsWith("k8s://")) { throw new IllegalArgumentException("Master URL should start with k8s:// in Kubernetes mode.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 27e47eb61933f..4c4f7b9fc3b23 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -70,8 +70,8 @@ package object constants { private[spark] val ENV_EXECUTOR_ID = "SPARK_EXECUTOR_ID" private[spark] val ENV_EXECUTOR_POD_IP = "SPARK_EXECUTOR_POD_IP" private[spark] val ENV_DRIVER_MEMORY = "SPARK_DRIVER_MEMORY" - private[spark] val ENV_UPLOADED_JARS_DIR = "SPARK_UPLOADED_JARS_DIR" private[spark] val ENV_SUBMIT_EXTRA_CLASSPATH = "SPARK_SUBMIT_EXTRA_CLASSPATH" + private[spark] val ENV_EXECUTOR_EXTRA_CLASSPATH = "SPARK_SUBMIT_EXTRA_CLASSPATH" private[spark] val ENV_MOUNTED_CLASSPATH = "SPARK_MOUNTED_CLASSPATH" private[spark] val ENV_DRIVER_MAIN_CLASS = "SPARK_DRIVER_CLASS" private[spark] val ENV_DRIVER_ARGS = "SPARK_DRIVER_ARGS" @@ -92,26 +92,21 @@ package object constants { // V2 submission init container private[spark] val INIT_CONTAINER_ANNOTATION = "pod.beta.kubernetes.io/init-containers" - private[spark] val INIT_CONTAINER_SECRETS_VOLUME_NAME = "dependency-secret" - private[spark] val INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH = "/mnt/secrets/spark-init" - private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY = "downloadJarsSecret" - private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY = "downloadFilesSecret" - private[spark] val INIT_CONTAINER_TRUSTSTORE_SECRET_KEY = "trustStore" - private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH = - s"$INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH/$INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY" - private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH = - s"$INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH/$INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY" - private[spark] val INIT_CONTAINER_TRUSTSTORE_PATH = - s"$INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH/$INIT_CONTAINER_TRUSTSTORE_SECRET_KEY" - private[spark] val INIT_CONTAINER_DOWNLOAD_CREDENTIALS_PATH = - "/mnt/secrets/kubernetes-credentials" - private[spark] val INIT_CONTAINER_CONFIG_MAP_KEY = "init-driver" - private[spark] val INIT_CONTAINER_PROPERTIES_FILE_VOLUME = "init-container-properties" - private[spark] val INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH = "/etc/spark-init/" - private[spark] val INIT_CONTAINER_PROPERTIES_FILE_NAME = "init-driver.properties" + private[spark] val INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH = + "/mnt/secrets/spark-init" + private[spark] val INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY = + "downloadSubmittedJarsSecret" + private[spark] val INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY = + "downloadSubmittedFilesSecret" + private[spark] val INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY = "trustStore" + private[spark] val 
INIT_CONTAINER_CONFIG_MAP_KEY = "download-submitted-files" + private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME = "download-jars-volume" + private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME = "download-files" + private[spark] val INIT_CONTAINER_PROPERTIES_FILE_VOLUME = "spark-init-properties" + private[spark] val INIT_CONTAINER_PROPERTIES_FILE_DIR = "/etc/spark-init" + private[spark] val INIT_CONTAINER_PROPERTIES_FILE_NAME = "spark-init.properties" private[spark] val INIT_CONTAINER_PROPERTIES_FILE_PATH = - s"$INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH/$INIT_CONTAINER_PROPERTIES_FILE_NAME" - private[spark] val DOWNLOAD_JARS_VOLUME_NAME = "download-jars" - private[spark] val DOWNLOAD_FILES_VOLUME_NAME = "download-files" + s"$INIT_CONTAINER_PROPERTIES_FILE_DIR/$INIT_CONTAINER_PROPERTIES_FILE_NAME" private[spark] val DEFAULT_SHUFFLE_MOUNT_NAME = "shuffle" + private[spark] val INIT_CONTAINER_SECRET_VOLUME_NAME = "spark-init-secret" } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesFileUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala similarity index 88% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesFileUtils.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala index b8e644219097e..1b0af3fa9fb01 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesFileUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes.v1 +package org.apache.spark.deploy.kubernetes.submit import org.apache.spark.util.Utils @@ -41,4 +41,7 @@ private[spark] object KubernetesFileUtils { Option(Utils.resolveURI(uri).getScheme).getOrElse("file") == "file" } + def getOnlyRemoteFiles(uris: Iterable[String]): Iterable[String] = { + filterUriStringsByScheme(uris, scheme => scheme != "file" && scheme != "local") + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index a4dfe90f71a8a..0f1e7886a1ba2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -33,7 +33,8 @@ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesCredentials, KubernetesFileUtils, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} +import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils +import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesCredentials, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} import org.apache.spark.internal.Logging import org.apache.spark.util.{ShutdownHookManager, Utils} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala index 10ffddcd7e7fc..174e9c57a65ca 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala @@ -29,7 +29,8 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.v1.{KubernetesFileUtils, PemsToKeyStoreConverter} +import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils +import org.apache.spark.deploy.rest.kubernetes.v1.PemsToKeyStoreConverter import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala index a70c93942ffb5..a403a91840bd6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala @@ -19,11 +19,10 @@ package org.apache.spark.deploy.kubernetes.submit.v2 import 
java.io.File import java.util.Collections -import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, HasMetadata, OwnerReferenceBuilder, PodBuilder} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, OwnerReferenceBuilder, PodBuilder} import scala.collection.JavaConverters._ -import scala.collection.mutable -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException} +import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.internal.Logging @@ -35,33 +34,25 @@ import org.apache.spark.util.Utils * * This class is responsible for instantiating Kubernetes resources that allow a Spark driver to * run in a pod on the Kubernetes cluster with the Spark configurations specified by spark-submit. - * Application submitters that desire to provide their application's dependencies from their local - * disk must provide a resource staging server URI to this client so that the client can push the - * local resources to the resource staging server and have the driver pod pull the resources in an - * init-container. Interactions with the resource staging server are offloaded to the - * {@link MountedDependencyManager} class. If instead the application submitter has their - * dependencies pre-staged in remote locations like HDFS or their own HTTP servers already, then - * the mounted dependency manager is bypassed entirely, but the init-container still needs to - * fetch these remote dependencies (TODO https://github.com/apache-spark-on-k8s/spark/issues/238). + * The API of this class makes it such that much of the specific behavior can be stubbed for + * testing; most of the detailed logic must be dependency-injected when constructing an instance + * of this client. Therefore the submission process is designed to be as modular as possible, + * where different steps of submission should be factored out into separate classes. 
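
(A sketch of the wiring described above, not part of the diff itself: the companion object added at the end of this file derives the Kubernetes app id from the app name and launch time, constructs SubmissionKubernetesClientProviderImpl and DriverInitContainerComponentsProviderImpl, and passes them to this constructor. Invoking that entry point directly would look roughly like the following, with a hypothetical application resource, main class, and arguments.)

    import org.apache.spark.SparkConf
    import org.apache.spark.deploy.kubernetes.submit.v2.Client

    // Sketch only: Client.main parses the same three values from its program arguments.
    val conf = new SparkConf(true).set("spark.app.name", "spark-pi")
    Client.run(
      sparkConf = conf,
      mainAppResource = "local:///opt/spark/examples/jars/spark-examples.jar",
      mainClass = "org.apache.spark.examples.SparkPi",
      appArgs = Array("1000"))
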
*/ private[spark] class Client( + appName: String, + kubernetesAppId: String, mainClass: String, sparkConf: SparkConf, appArgs: Array[String], - mainAppResource: String, + sparkJars: Seq[String], + sparkFiles: Seq[String], kubernetesClientProvider: SubmissionKubernetesClientProvider, - mountedDependencyManagerProvider: MountedDependencyManagerProvider) extends Logging { + initContainerComponentsProvider: DriverInitContainerComponentsProvider) extends Logging { - private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) - private val master = resolveK8sMaster(sparkConf.get("spark.master")) - private val launchTime = System.currentTimeMillis - private val appName = sparkConf.getOption("spark.app.name") - .getOrElse("spark") - private val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) .getOrElse(kubernetesAppId) private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) - private val maybeStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) private val memoryOverheadMb = sparkConf .get(KUBERNETES_DRIVER_MEMORY_OVERHEAD) @@ -70,22 +61,15 @@ private[spark] class Client( private val driverContainerMemoryWithOverhead = driverMemoryMb + memoryOverheadMb private val customLabels = sparkConf.get(KUBERNETES_DRIVER_LABELS) private val customAnnotations = sparkConf.get(KUBERNETES_DRIVER_ANNOTATIONS) - private val sparkJars = sparkConf.getOption("spark.jars") - .map(_.split(",")) - .getOrElse(Array.empty[String]) ++ - Option(mainAppResource) - .filterNot(_ == SparkLauncher.NO_RESOURCE) - .toSeq - private val sparkFiles = sparkConf.getOption("spark.files") - .map(_.split(",")) - .getOrElse(Array.empty[String]) private val driverExtraClasspath = sparkConf.get( org.apache.spark.internal.config.DRIVER_CLASS_PATH) private val driverJavaOptions = sparkConf.get( org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) def run(): Unit = { + validateNoDuplicateFileNames(sparkJars) + validateNoDuplicateFileNames(sparkFiles) val parsedCustomLabels = parseKeyValuePairs(customLabels, KUBERNETES_DRIVER_LABELS.key, "labels") require(!parsedCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + @@ -133,50 +117,39 @@ private[spark] class Client( .addToContainers(driverContainer) .endSpec() - val nonDriverPodKubernetesResources = mutable.Buffer[HasMetadata]() - val resolvedJars = mutable.Buffer[String]() - val resolvedFiles = mutable.Buffer[String]() - val driverPodWithMountedDeps = maybeStagingServerUri.map { stagingServerUri => - val mountedDependencyManager = mountedDependencyManagerProvider.getMountedDependencyManager( - kubernetesAppId, - stagingServerUri, - allLabels, - namespace, - sparkJars, - sparkFiles) - val jarsResourceIdentifier = mountedDependencyManager.uploadJars() - val filesResourceIdentifier = mountedDependencyManager.uploadFiles() - val initContainerKubernetesSecret = mountedDependencyManager.buildInitContainerSecret( - jarsResourceIdentifier.resourceSecret, filesResourceIdentifier.resourceSecret) - val initContainerConfigMap = mountedDependencyManager.buildInitContainerConfigMap( - jarsResourceIdentifier.resourceId, filesResourceIdentifier.resourceId) - resolvedJars ++= mountedDependencyManager.resolveSparkJars() - resolvedFiles ++= mountedDependencyManager.resolveSparkFiles() - nonDriverPodKubernetesResources += initContainerKubernetesSecret - nonDriverPodKubernetesResources += 
initContainerConfigMap - mountedDependencyManager.configurePodToMountLocalDependencies( - driverContainer.getName, initContainerKubernetesSecret, initContainerConfigMap, basePod) - }.getOrElse { - sparkJars.map(Utils.resolveURI).foreach { jar => - require(Option.apply(jar.getScheme).getOrElse("file") != "file", - "When submitting with local jars, a resource staging server must be provided to" + - s" deploy your jars into the driver pod. Cannot send jar with URI $jar.") - } - sparkFiles.map(Utils.resolveURI).foreach { file => - require(Option.apply(file.getScheme).getOrElse("file") != "file", - "When submitting with local files, a resource staging server must be provided to" + - s" deploy your files into the driver pod. Cannot send file with URI $file") - } - resolvedJars ++= sparkJars - resolvedFiles ++= sparkFiles - basePod + val maybeSubmittedDependencyUploader = initContainerComponentsProvider + .provideInitContainerSubmittedDependencyUploader(allLabels) + val maybeSubmittedResourceIdentifiers = maybeSubmittedDependencyUploader.map { uploader => + SubmittedResources(uploader.uploadJars(), uploader.uploadFiles()) } - val resolvedSparkConf = sparkConf.clone() - if (resolvedJars.nonEmpty) { - resolvedSparkConf.set("spark.jars", resolvedJars.mkString(",")) + val maybeSecretBuilder = initContainerComponentsProvider + .provideSubmittedDependenciesSecretBuilder( + maybeSubmittedResourceIdentifiers.map(_.secrets())) + val maybeSubmittedDependenciesSecret = maybeSecretBuilder.map(_.build()) + val initContainerConfigMap = initContainerComponentsProvider + .provideInitContainerConfigMapBuilder(maybeSubmittedResourceIdentifiers.map(_.ids())) + .build() + val podWithInitContainer = initContainerComponentsProvider + .provideInitContainerBootstrap() + .bootstrapInitContainerAndVolumes(driverContainer.getName, basePod) + + val driverOwnedResources = Seq(initContainerConfigMap) ++ + maybeSubmittedDependenciesSecret.toSeq + + val containerLocalizedFilesResolver = initContainerComponentsProvider + .provideContainerLocalizedFilesResolver() + val resolvedSparkJars = containerLocalizedFilesResolver.resolveSubmittedSparkJars() + val resolvedSparkFiles = containerLocalizedFilesResolver.resolveSubmittedSparkFiles() + + val executorInitContainerConfiguration = initContainerComponentsProvider + .provideExecutorInitContainerConfiguration() + val resolvedSparkConf = executorInitContainerConfiguration + .configureSparkConfForExecutorInitContainer(sparkConf) + if (resolvedSparkJars.nonEmpty) { + resolvedSparkConf.set("spark.jars", resolvedSparkJars.mkString(",")) } - if (resolvedFiles.nonEmpty) { - resolvedSparkConf.set("spark.files", resolvedFiles.mkString(",")) + if (resolvedSparkFiles.nonEmpty) { + resolvedSparkConf.set("spark.files", resolvedSparkFiles.mkString(",")) } resolvedSparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) resolvedSparkConf.set("spark.app.id", kubernetesAppId) @@ -188,19 +161,16 @@ private[spark] class Client( resolvedSparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).foreach { _ => resolvedSparkConf.set(KUBERNETES_DRIVER_OAUTH_TOKEN.key, "") } - - val mountedClassPath = resolvedJars.map(Utils.resolveURI).filter { jarUri => - val scheme = Option.apply(jarUri.getScheme).getOrElse("file") - scheme == "local" || scheme == "file" - }.map(_.getPath).mkString(File.pathSeparator) - val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { case (confKey, confValue) => - s"-D$confKey=$confValue" + val resolvedLocalClasspath = containerLocalizedFilesResolver + 
.resolveSubmittedAndRemoteSparkJars() + val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { + case (confKey, confValue) => s"-D$confKey=$confValue" }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") - val resolvedDriverPod = driverPodWithMountedDeps.editSpec() + val resolvedDriverPod = podWithInitContainer.editSpec() .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainer.getName)) .addNewEnv() .withName(ENV_MOUNTED_CLASSPATH) - .withValue(mountedClassPath) + .withValue(resolvedLocalClasspath.mkString(File.pathSeparator)) .endEnv() .addNewEnv() .withName(ENV_DRIVER_JAVA_OPTS) @@ -218,11 +188,11 @@ private[spark] class Client( .withKind(createdDriverPod.getKind) .withController(true) .build() - nonDriverPodKubernetesResources.foreach { resource => + driverOwnedResources.foreach { resource => val originalMetadata = resource.getMetadata originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) } - kubernetesClient.resourceList(nonDriverPodKubernetesResources: _*).createOrReplace() + kubernetesClient.resourceList(driverOwnedResources: _*).createOrReplace() } catch { case e: Throwable => kubernetesClient.pods().delete(createdDriverPod) @@ -231,6 +201,17 @@ private[spark] class Client( } } + private def validateNoDuplicateFileNames(allFiles: Seq[String]): Unit = { + val fileNamesToUris = allFiles.map { file => + (new File(Utils.resolveURI(file).getPath).getName, file) + } + fileNamesToUris.groupBy(_._1).foreach { + case (fileName, urisWithFileName) => + require(urisWithFileName.size == 1, "Cannot add multiple files with the same name, but" + + s" file name $fileName is shared by all of these URIs: $urisWithFileName") + } + } + private def parseKeyValuePairs( maybeKeyValues: Option[String], configKey: String, @@ -249,3 +230,46 @@ private[spark] class Client( }).getOrElse(Map.empty[String, String]) } } + +private[spark] object Client { + def main(args: Array[String]): Unit = { + val sparkConf = new SparkConf(true) + val mainAppResource = args(0) + val mainClass = args(1) + val appArgs = args.drop(2) + run(sparkConf, mainAppResource, mainClass, appArgs) + } + + def run( + sparkConf: SparkConf, + mainAppResource: String, + mainClass: String, + appArgs: Array[String]): Unit = { + val sparkJars = sparkConf.getOption("spark.jars") + .map(_.split(",")) + .getOrElse(Array.empty[String]) ++ + Option(mainAppResource) + .filterNot(_ == SparkLauncher.NO_RESOURCE) + .toSeq + val launchTime = System.currentTimeMillis + val sparkFiles = sparkConf.getOption("spark.files") + .map(_.split(",")) + .getOrElse(Array.empty[String]) + val appName = sparkConf.getOption("spark.app.name") + .getOrElse("spark") + val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + val initContainerComponentsProvider = new DriverInitContainerComponentsProviderImpl( + sparkConf, kubernetesAppId, sparkJars, sparkFiles) + val kubernetesClientProvider = new SubmissionKubernetesClientProviderImpl(sparkConf) + new Client( + appName, + kubernetesAppId, + mainClass, + sparkConf, + appArgs, + sparkJars, + sparkFiles, + kubernetesClientProvider, + initContainerComponentsProvider).run() + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolver.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolver.scala new file mode 100644 index 0000000000000..5505d87fa8072 --- /dev/null +++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolver.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.File + +import org.apache.spark.util.Utils + +private[spark] trait ContainerLocalizedFilesResolver { + def resolveSubmittedAndRemoteSparkJars(): Seq[String] + def resolveSubmittedSparkJars(): Seq[String] + def resolveSubmittedSparkFiles(): Seq[String] +} + +private[spark] class ContainerLocalizedFilesResolverImpl( + sparkJars: Seq[String], + sparkFiles: Seq[String], + jarsDownloadPath: String, + filesDownloadPath: String) extends ContainerLocalizedFilesResolver { + + override def resolveSubmittedAndRemoteSparkJars(): Seq[String] = { + sparkJars.map { jar => + val jarUri = Utils.resolveURI(jar) + Option(jarUri.getScheme).getOrElse("file") match { + case "local" => + jarUri.getPath + case _ => + val jarFileName = new File(jarUri.getPath).getName + s"$jarsDownloadPath/$jarFileName" + } + } + } + + override def resolveSubmittedSparkJars(): Seq[String] = { + resolveSubmittedFiles(sparkJars, jarsDownloadPath) + } + + override def resolveSubmittedSparkFiles(): Seq[String] = { + resolveSubmittedFiles(sparkFiles, filesDownloadPath) + } + + private def resolveSubmittedFiles(files: Seq[String], downloadPath: String): Seq[String] = { + files.map { file => + val fileUri = Utils.resolveURI(file) + Option(fileUri.getScheme).getOrElse("file") match { + case "file" => + val fileName = new File(fileUri.getPath).getName + s"$downloadPath/$fileName" + case _ => + file + } + } + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala new file mode 100644 index 0000000000000..5b649735f2b3d --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import org.apache.spark.{SecurityManager, SparkConf} +import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.rest.kubernetes.v2.RetrofitClientFactoryImpl + +/** + * Interface that wraps the provision of everything the submission client needs to set up the + * driver's init-container. This is all wrapped in the same place to ensure that related + * components are being constructed with consistent configurations with respect to one another. + */ +private[spark] trait DriverInitContainerComponentsProvider { + + def provideInitContainerConfigMapBuilder( + maybeSubmittedResourceIds: Option[SubmittedResourceIds]) + : SparkInitContainerConfigMapBuilder + def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver + def provideExecutorInitContainerConfiguration(): ExecutorInitContainerConfiguration + def provideInitContainerSubmittedDependencyUploader( + driverPodLabels: Map[String, String]): Option[SubmittedDependencyUploader] + def provideSubmittedDependenciesSecretBuilder( + maybeSubmittedResourceSecrets: Option[SubmittedResourceSecrets]) + : Option[SubmittedDependencySecretBuilder] + def provideInitContainerBootstrap(): SparkPodInitContainerBootstrap +} + +private[spark] class DriverInitContainerComponentsProviderImpl( + sparkConf: SparkConf, + kubernetesAppId: String, + sparkJars: Seq[String], + sparkFiles: Seq[String]) + extends DriverInitContainerComponentsProvider { + + private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) + private val resourceStagingServerSslOptions = new SecurityManager(sparkConf) + .getSSLOptions(RESOURCE_STAGING_SERVER_SSL_NAMESPACE) + private val jarsDownloadPath = sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) + private val filesDownloadPath = sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION) + private val maybeSecretName = maybeResourceStagingServerUri.map { _ => + s"$kubernetesAppId-init-secret" + } + private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) + private val configMapName = s"$kubernetesAppId-init-config" + private val configMapKey = s"$kubernetesAppId-init-config-key" + private val initContainerImage = sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE) + private val downloadTimeoutMinutes = sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) + + override def provideInitContainerConfigMapBuilder( + maybeSubmittedResourceIds: Option[SubmittedResourceIds]) + : SparkInitContainerConfigMapBuilder = { + val submittedDependencyConfigPlugin = for { + stagingServerUri <- maybeResourceStagingServerUri + jarsResourceId <- maybeSubmittedResourceIds.map(_.jarsResourceId) + filesResourceId <- maybeSubmittedResourceIds.map(_.filesResourceId) + } yield { + new SubmittedDependencyInitContainerConfigPluginImpl( + stagingServerUri, + jarsResourceId, + filesResourceId, + 
INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, + INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, + INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, + INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH, + resourceStagingServerSslOptions) + } + new SparkInitContainerConfigMapBuilderImpl( + sparkJars, + sparkFiles, + jarsDownloadPath, + filesDownloadPath, + configMapName, + configMapKey, + submittedDependencyConfigPlugin) + } + + override def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver = { + new ContainerLocalizedFilesResolverImpl( + sparkJars, sparkFiles, jarsDownloadPath, filesDownloadPath) + } + + override def provideExecutorInitContainerConfiguration(): ExecutorInitContainerConfiguration = { + new ExecutorInitContainerConfigurationImpl( + maybeSecretName, + INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH, + configMapName, + configMapKey) + } + + override def provideInitContainerSubmittedDependencyUploader( + driverPodLabels: Map[String, String]): Option[SubmittedDependencyUploader] = { + maybeResourceStagingServerUri.map { stagingServerUri => + new SubmittedDependencyUploaderImpl( + kubernetesAppId, + driverPodLabels, + namespace, + stagingServerUri, + sparkJars, + sparkFiles, + resourceStagingServerSslOptions, + RetrofitClientFactoryImpl) + } + } + + override def provideSubmittedDependenciesSecretBuilder( + maybeSubmittedResourceSecrets: Option[SubmittedResourceSecrets]) + : Option[SubmittedDependencySecretBuilder] = { + for { + secretName <- maybeSecretName + jarsResourceSecret <- maybeSubmittedResourceSecrets.map(_.jarsResourceSecret) + filesResourceSecret <- maybeSubmittedResourceSecrets.map(_.filesResourceSecret) + } yield { + new SubmittedDependencySecretBuilderImpl( + secretName, + jarsResourceSecret, + filesResourceSecret, + INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, + INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, + INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, + resourceStagingServerSslOptions) + } + } + + override def provideInitContainerBootstrap(): SparkPodInitContainerBootstrap = { + val resourceStagingServerSecretPlugin = maybeSecretName.map { secret => + new InitContainerResourceStagingServerSecretPluginImpl( + secret, INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) + } + new SparkPodInitContainerBootstrapImpl( + initContainerImage, + jarsDownloadPath, + filesDownloadPath, + downloadTimeoutMinutes, + configMapName, + configMapKey, + resourceStagingServerSecretPlugin) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfiguration.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfiguration.scala new file mode 100644 index 0000000000000..adfdc060f0d0f --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfiguration.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ + +private[spark] trait ExecutorInitContainerConfiguration { + /** + * Provide the driver with configuration that allows it to configure executors to + * fetch resources in the same way the driver does. + */ + def configureSparkConfForExecutorInitContainer(originalSparkConf: SparkConf): SparkConf +} + +private[spark] class ExecutorInitContainerConfigurationImpl( + initContainerSecretName: Option[String], + initContainerSecretMountDir: String, + initContainerConfigMapName: String, + initContainerConfigMapKey: String) + extends ExecutorInitContainerConfiguration { + def configureSparkConfForExecutorInitContainer(originalSparkConf: SparkConf): SparkConf = { + val configuredSparkConf = originalSparkConf.clone() + .set(EXECUTOR_INIT_CONTAINER_CONFIG_MAP, + initContainerConfigMapName) + .set(EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY, + initContainerConfigMapKey) + .set(EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR, initContainerSecretMountDir) + initContainerSecretName.map { secret => + configuredSparkConf.set(EXECUTOR_INIT_CONTAINER_SECRET, secret) + }.getOrElse(configuredSparkConf) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/InitContainerUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/InitContainerUtil.scala new file mode 100644 index 0000000000000..0526ca53baaab --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/InitContainerUtil.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import io.fabric8.kubernetes.api.model.{Container, PodBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.deploy.kubernetes.constants._ + +private[spark] object InitContainerUtil { + + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + + def appendInitContainer( + originalPodSpec: PodBuilder, initContainer: Container): PodBuilder = { + val resolvedInitContainers = originalPodSpec + .editMetadata() + .getAnnotations + .asScala + .get(INIT_CONTAINER_ANNOTATION) + .map { existingInitContainerAnnotation => + val existingInitContainers = OBJECT_MAPPER.readValue( + existingInitContainerAnnotation, classOf[List[Container]]) + existingInitContainers ++ Seq(initContainer) + }.getOrElse(Seq(initContainer)) + val resolvedSerializedInitContainers = OBJECT_MAPPER.writeValueAsString(resolvedInitContainers) + originalPodSpec + .editMetadata() + .removeFromAnnotations(INIT_CONTAINER_ANNOTATION) + .addToAnnotations(INIT_CONTAINER_ANNOTATION, resolvedSerializedInitContainers) + .endMetadata() + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala deleted file mode 100644 index 9dbbcd0d56a3b..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit.v2 - -import java.io.{File, FileOutputStream, StringWriter} -import java.util.Properties -import javax.ws.rs.core.MediaType - -import com.fasterxml.jackson.databind.ObjectMapper -import com.fasterxml.jackson.module.scala.DefaultScalaModule -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, Files} -import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, Container, ContainerBuilder, EmptyDirVolumeSource, PodBuilder, Secret, SecretBuilder, VolumeMount, VolumeMountBuilder} -import okhttp3.RequestBody -import retrofit2.Call -import scala.collection.JavaConverters._ -import scala.collection.mutable - -import org.apache.spark.{SparkException, SSLOptions} -import org.apache.spark.deploy.kubernetes.CompressionUtils -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.v1.{KubernetesCredentials, KubernetesFileUtils} -import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory, StagedResourceIdentifier} -import org.apache.spark.util.Utils - -private[spark] trait MountedDependencyManager { - - /** - * Upload submitter-local jars to the resource staging server. - * @return The resource ID and secret to use to retrieve these jars. - */ - def uploadJars(): StagedResourceIdentifier - - /** - * Upload submitter-local files to the resource staging server. - * @return The resource ID and secret to use to retrieve these files. - */ - def uploadFiles(): StagedResourceIdentifier - - def configurePodToMountLocalDependencies( - driverContainerName: String, - initContainerSecret: Secret, - initContainerConfigMap: ConfigMap, - originalPodSpec: PodBuilder): PodBuilder - - def buildInitContainerSecret(jarsSecret: String, filesSecret: String): Secret - - def buildInitContainerConfigMap( - jarsResourceId: String, filesResourceId: String): ConfigMap - - /** - * Convert the Spark jar paths from their locations on the submitter's disk to - * the locations they will be downloaded to on the driver's disk. - */ - def resolveSparkJars(): Seq[String] - - /** - * Convert the Spark file paths from their locations on the submitter's disk to - * the locations they will be downloaded to on the driver's disk. - */ - def resolveSparkFiles(): Seq[String] -} - -/** - * Default implementation of a MountedDependencyManager that is backed by a - * Resource Staging Service. 
- */ -private[spark] class MountedDependencyManagerImpl( - kubernetesAppId: String, - podLabels: Map[String, String], - podNamespace: String, - stagingServerUri: String, - initContainerImage: String, - jarsDownloadPath: String, - filesDownloadPath: String, - downloadTimeoutMinutes: Long, - sparkJars: Seq[String], - sparkFiles: Seq[String], - stagingServiceSslOptions: SSLOptions, - retrofitClientFactory: RetrofitClientFactory) extends MountedDependencyManager { - private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) - - private def localUriStringsToFiles(uris: Seq[String]): Iterable[File] = { - KubernetesFileUtils.getOnlySubmitterLocalFiles(uris) - .map(Utils.resolveURI) - .map(uri => new File(uri.getPath)) - } - private def localJars: Iterable[File] = localUriStringsToFiles(sparkJars) - private def localFiles: Iterable[File] = localUriStringsToFiles(sparkFiles) - - override def uploadJars(): StagedResourceIdentifier = doUpload(localJars, "uploaded-jars") - override def uploadFiles(): StagedResourceIdentifier = doUpload(localFiles, "uploaded-files") - - private def doUpload(files: Iterable[File], fileNamePrefix: String): StagedResourceIdentifier = { - val filesDir = Utils.createTempDir(namePrefix = fileNamePrefix) - val filesTgz = new File(filesDir, s"$fileNamePrefix.tgz") - Utils.tryWithResource(new FileOutputStream(filesTgz)) { filesOutputStream => - CompressionUtils.writeTarGzipToStream(filesOutputStream, files.map(_.getAbsolutePath)) - } - // TODO provide credentials properly when the staging server monitors the Kubernetes API. - val kubernetesCredentialsString = OBJECT_MAPPER.writer() - .writeValueAsString(KubernetesCredentials(None, None, None, None)) - val labelsAsString = OBJECT_MAPPER.writer().writeValueAsString(podLabels) - - val filesRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), filesTgz) - - val kubernetesCredentialsBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), kubernetesCredentialsString) - - val namespaceRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.TEXT_PLAIN), podNamespace) - - val labelsRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), labelsAsString) - - val service = retrofitClientFactory.createRetrofitClient( - stagingServerUri, - classOf[ResourceStagingServiceRetrofit], - stagingServiceSslOptions) - val uploadResponse = service.uploadResources( - labelsRequestBody, namespaceRequestBody, filesRequestBody, kubernetesCredentialsBody) - getTypedResponseResult(uploadResponse) - } - - override def configurePodToMountLocalDependencies( - driverContainerName: String, - initContainerSecret: Secret, - initContainerConfigMap: ConfigMap, - originalPodSpec: PodBuilder): PodBuilder = { - val sharedVolumeMounts = Seq[VolumeMount]( - new VolumeMountBuilder() - .withName(DOWNLOAD_JARS_VOLUME_NAME) - .withMountPath(jarsDownloadPath) - .build(), - new VolumeMountBuilder() - .withName(DOWNLOAD_FILES_VOLUME_NAME) - .withMountPath(filesDownloadPath) - .build()) - - val initContainers = Seq(new ContainerBuilder() - .withName("spark-driver-init") - .withImage(initContainerImage) - .withImagePullPolicy("IfNotPresent") - .addNewVolumeMount() - .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) - .withMountPath(INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH) - .endVolumeMount() - .addNewVolumeMount() - .withName(INIT_CONTAINER_SECRETS_VOLUME_NAME) - .withMountPath(INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH) - .endVolumeMount() - 
.addToVolumeMounts(sharedVolumeMounts: _*) - .addToArgs(INIT_CONTAINER_PROPERTIES_FILE_PATH) - .build()) - - // Make sure we don't override any user-provided init containers by just appending ours to - // the existing list. - val resolvedInitContainers = originalPodSpec - .editMetadata() - .getAnnotations - .asScala - .get(INIT_CONTAINER_ANNOTATION) - .map { existingInitContainerAnnotation => - val existingInitContainers = OBJECT_MAPPER.readValue( - existingInitContainerAnnotation, classOf[List[Container]]) - existingInitContainers ++ initContainers - }.getOrElse(initContainers) - val resolvedSerializedInitContainers = OBJECT_MAPPER.writeValueAsString(resolvedInitContainers) - originalPodSpec - .editMetadata() - .removeFromAnnotations(INIT_CONTAINER_ANNOTATION) - .addToAnnotations(INIT_CONTAINER_ANNOTATION, resolvedSerializedInitContainers) - .endMetadata() - .editSpec() - .addNewVolume() - .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) - .withNewConfigMap() - .withName(initContainerConfigMap.getMetadata.getName) - .addNewItem() - .withKey(INIT_CONTAINER_CONFIG_MAP_KEY) - .withPath(INIT_CONTAINER_PROPERTIES_FILE_NAME) - .endItem() - .endConfigMap() - .endVolume() - .addNewVolume() - .withName(DOWNLOAD_JARS_VOLUME_NAME) - .withEmptyDir(new EmptyDirVolumeSource()) - .endVolume() - .addNewVolume() - .withName(DOWNLOAD_FILES_VOLUME_NAME) - .withEmptyDir(new EmptyDirVolumeSource()) - .endVolume() - .addNewVolume() - .withName(INIT_CONTAINER_SECRETS_VOLUME_NAME) - .withNewSecret() - .withSecretName(initContainerSecret.getMetadata.getName) - .endSecret() - .endVolume() - .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainerName)) - .addToVolumeMounts(sharedVolumeMounts: _*) - .addNewEnv() - .withName(ENV_UPLOADED_JARS_DIR) - .withValue(jarsDownloadPath) - .endEnv() - .endContainer() - .endSpec() - } - - override def buildInitContainerSecret(jarsSecret: String, filesSecret: String): Secret = { - val trustStoreBase64 = stagingServiceSslOptions.trustStore.map { trustStoreFile => - require(trustStoreFile.isFile, "Dependency server trustStore provided at" + - trustStoreFile.getAbsolutePath + " does not exist or is not a file.") - (INIT_CONTAINER_TRUSTSTORE_SECRET_KEY, - BaseEncoding.base64().encode(Files.toByteArray(trustStoreFile))) - }.toMap - val jarsSecretBase64 = BaseEncoding.base64().encode(jarsSecret.getBytes(Charsets.UTF_8)) - val filesSecretBase64 = BaseEncoding.base64().encode(filesSecret.getBytes(Charsets.UTF_8)) - val secretData = Map( - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY -> jarsSecretBase64, - INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY -> filesSecretBase64) ++ - trustStoreBase64 - val kubernetesSecret = new SecretBuilder() - .withNewMetadata() - .withName(s"$kubernetesAppId-spark-init") - .endMetadata() - .addToData(secretData.asJava) - .build() - kubernetesSecret - } - - override def buildInitContainerConfigMap( - jarsResourceId: String, filesResourceId: String): ConfigMap = { - val initContainerProperties = new Properties() - initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_URI.key, stagingServerUri) - initContainerProperties.setProperty(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION.key, jarsDownloadPath) - initContainerProperties.setProperty(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION.key, filesDownloadPath) - initContainerProperties.setProperty( - INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key, jarsResourceId) - initContainerProperties.setProperty( - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key, INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH) - 
initContainerProperties.setProperty( - INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key, filesResourceId) - initContainerProperties.setProperty( - INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key, INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH) - initContainerProperties.setProperty(DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT.key, - s"${downloadTimeoutMinutes}m") - stagingServiceSslOptions.trustStore.foreach { _ => - initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key, - INIT_CONTAINER_TRUSTSTORE_PATH) - } - initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_SSL_ENABLED.key, - stagingServiceSslOptions.enabled.toString) - stagingServiceSslOptions.trustStorePassword.foreach { password => - initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key, password) - } - stagingServiceSslOptions.trustStoreType.foreach { storeType => - initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key, storeType) - } - val propertiesWriter = new StringWriter() - initContainerProperties.store(propertiesWriter, "Init-container properties.") - new ConfigMapBuilder() - .withNewMetadata() - .withName(s"$kubernetesAppId-init-properties") - .endMetadata() - .addToData(INIT_CONTAINER_CONFIG_MAP_KEY, propertiesWriter.toString) - .build() - } - - override def resolveSparkJars(): Seq[String] = resolveLocalFiles(sparkJars, jarsDownloadPath) - - override def resolveSparkFiles(): Seq[String] = resolveLocalFiles(sparkFiles, filesDownloadPath) - - private def resolveLocalFiles( - allFileUriStrings: Seq[String], localDownloadRoot: String): Seq[String] = { - val usedLocalFileNames = mutable.HashSet.empty[String] - val resolvedFiles = mutable.Buffer.empty[String] - for (fileUriString <- allFileUriStrings) { - val fileUri = Utils.resolveURI(fileUriString) - val resolvedFile = Option(fileUri.getScheme).getOrElse("file") match { - case "file" => - // Deduplication logic matches that of CompressionUtils#writeTarGzipToStream - val file = new File(fileUri.getPath) - val extension = Files.getFileExtension(file.getName) - val nameWithoutExtension = Files.getNameWithoutExtension(file.getName) - var resolvedFileName = file.getName - var deduplicationCounter = 1 - while (usedLocalFileNames.contains(resolvedFileName)) { - resolvedFileName = s"$nameWithoutExtension-$deduplicationCounter.$extension" - deduplicationCounter += 1 - } - s"file://$localDownloadRoot/$resolvedFileName" - case _ => fileUriString - } - resolvedFiles += resolvedFile - } - resolvedFiles - } - - private def getTypedResponseResult[T](call: Call[T]): T = { - val response = call.execute() - if (response.code() < 200 || response.code() >= 300) { - throw new SparkException("Unexpected response from dependency server when uploading" + - s" dependencies: ${response.code()}. Error body: " + - Option(response.errorBody()).map(_.string()).getOrElse("N/A")) - } - response.body() - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala deleted file mode 100644 index 8f09112132b2c..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit.v2 - -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf} -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.rest.kubernetes.v2.RetrofitClientFactoryImpl - -private[spark] trait MountedDependencyManagerProvider { - def getMountedDependencyManager( - kubernetesAppId: String, - stagingServerUri: String, - podLabels: Map[String, String], - podNamespace: String, - sparkJars: Seq[String], - sparkFiles: Seq[String]): MountedDependencyManager -} - -private[spark] class MountedDependencyManagerProviderImpl(sparkConf: SparkConf) - extends MountedDependencyManagerProvider { - override def getMountedDependencyManager( - kubernetesAppId: String, - stagingServerUri: String, - podLabels: Map[String, String], - podNamespace: String, - sparkJars: Seq[String], - sparkFiles: Seq[String]): MountedDependencyManager = { - val resourceStagingServerSslOptions = new SparkSecurityManager(sparkConf) - .getSSLOptions("kubernetes.resourceStagingServer") - new MountedDependencyManagerImpl( - kubernetesAppId, - podLabels, - podNamespace, - stagingServerUri, - sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE), - sparkConf.get(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION), - sparkConf.get(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION), - sparkConf.get(DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT), - sparkJars, - sparkFiles, - resourceStagingServerSslOptions, - RetrofitClientFactoryImpl) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/PropertiesConfigMapFromScalaMapBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/PropertiesConfigMapFromScalaMapBuilder.scala new file mode 100644 index 0000000000000..cb9194552d2b6 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/PropertiesConfigMapFromScalaMapBuilder.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.deploy.kubernetes.submit.v2
+
+import java.io.StringWriter
+import java.util.Properties
+
+import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder}
+
+/**
+ * Creates a config map from a Scala map, writing the entries under a single
+ * given key in {@link java.util.Properties} format.
+ */
+private[spark] object PropertiesConfigMapFromScalaMapBuilder {
+
+  def buildConfigMap(
+      configMapName: String,
+      configMapKey: String,
+      config: Map[String, String]): ConfigMap = {
+    val properties = new Properties()
+    config.foreach { case (key, value) => properties.setProperty(key, value) }
+    val propertiesWriter = new StringWriter()
+    properties.store(propertiesWriter,
+      s"Java properties built from Kubernetes config map with name: $configMapName" +
+        s" and config map key: $configMapKey")
+    new ConfigMapBuilder()
+      .withNewMetadata()
+      .withName(configMapName)
+      .endMetadata()
+      .addToData(configMapKey, propertiesWriter.toString)
+      .build()
+  }
+}
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilder.scala
new file mode 100644
index 0000000000000..362fbbdf517dc
--- /dev/null
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilder.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.deploy.kubernetes.submit.v2
+
+import io.fabric8.kubernetes.api.model.ConfigMap
+
+import org.apache.spark.deploy.kubernetes.config._
+import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils
+
+private[spark] trait SparkInitContainerConfigMapBuilder {
+  /**
+   * Construct a config map that an init-container should reference for fetching
+   * remote dependencies. The config map includes the remote jars and files to download,
+   * as well as details to fetch files from a resource staging server, if applicable.
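// Illustrative round trip (a sketch, not part of this change): the properties text
// written by PropertiesConfigMapFromScalaMapBuilder above can be read back from the
// mounted config-map file and applied onto a SparkConf. The config map name, key,
// and the spark.example.* property below are made up for the example; it assumes
// placement in the same package so the private[spark] object resolves.
package org.apache.spark.deploy.kubernetes.submit.v2

import java.io.StringReader
import java.util.Properties

import scala.collection.JavaConverters._

import org.apache.spark.SparkConf

object ConfigMapPropertiesRoundTripExample {
  def main(args: Array[String]): Unit = {
    val configMap = PropertiesConfigMapFromScalaMapBuilder.buildConfigMap(
      "example-init-config-map",
      "example-properties-key",
      Map("spark.example.download.path" -> "/var/data/spark-jars"))
    // Read the single config map entry back as Java properties, as an init-container
    // reading the mounted properties file would.
    val loaded = new Properties()
    loaded.load(new StringReader(configMap.getData.get("example-properties-key")))
    val conf = new SparkConf(false)
    loaded.asScala.foreach { case (key, value) => conf.set(key, value) }
    // Prints /var/data/spark-jars, showing the entries survive the round trip.
    println(conf.get("spark.example.download.path"))
  }
}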
+ */ + def build(): ConfigMap +} + +private[spark] class SparkInitContainerConfigMapBuilderImpl( + sparkJars: Seq[String], + sparkFiles: Seq[String], + jarsDownloadPath: String, + filesDownloadPath: String, + configMapName: String, + configMapKey: String, + submittedDependenciesPlugin: Option[SubmittedDependencyInitContainerConfigPlugin]) + extends SparkInitContainerConfigMapBuilder { + + override def build(): ConfigMap = { + val remoteJarsToDownload = KubernetesFileUtils.getOnlyRemoteFiles(sparkJars) + val remoteFilesToDownload = KubernetesFileUtils.getOnlyRemoteFiles(sparkFiles) + val remoteJarsConf = if (remoteJarsToDownload.nonEmpty) { + Map(INIT_CONTAINER_REMOTE_JARS.key -> remoteJarsToDownload.mkString(",")) + } else { + Map.empty[String, String] + } + val remoteFilesConf = if (remoteFilesToDownload.nonEmpty) { + Map(INIT_CONTAINER_REMOTE_FILES.key -> remoteFilesToDownload.mkString(",")) + } else { + Map.empty[String, String] + } + val baseInitContainerConfig = Map[String, String]( + INIT_CONTAINER_JARS_DOWNLOAD_LOCATION.key -> jarsDownloadPath, + INIT_CONTAINER_FILES_DOWNLOAD_LOCATION.key -> filesDownloadPath) ++ + remoteJarsConf ++ + remoteFilesConf + val submittedDependenciesConfig = submittedDependenciesPlugin.map { plugin => + plugin.configurationsToFetchSubmittedDependencies() + }.toSeq.flatten.toMap + PropertiesConfigMapFromScalaMapBuilder.buildConfigMap( + configMapName, + configMapKey, + baseInitContainerConfig ++ submittedDependenciesConfig) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala new file mode 100644 index 0000000000000..bc9abc4eaba81 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import org.apache.spark.SSLOptions +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ + +private[spark] trait SubmittedDependencyInitContainerConfigPlugin { + /** + * Obtain configuration to fetch submitted dependencies from a resource staging server. + * This includes the resource identifiers for the jar and file bundles, as well as the + * remote location of the resource staging server, and the location of secret files for + * authenticating to the resource staging server. 
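// A wiring sketch with made-up values: build the init-container config map for an
// application that only needs remote downloads, so no resource staging server plugin
// is passed. Every path, name, and URI below is illustrative, not a default.
val exampleInitConfigMap = new SparkInitContainerConfigMapBuilderImpl(
  sparkJars = Seq("hdfs://namenode:9000/jars/app.jar", "local:///opt/spark/jars/lib.jar"),
  sparkFiles = Seq("http://example.com/conf/app.properties"),
  jarsDownloadPath = "/var/data/spark-jars",
  filesDownloadPath = "/var/data/spark-files",
  configMapName = "example-app-init-config",
  configMapKey = "init-driver.properties",
  submittedDependenciesPlugin = None).build()
// Only the hdfs:// and http:// URIs should appear under the remote-download keys,
// since KubernetesFileUtils.getOnlyRemoteFiles is expected to drop file:// and
// local:// URIs, which never need to be fetched by the init-container.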
Note that the secret file paths here need to + * line up with the locations the secrets are mounted by + * SubmittedDependencyInitContainerVolumesPlugin; constants provide the consistency and + * convention for these to line up. + */ + def configurationsToFetchSubmittedDependencies(): Map[String, String] +} + +private[spark] class SubmittedDependencyInitContainerConfigPluginImpl( + resourceStagingServerUri: String, + jarsResourceId: String, + filesResourceId: String, + jarsSecretKey: String, + filesSecretKey: String, + trustStoreSecretKey: String, + secretsVolumeMountPath: String, + resourceStagingServiceSslOptions: SSLOptions) + extends SubmittedDependencyInitContainerConfigPlugin { + + override def configurationsToFetchSubmittedDependencies(): Map[String, String] = { + Map[String, String]( + RESOURCE_STAGING_SERVER_URI.key -> resourceStagingServerUri, + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> jarsResourceId, + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> + s"$secretsVolumeMountPath/$jarsSecretKey", + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> filesResourceId, + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> + s"$secretsVolumeMountPath/$filesSecretKey", + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> + resourceStagingServiceSslOptions.enabled.toString) ++ + resourceStagingServiceSslOptions.trustStore.map { _ => + (RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key, + s"$secretsVolumeMountPath/$trustStoreSecretKey") + }.toMap ++ + resourceStagingServiceSslOptions.trustStorePassword.map { password => + (RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key, password) + }.toMap ++ + resourceStagingServiceSslOptions.trustStoreType.map { storeType => + (RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key, storeType) + }.toMap + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala new file mode 100644 index 0000000000000..1853b2ecce6d2 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.{Secret, SecretBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.SSLOptions +import org.apache.spark.deploy.kubernetes.constants._ + +private[spark] trait SubmittedDependencySecretBuilder { + /** + * Construct a Kubernetes secret bundle that init-containers can use to retrieve an + * application's dependencies. + */ + def build(): Secret +} + +private[spark] class SubmittedDependencySecretBuilderImpl( + secretName: String, + jarsResourceSecret: String, + filesResourceSecret: String, + jarsSecretKey: String, + filesSecretKey: String, + trustStoreSecretKey: String, + resourceStagingServerSslOptions: SSLOptions) + extends SubmittedDependencySecretBuilder { + + override def build(): Secret = { + val trustStoreBase64 = resourceStagingServerSslOptions.trustStore.map { trustStoreFile => + require(trustStoreFile.isFile, "Dependency server trustStore provided at" + + trustStoreFile.getAbsolutePath + " does not exist or is not a file.") + (trustStoreSecretKey, BaseEncoding.base64().encode(Files.toByteArray(trustStoreFile))) + }.toMap + val jarsSecretBase64 = BaseEncoding.base64().encode(jarsResourceSecret.getBytes(Charsets.UTF_8)) + val filesSecretBase64 = BaseEncoding.base64().encode( + filesResourceSecret.getBytes(Charsets.UTF_8)) + val secretData = Map( + jarsSecretKey -> jarsSecretBase64, + filesSecretKey -> filesSecretBase64) ++ + trustStoreBase64 + val kubernetesSecret = new SecretBuilder() + .withNewMetadata() + .withName(secretName) + .endMetadata() + .addToData(secretData.asJava) + .build() + kubernetesSecret + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala new file mode 100644 index 0000000000000..f22759d463cb7 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.{File, FileOutputStream} +import javax.ws.rs.core.MediaType + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import okhttp3.RequestBody +import retrofit2.Call + +import org.apache.spark.{SparkException, SSLOptions} +import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils +import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials +import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory} +import org.apache.spark.util.Utils + +private[spark] trait SubmittedDependencyUploader { + /** + * Upload submitter-local jars to the resource staging server. + * @return The resource ID and secret to use to retrieve these jars. + */ + def uploadJars(): SubmittedResourceIdAndSecret + + /** + * Upload submitter-local files to the resource staging server. + * @return The resource ID and secret to use to retrieve these files. + */ + def uploadFiles(): SubmittedResourceIdAndSecret +} + +/** + * Default implementation of a SubmittedDependencyManager that is backed by a + * Resource Staging Service. + */ +private[spark] class SubmittedDependencyUploaderImpl( + kubernetesAppId: String, + podLabels: Map[String, String], + podNamespace: String, + stagingServerUri: String, + sparkJars: Seq[String], + sparkFiles: Seq[String], + stagingServiceSslOptions: SSLOptions, + retrofitClientFactory: RetrofitClientFactory) extends SubmittedDependencyUploader { + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + + private def localUriStringsToFiles(uris: Seq[String]): Iterable[File] = { + KubernetesFileUtils.getOnlySubmitterLocalFiles(uris) + .map(Utils.resolveURI) + .map(uri => new File(uri.getPath)) + } + private def localJars: Iterable[File] = localUriStringsToFiles(sparkJars) + private def localFiles: Iterable[File] = localUriStringsToFiles(sparkFiles) + + override def uploadJars(): SubmittedResourceIdAndSecret = doUpload(localJars, "uploaded-jars") + override def uploadFiles(): SubmittedResourceIdAndSecret = doUpload(localFiles, "uploaded-files") + + private def doUpload(files: Iterable[File], fileNamePrefix: String) + : SubmittedResourceIdAndSecret = { + val filesDir = Utils.createTempDir(namePrefix = fileNamePrefix) + val filesTgz = new File(filesDir, s"$fileNamePrefix.tgz") + Utils.tryWithResource(new FileOutputStream(filesTgz)) { filesOutputStream => + CompressionUtils.writeTarGzipToStream(filesOutputStream, files.map(_.getAbsolutePath)) + } + // TODO provide credentials properly when the staging server monitors the Kubernetes API. 
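// Caller-side sketch (hypothetical values throughout): upload submitter-local jars
// and files to a resource staging server and group the returned IDs and secrets so
// they can feed the secret and config-map builders defined earlier in this change.
// Assumes it lives under an org.apache.spark package so the private[spark] types resolve.
import org.apache.spark.SSLOptions
import org.apache.spark.deploy.rest.kubernetes.v2.RetrofitClientFactoryImpl

val uploader = new SubmittedDependencyUploaderImpl(
  kubernetesAppId = "spark-example-app",
  podLabels = Map("spark-app-id" -> "spark-example-app"),
  podNamespace = "default",
  stagingServerUri = "http://resource-staging-server:10000",
  sparkJars = Seq("file:///opt/app/jars/app.jar"),
  sparkFiles = Seq("file:///opt/app/conf/app.properties"),
  stagingServiceSslOptions = SSLOptions(),
  retrofitClientFactory = RetrofitClientFactoryImpl)
val submittedResources = SubmittedResources(uploader.uploadJars(), uploader.uploadFiles())
// submittedResources.ids() goes into the init-container config map, and
// submittedResources.secrets() into the Kubernetes secret mounted into the init-container.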
+ val kubernetesCredentialsString = OBJECT_MAPPER.writer() + .writeValueAsString(KubernetesCredentials(None, None, None, None)) + val labelsAsString = OBJECT_MAPPER.writer().writeValueAsString(podLabels) + + val filesRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), filesTgz) + + val kubernetesCredentialsBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), kubernetesCredentialsString) + + val namespaceRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.TEXT_PLAIN), podNamespace) + + val labelsRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), labelsAsString) + + val service = retrofitClientFactory.createRetrofitClient( + stagingServerUri, + classOf[ResourceStagingServiceRetrofit], + stagingServiceSslOptions) + val uploadResponse = service.uploadResources( + labelsRequestBody, namespaceRequestBody, filesRequestBody, kubernetesCredentialsBody) + getTypedResponseResult(uploadResponse) + } + + private def getTypedResponseResult[T](call: Call[T]): T = { + val response = call.execute() + if (response.code() < 200 || response.code() >= 300) { + throw new SparkException("Unexpected response from dependency server when uploading" + + s" dependencies: ${response.code()}. Error body: " + + Option(response.errorBody()).map(_.string()).getOrElse("N/A")) + } + response.body() + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/StagedResourceIdentifier.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedResources.scala similarity index 51% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/StagedResourceIdentifier.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedResources.scala index 65bc9bc17dae9..f4e5e991180ce 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/StagedResourceIdentifier.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedResources.scala @@ -14,6 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.kubernetes.submit.v2 -case class StagedResourceIdentifier(resourceId: String, resourceSecret: String) +case class SubmittedResourceIdAndSecret(resourceId: String, resourceSecret: String) + +case class SubmittedResources( + jarsResourceIdAndSecret: SubmittedResourceIdAndSecret, + filesResourceIdAndSecret: SubmittedResourceIdAndSecret) { + def ids(): SubmittedResourceIds = SubmittedResourceIds( + jarsResourceIdAndSecret.resourceId, filesResourceIdAndSecret.resourceId) + def secrets(): SubmittedResourceSecrets = SubmittedResourceSecrets( + jarsResourceIdAndSecret.resourceSecret, filesResourceIdAndSecret.resourceSecret) +} + +case class SubmittedResourceIds(jarsResourceId: String, filesResourceId: String) + +case class SubmittedResourceSecrets(jarsResourceSecret: String, filesResourceSecret: String) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala index ca05fe767146b..7847ba2546594 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala @@ -35,6 +35,7 @@ import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkCo import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils import org.apache.spark.deploy.rest._ import org.apache.spark.internal.config.OptionalConfigEntry import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala index 680d305985cc0..67caa176930ea 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala @@ -25,12 +25,15 @@ import com.google.common.io.Files import com.google.common.util.concurrent.SettableFuture import okhttp3.ResponseBody import retrofit2.{Call, Callback, Response} +import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.duration.Duration -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException} +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf} +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils +import org.apache.spark.util.{ThreadUtils, Utils} private trait WaitableCallback[T] extends Callback[T] { private val complete = SettableFuture.create[Boolean] @@ -61,55 +64,149 @@ private class DownloadTarGzCallback(downloadDir: File) extends WaitableCallback[ } } 
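// Testing sketch for the FileFetcher seam introduced just below: a unit test can
// substitute a stub that records requested URIs instead of calling Utils.fetchFile.
// The stub here is illustrative only and assumes it sits in the same v2 package.
import java.io.File
import scala.collection.mutable

class RecordingFileFetcher extends FileFetcher {
  // Remembers every (uri, targetDir) pair the init-container asked to download.
  val fetched = mutable.Buffer.empty[(String, File)]
  override def fetchFile(uri: String, targetDir: File): Unit = {
    fetched += ((uri, targetDir))
  }
}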
+// Extracted for testing so that unit tests don't have to depend on Utils.fetchFile +private[v2] trait FileFetcher { + def fetchFile(uri: String, targetDir: File): Unit +} + +private class FileFetcherImpl(sparkConf: SparkConf, securityManager: SparkSecurityManager) + extends FileFetcher { + def fetchFile(uri: String, targetDir: File): Unit = { + Utils.fetchFile( + url = uri, + targetDir = targetDir, + conf = sparkConf, + securityMgr = securityManager, + hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf), + timestamp = System.currentTimeMillis(), + useCache = false) + } +} + +/** + * Process that fetches files from a resource staging server and/or arbitrary remote locations. + * + * The init-container can handle fetching files from any of those sources, but not all of the + * sources need to be specified. This allows for composing multiple instances of this container + * with different configurations for different download sources, or using the same container to + * download everything at once. + */ private[spark] class KubernetesSparkDependencyDownloadInitContainer( - sparkConf: SparkConf, retrofitClientFactory: RetrofitClientFactory) extends Logging { + sparkConf: SparkConf, + retrofitClientFactory: RetrofitClientFactory, + fileFetcher: FileFetcher, + securityManager: SparkSecurityManager) extends Logging { - private val resourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) - .getOrElse(throw new SparkException("No dependency server URI was provided.")) + private implicit val downloadExecutor = ExecutionContext.fromExecutorService( + ThreadUtils.newDaemonCachedThreadPool("download-executor")) + private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) - private val downloadJarsResourceIdentifier = sparkConf + private val maybeDownloadJarsResourceIdentifier = sparkConf .get(INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER) - .getOrElse(throw new SparkException("No resource identifier provided for jars.")) private val downloadJarsSecretLocation = new File( sparkConf.get(INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION)) - private val downloadFilesResourceIdentifier = sparkConf + private val maybeDownloadFilesResourceIdentifier = sparkConf .get(INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER) - .getOrElse(throw new SparkException("No resource identifier provided for files.")) private val downloadFilesSecretLocation = new File( sparkConf.get(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION)) - require(downloadJarsSecretLocation.isFile, "Application jars download secret provided" + - s" at ${downloadJarsSecretLocation.getAbsolutePath} does not exist or is not a file.") - require(downloadFilesSecretLocation.isFile, "Application files download secret provided" + - s" at ${downloadFilesSecretLocation.getAbsolutePath} does not exist or is not a file.") - private val jarsDownloadDir = new File(sparkConf.get(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION)) - require(jarsDownloadDir.isDirectory, "Application jars download directory provided at" + - s" ${jarsDownloadDir.getAbsolutePath} does not exist or is not a directory.") + private val jarsDownloadDir = new File( + sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION)) + private val filesDownloadDir = new File( + sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION)) + + private val remoteJars = sparkConf.get(INIT_CONTAINER_REMOTE_JARS) + private val remoteFiles = sparkConf.get(INIT_CONTAINER_REMOTE_FILES) - private val filesDownloadDir = new File(sparkConf.get(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION)) - 
require(filesDownloadDir.isDirectory, "Application files download directory provided at" + - s" ${filesDownloadDir.getAbsolutePath} does not exist or is not a directory.") - private val downloadTimeoutMinutes = sparkConf.get(DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT) + private val downloadTimeoutMinutes = sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) def run(): Unit = { - val securityManager = new SparkSecurityManager(sparkConf) - val sslOptions = securityManager.getSSLOptions("kubernetes.resourceStagingServer") - val service = retrofitClientFactory.createRetrofitClient( - resourceStagingServerUri, classOf[ResourceStagingServiceRetrofit], sslOptions) - val jarsSecret = Files.toString(downloadJarsSecretLocation, Charsets.UTF_8) - val filesSecret = Files.toString(downloadFilesSecretLocation, Charsets.UTF_8) - val downloadJarsCallback = new DownloadTarGzCallback(jarsDownloadDir) - val downloadFilesCallback = new DownloadTarGzCallback(filesDownloadDir) - service.downloadResources(downloadJarsResourceIdentifier, jarsSecret) - .enqueue(downloadJarsCallback) - service.downloadResources(downloadFilesResourceIdentifier, filesSecret) - .enqueue(downloadFilesCallback) - logInfo("Waiting to download jars...") - downloadJarsCallback.waitForCompletion(downloadTimeoutMinutes, TimeUnit.MINUTES) - logInfo(s"Jars downloaded to ${jarsDownloadDir.getAbsolutePath}") - logInfo("Waiting to download files...") - downloadFilesCallback.waitForCompletion(downloadTimeoutMinutes, TimeUnit.MINUTES) - logInfo(s"Files downloaded to ${filesDownloadDir.getAbsolutePath}") + val resourceStagingServerJarsDownload = Future[Unit] { + downloadResourcesFromStagingServer( + maybeDownloadJarsResourceIdentifier, + downloadJarsSecretLocation, + jarsDownloadDir, + "Starting to download jars from resource staging server...", + "Finished downloading jars from resource staging server.", + s"Application jars download secret provided at" + + s" ${downloadJarsSecretLocation.getAbsolutePath} does not exist or is not a file.", + s"Application jars download directory provided at" + + s" ${jarsDownloadDir.getAbsolutePath} does not exist or is not a directory.") + } + val resourceStagingServerFilesDownload = Future[Unit] { + downloadResourcesFromStagingServer( + maybeDownloadFilesResourceIdentifier, + downloadFilesSecretLocation, + filesDownloadDir, + "Starting to download files from resource staging server...", + "Finished downloading files from resource staging server.", + s"Application files download secret provided at" + + s" ${downloadFilesSecretLocation.getAbsolutePath} does not exist or is not a file.", + s"Application files download directory provided at" + + s" ${filesDownloadDir.getAbsolutePath} does not exist or is not" + + s" a directory.") + } + val remoteJarsDownload = Future[Unit] { + downloadFiles(remoteJars, + jarsDownloadDir, + s"Remote jars download directory specified at $jarsDownloadDir does not exist" + + s" or is not a directory.") + } + val remoteFilesDownload = Future[Unit] { + downloadFiles(remoteFiles, + filesDownloadDir, + s"Remote files download directory specified at $filesDownloadDir does not exist" + + s" or is not a directory.") + } + waitForFutures( + resourceStagingServerJarsDownload, + resourceStagingServerFilesDownload, + remoteJarsDownload, + remoteFilesDownload) + } + + private def downloadResourcesFromStagingServer( + maybeResourceId: Option[String], + resourceSecretLocation: File, + resourceDownloadDir: File, + downloadStartMessage: String, + downloadFinishedMessage: String, + errMessageOnSecretNotAFile: 
String, + errMessageOnDownloadDirNotADirectory: String): Unit = { + maybeResourceStagingServerUri.foreach { resourceStagingServerUri => + maybeResourceId.foreach { resourceId => + require(resourceSecretLocation.isFile, errMessageOnSecretNotAFile) + require(resourceDownloadDir.isDirectory, errMessageOnDownloadDirNotADirectory) + val sslOptions = securityManager.getSSLOptions("kubernetes.resourceStagingServer") + val service = retrofitClientFactory.createRetrofitClient( + resourceStagingServerUri, classOf[ResourceStagingServiceRetrofit], sslOptions) + val resourceSecret = Files.toString(resourceSecretLocation, Charsets.UTF_8) + val downloadResourceCallback = new DownloadTarGzCallback(resourceDownloadDir) + logInfo(downloadStartMessage) + service.downloadResources(resourceId, resourceSecret) + .enqueue(downloadResourceCallback) + downloadResourceCallback.waitForCompletion(downloadTimeoutMinutes, TimeUnit.MINUTES) + logInfo(downloadFinishedMessage) + } + } + } + + private def downloadFiles( + filesCommaSeparated: Option[String], + downloadDir: File, + errMessageOnDestinationNotADirectory: String): Unit = { + if (filesCommaSeparated.isDefined) { + require(downloadDir.isDirectory, errMessageOnDestinationNotADirectory) + } + filesCommaSeparated.map(_.split(",")).toSeq.flatten.foreach { file => + fileFetcher.fetchFile(file, downloadDir) + } + } + + private def waitForFutures(futures: Future[_]*) { + futures.foreach { + ThreadUtils.awaitResult(_, Duration.create(downloadTimeoutMinutes, TimeUnit.MINUTES)) + } } } @@ -121,7 +218,13 @@ object KubernetesSparkDependencyDownloadInitContainer extends Logging { } else { new SparkConf(true) } - new KubernetesSparkDependencyDownloadInitContainer(sparkConf, RetrofitClientFactoryImpl).run() + val securityManager = new SparkSecurityManager(sparkConf) + val fileFetcher = new FileFetcherImpl(sparkConf, securityManager) + new KubernetesSparkDependencyDownloadInitContainer( + sparkConf, + RetrofitClientFactoryImpl, + fileFetcher, + securityManager).run() logInfo("Finished downloading application dependencies.") } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala index 844809dec995c..b7c6c4fb913da 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala @@ -22,6 +22,7 @@ import javax.ws.rs.core.{MediaType, StreamingOutput} import org.glassfish.jersey.media.multipart.FormDataParam +import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials /** @@ -69,7 +70,7 @@ private[spark] trait ResourceStagingService { @FormDataParam("podNamespace") podNamespace: String, @FormDataParam("resources") resources: InputStream, @FormDataParam("kubernetesCredentials") kubernetesCredentials: KubernetesCredentials) - : StagedResourceIdentifier + : SubmittedResourceIdAndSecret /** * Download an application's resources. 
The resources are provided as a stream, where the stream's diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala index cf6180fbf53d4..3dfa83c85e6dd 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala @@ -26,6 +26,7 @@ import com.google.common.io.{BaseEncoding, ByteStreams, Files} import scala.collection.concurrent.TrieMap import org.apache.spark.SparkException +import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.internal.Logging import org.apache.spark.util.Utils @@ -41,7 +42,7 @@ private[spark] class ResourceStagingServiceImpl(dependenciesRootDir: File) podLabels: Map[String, String], podNamespace: String, resources: InputStream, - kubernetesCredentials: KubernetesCredentials): StagedResourceIdentifier = { + kubernetesCredentials: KubernetesCredentials): SubmittedResourceIdAndSecret = { val resourceId = UUID.randomUUID().toString val secretBytes = new Array[Byte](1024) SECURE_RANDOM.nextBytes(secretBytes) @@ -65,7 +66,7 @@ private[spark] class ResourceStagingServiceImpl(dependenciesRootDir: File) podNamespace, resourcesTgz, kubernetesCredentials) - StagedResourceIdentifier(resourceId, resourceSecret) + SubmittedResourceIdAndSecret(resourceId, resourceSecret) } catch { case e: Throwable => if (!resourcesDir.delete()) { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala index b1a3cc0676757..e0079a372f0d9 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala @@ -20,6 +20,8 @@ import okhttp3.{RequestBody, ResponseBody} import retrofit2.Call import retrofit2.http.{Multipart, Path, Streaming} +import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret + /** * Retrofit-compatible variant of {@link ResourceStagingService}. For documentation on * how to use this service, see the aforementioned JAX-RS based interface. 
@@ -33,7 +35,7 @@ private[spark] trait ResourceStagingServiceRetrofit { @retrofit2.http.Part("podNamespace") podNamespace: RequestBody, @retrofit2.http.Part("resources") resources: RequestBody, @retrofit2.http.Part("kubernetesCredentials") - kubernetesCredentials: RequestBody): Call[StagedResourceIdentifier] + kubernetesCredentials: RequestBody): Call[SubmittedResourceIdAndSecret] @Streaming @retrofit2.http.GET("/api/v0/resources/{resourceId}") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala index 70098f1f46ac0..e2630b9918b61 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala @@ -17,9 +17,12 @@ package org.apache.spark.scheduler.cluster.kubernetes import org.apache.spark.SparkContext +import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, SparkPodInitContainerBootstrapImpl} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.internal.Logging import org.apache.spark.scheduler.{ExternalClusterManager, SchedulerBackend, TaskScheduler, TaskSchedulerImpl} -private[spark] class KubernetesClusterManager extends ExternalClusterManager { +private[spark] class KubernetesClusterManager extends ExternalClusterManager with Logging { override def canCreate(masterURL: String): Boolean = masterURL.startsWith("k8s") @@ -31,7 +34,49 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager { override def createSchedulerBackend(sc: SparkContext, masterURL: String, scheduler: TaskScheduler) : SchedulerBackend = { - new KubernetesClusterSchedulerBackend(sc.taskScheduler.asInstanceOf[TaskSchedulerImpl], sc) + val sparkConf = sc.getConf + val maybeConfigMap = sparkConf.get(EXECUTOR_INIT_CONTAINER_CONFIG_MAP) + val maybeConfigMapKey = sparkConf.get(EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY) + + val maybeExecutorInitContainerSecretName = + sparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET) + val maybeExecutorInitContainerSecretMount = + sparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR) + val executorInitContainerSecretVolumePlugin = for { + initContainerSecretName <- maybeExecutorInitContainerSecretName + initContainerSecretMountPath <- maybeExecutorInitContainerSecretMount + } yield { + new InitContainerResourceStagingServerSecretPluginImpl( + initContainerSecretName, + initContainerSecretMountPath) + } + // Only set up the bootstrap if they've provided both the config map key and the config map + // name. Note that we generally expect both to have been set from spark-submit V2, but for + // testing developers may simply run the driver JVM locally, but the config map won't be set + // then. 
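// Behavior sketch for the Option-combining pattern used just below: the
// for-comprehension yields a bootstrap only when both the config map name and key
// are present; if either is None, no executor init-container bootstrap is created.
val maybeName: Option[String] = Some("config-map-name")   // stands in for maybeConfigMap
val maybeKey: Option[String] = Some("config-map-key")     // stands in for maybeConfigMapKey
val combined: Option[(String, String)] = for {
  name <- maybeName
  key <- maybeKey
} yield (name, key)
// combined == Some(("config-map-name", "config-map-key")); setting either input to
// None makes combined None, which is why the warnings below fire on missing values.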
+ val bootStrap = for { + configMap <- maybeConfigMap + configMapKey <- maybeConfigMapKey + } yield { + new SparkPodInitContainerBootstrapImpl( + sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE), + sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION), + sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION), + sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT), + configMap, + configMapKey, + executorInitContainerSecretVolumePlugin) + } + if (maybeConfigMap.isEmpty) { + logWarning("The executor's init-container config map was not specified. Executors will" + + " therefore not attempt to fetch remote or submitted dependencies.") + } + if (maybeConfigMapKey.isEmpty) { + logWarning("The executor's init-container config map key was not specified. Executors will" + + " therefore not attempt to fetch remote or submitted dependencies.") + } + new KubernetesClusterSchedulerBackend( + sc.taskScheduler.asInstanceOf[TaskSchedulerImpl], sc, bootStrap) } override def initialize(scheduler: TaskScheduler, backend: SchedulerBackend): Unit = { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 669a073b1fab6..0dd875b307a6d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -20,17 +20,16 @@ import java.io.Closeable import java.util.concurrent.TimeUnit import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference} -import scala.collection.JavaConverters._ -import scala.collection.mutable -import scala.concurrent.{ExecutionContext, Future} - -import io.fabric8.kubernetes.api.model._ +import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, EnvVarSourceBuilder, Pod, PodBuilder, QuantityBuilder} import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import org.apache.commons.io.FilenameUtils +import scala.collection.JavaConverters._ +import scala.collection.mutable +import scala.concurrent.{ExecutionContext, Future} import org.apache.spark.{SparkContext, SparkEnv, SparkException} -import org.apache.spark.deploy.kubernetes.ConfigurationUtils +import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, SparkPodInitContainerBootstrap} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.rpc.{RpcCallContext, RpcEndpointAddress, RpcEnv} @@ -41,7 +40,8 @@ import org.apache.spark.util.{ThreadUtils, Utils} private[spark] class KubernetesClusterSchedulerBackend( scheduler: TaskSchedulerImpl, - val sc: SparkContext) + val sc: SparkContext, + executorInitContainerBootstrap: Option[SparkPodInitContainerBootstrap]) extends CoarseGrainedSchedulerBackend(scheduler, sc.env.rpcEnv) { import KubernetesClusterSchedulerBackend._ @@ -52,6 +52,9 @@ private[spark] class KubernetesClusterSchedulerBackend( private val EXECUTOR_PODS_BY_IPS_LOCK = new Object private val executorPodsByIPs = new mutable.HashMap[String, Pod] // Indexed by executor IP addrs. 
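// Sketch of the optional-environment pattern used for the executor container below:
// optional settings become Option[EnvVar] values appended to the required environment
// only when defined. The variable names are placeholders, not the actual ENV_* constants.
import io.fabric8.kubernetes.api.model.{EnvVar, EnvVarBuilder}

val maybeExtraClasspath: Option[String] = Some("/opt/extra/lib/*")
val extraClasspathEnv: Option[EnvVar] = maybeExtraClasspath.map { cp =>
  new EnvVarBuilder()
    .withName("EXAMPLE_EXECUTOR_EXTRA_CLASSPATH")
    .withValue(cp)
    .build()
}
val requiredEnv: Seq[EnvVar] = Seq(("EXAMPLE_EXECUTOR_ID", "1")).map { case (name, value) =>
  new EnvVarBuilder().withName(name).withValue(value).build()
}
// Only defined options contribute an entry to the final environment.
val executorEnv: Seq[EnvVar] = requiredEnv ++ extraClasspathEnv.toSeq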
+ private val executorExtraClasspath = conf.get( + org.apache.spark.internal.config.EXECUTOR_CLASS_PATH) + private val executorJarsDownloadDir = conf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) private var shufflePodCache: Option[ShufflePodCache] = None private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) @@ -258,13 +261,20 @@ private[spark] class KubernetesClusterSchedulerBackend( val executorCpuQuantity = new QuantityBuilder(false) .withAmount(executorCores) .build() + val executorExtraClasspathEnv = executorExtraClasspath.map { cp => + new EnvVarBuilder() + .withName(ENV_EXECUTOR_EXTRA_CLASSPATH) + .withValue(cp) + .build() + } val requiredEnv = Seq( (ENV_EXECUTOR_PORT, executorPort.toString), (ENV_DRIVER_URL, driverUrl), (ENV_EXECUTOR_CORES, executorCores), (ENV_EXECUTOR_MEMORY, executorMemoryString), (ENV_APPLICATION_ID, applicationId()), - (ENV_EXECUTOR_ID, executorId)) + (ENV_EXECUTOR_ID, executorId), + (ENV_MOUNTED_CLASSPATH, s"$executorJarsDownloadDir/*")) .map(env => new EnvVarBuilder() .withName(env._1) .withValue(env._2) @@ -317,7 +327,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .endContainer() .endSpec() - val resolvedPodBuilder = shuffleServiceConfig + val withMaybeShuffleConfigPodBuilder = shuffleServiceConfig .map { config => config.shuffleDirs.foldLeft(basePodBuilder) { (builder, dir) => builder @@ -337,9 +347,14 @@ private[spark] class KubernetesClusterSchedulerBackend( .endSpec() } }.getOrElse(basePodBuilder) + val resolvedExecutorPod = executorInitContainerBootstrap.map { bootstrap => + bootstrap.bootstrapInitContainerAndVolumes( + "executor", + withMaybeShuffleConfigPodBuilder) + }.getOrElse(withMaybeShuffleConfigPodBuilder) try { - (executorId, kubernetesClient.pods().create(resolvedPodBuilder.build())) + (executorId, kubernetesClient.pods.create(resolvedExecutorPod.build())) } catch { case throwable: Throwable => logError("Failed to allocate executor pod.", throwable) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala new file mode 100644 index 0000000000000..6db7d3ff2da53 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes + +import com.fasterxml.jackson.databind.ObjectMapper +import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, Pod, PodBuilder} +import org.scalatest.BeforeAndAfter +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.kubernetes.constants._ + +class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAfter { + private val OBJECT_MAPPER = new ObjectMapper() + private val INIT_CONTAINER_IMAGE = "spark-init:latest" + private val JARS_DOWNLOAD_PATH = "/var/data/spark-jars" + private val FILES_DOWNLOAD_PATH = "/var/data/spark-files" + private val DOWNLOAD_TIMEOUT_MINUTES = 5 + private val INIT_CONTAINER_CONFIG_MAP_NAME = "spark-init-config-map" + private val INIT_CONTAINER_CONFIG_MAP_KEY = "spark-init-config-map-key" + private val ADDED_SUBMITTED_DEPENDENCY_ENV = "ADDED_SUBMITTED_DEPENDENCY" + private val ADDED_SUBMITTED_DEPENDENCY_ANNOTATION = "added-submitted-dependencies" + private val MAIN_CONTAINER_NAME = "spark-main" + private val TRUE = "true" + + private val submittedDependencyPlugin = new InitContainerResourceStagingServerSecretPlugin { + override def addResourceStagingServerSecretVolumeToPod(basePod: PodBuilder) + : PodBuilder = { + basePod.editMetadata() + .addToAnnotations(ADDED_SUBMITTED_DEPENDENCY_ANNOTATION, TRUE) + .endMetadata() + } + + override def mountResourceStagingServerSecretIntoInitContainer(container: ContainerBuilder) + : ContainerBuilder = { + container + .addNewEnv() + .withName(ADDED_SUBMITTED_DEPENDENCY_ENV) + .withValue(TRUE) + .endEnv() + } + } + + test("Running without submitted dependencies adds init-container with volume mounts.") { + val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() + val podAnnotations = bootstrappedPod.getMetadata.getAnnotations.asScala + assert(podAnnotations.contains(INIT_CONTAINER_ANNOTATION)) + val initContainers = OBJECT_MAPPER.readValue( + podAnnotations(INIT_CONTAINER_ANNOTATION), classOf[Array[Container]]) + assert(initContainers.length === 1) + val initContainer = initContainers.head + val initContainerVolumeMounts = initContainer.getVolumeMounts.asScala.map { + mount => (mount.getName, mount.getMountPath) + }.toMap + val expectedInitContainerVolumeMounts = Map( + INIT_CONTAINER_PROPERTIES_FILE_VOLUME -> INIT_CONTAINER_PROPERTIES_FILE_DIR, + INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, + INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH) + assert(initContainerVolumeMounts === expectedInitContainerVolumeMounts) + assert(initContainer.getName === "spark-init") + assert(initContainer.getImage === INIT_CONTAINER_IMAGE) + assert(initContainer.getImagePullPolicy === "IfNotPresent") + assert(initContainer.getArgs.asScala === List(INIT_CONTAINER_PROPERTIES_FILE_PATH)) + } + + test("Running without submitted dependencies adds volume mounts to main container.") { + val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() + val containers = bootstrappedPod.getSpec.getContainers.asScala + val mainContainer = containers.find(_.getName === MAIN_CONTAINER_NAME) + assert(mainContainer.isDefined) + val volumeMounts = mainContainer.map(_.getVolumeMounts.asScala).toSeq.flatten.map { + mount => (mount.getName, mount.getMountPath) + }.toMap + val expectedVolumeMounts = Map( + INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, + INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH) + assert(volumeMounts === expectedVolumeMounts) + 
} + + test("Running without submitted dependencies adds volumes to the pod") { + val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() + val podVolumes = bootstrappedPod.getSpec.getVolumes.asScala + assert(podVolumes.size === 3) + assert(podVolumes.exists { volume => + volume.getName == INIT_CONTAINER_PROPERTIES_FILE_VOLUME && + Option(volume.getConfigMap).map { configMap => + configMap.getItems.asScala.map { + keyToPath => (keyToPath.getKey, keyToPath.getPath) + }.toMap + }.contains(Map(INIT_CONTAINER_CONFIG_MAP_KEY -> INIT_CONTAINER_PROPERTIES_FILE_NAME)) + }) + assert(podVolumes.exists { volume => + volume.getName == INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME && volume.getEmptyDir != null + }) + assert(podVolumes.exists { volume => + volume.getName == INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME && volume.getEmptyDir != null + }) + } + + test("Running with submitted dependencies modifies the init container with the plugin.") { + val bootstrappedPod = bootstrapPodWithSubmittedDependencies() + val podAnnotations = bootstrappedPod.getMetadata.getAnnotations.asScala + assert(podAnnotations(ADDED_SUBMITTED_DEPENDENCY_ANNOTATION) === TRUE) + val initContainers = OBJECT_MAPPER.readValue( + podAnnotations(INIT_CONTAINER_ANNOTATION), classOf[Array[Container]]) + assert(initContainers.length === 1) + val initContainer = initContainers.head + assert(initContainer.getEnv.asScala.exists { + env => env.getName === ADDED_SUBMITTED_DEPENDENCY_ENV && env.getValue === TRUE + }) + } + + private def bootstrapPodWithoutSubmittedDependencies(): Pod = { + val bootstrapUnderTest = new SparkPodInitContainerBootstrapImpl( + INIT_CONTAINER_IMAGE, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + DOWNLOAD_TIMEOUT_MINUTES, + INIT_CONTAINER_CONFIG_MAP_NAME, + INIT_CONTAINER_CONFIG_MAP_KEY, + None) + bootstrapUnderTest.bootstrapInitContainerAndVolumes( + MAIN_CONTAINER_NAME, basePod()).build() + } + + private def bootstrapPodWithSubmittedDependencies(): Pod = { + val bootstrapUnderTest = new SparkPodInitContainerBootstrapImpl( + INIT_CONTAINER_IMAGE, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + DOWNLOAD_TIMEOUT_MINUTES, + INIT_CONTAINER_CONFIG_MAP_NAME, + INIT_CONTAINER_CONFIG_MAP_KEY, + Some(submittedDependencyPlugin)) + bootstrapUnderTest.bootstrapInitContainerAndVolumes( + MAIN_CONTAINER_NAME, basePod()).build() + } + + private def basePod(): PodBuilder = { + new PodBuilder() + .withNewMetadata() + .withName("spark-pod") + .endMetadata() + .withNewSpec() + .addNewContainer() + .withName(MAIN_CONTAINER_NAME) + .endContainer() + .endSpec() + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala new file mode 100644 index 0000000000000..473d369c8eca3 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.kubernetes.constants._ + +class SubmittedDependencyInitContainerVolumesPluginSuite extends SparkFunSuite { + + private val SECRET_NAME = "secret" + private val SECRET_MOUNT_PATH = "/mnt/secrets" + private val plugin = new InitContainerResourceStagingServerSecretPluginImpl( + SECRET_NAME, SECRET_MOUNT_PATH) + + test("The init container should have the secret volume mount.") { + val baseInitContainer = new ContainerBuilder().withName("container") + val configuredInitContainer = plugin.mountResourceStagingServerSecretIntoInitContainer( + baseInitContainer).build() + val volumeMounts = configuredInitContainer.getVolumeMounts.asScala + assert(volumeMounts.size === 1) + assert(volumeMounts.exists { volumeMount => + volumeMount.getName === INIT_CONTAINER_SECRET_VOLUME_NAME && + volumeMount.getMountPath === SECRET_MOUNT_PATH + }) + } + + test("The pod should have the secret volume.") { + val basePod = new PodBuilder() + .withNewMetadata().withName("pod").endMetadata() + .withNewSpec() + .addNewContainer() + .withName("container") + .endContainer() + .endSpec() + val configuredPod = plugin.addResourceStagingServerSecretVolumeToPod(basePod).build() + val volumes = configuredPod.getSpec.getVolumes.asScala + assert(volumes.size === 1) + assert(volumes.exists { volume => + volume.getName === INIT_CONTAINER_SECRET_VOLUME_NAME && + Option(volume.getSecret).map(_.getSecretName).contains(SECRET_NAME) + }) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala index e6536fbaa6941..4dc1e2e44980a 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala @@ -18,311 +18,331 @@ package org.apache.spark.deploy.kubernetes.submit.v2 import java.io.File -import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, Container, DoneablePod, HasMetadata, Pod, PodBuilder, PodList, Secret, SecretBuilder} +import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, DoneablePod, HasMetadata, Pod, PodBuilder, PodList, Secret, SecretBuilder} import io.fabric8.kubernetes.client.KubernetesClient import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource} import org.hamcrest.{BaseMatcher, Description} -import org.mockito.Matchers.{any, anyVararg, argThat, eq => mockitoEq, startsWith} -import org.mockito.Mockito.when +import org.mockito.{ArgumentCaptor, Mock, MockitoAnnotations} +import org.mockito.Matchers.{any, anyVararg, argThat, eq => mockitoEq} +import org.mockito.Mockito.{times, verify, when} import 
org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer import org.scalatest.BeforeAndAfter -import org.scalatest.mock.MockitoSugar._ import scala.collection.JavaConverters._ -import scala.reflect.ClassTag +import scala.collection.mutable import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.SparkPodInitContainerBootstrap import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.v2.StagedResourceIdentifier -import org.apache.spark.util.Utils class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { - private val MAIN_CLASS = "org.apache.spark.test.Main" - private val APP_ARGS = Array[String]("arg1", "arg2") - private val MAIN_APP_RESOURCE = "local:///app/jars/spark-main.jar" - private val APP_NAME = "spark-test-app" - private val STAGING_SERVER_URI = "http://localhost:9000" + private val JARS_RESOURCE = SubmittedResourceIdAndSecret("jarsId", "jarsSecret") + private val FILES_RESOURCE = SubmittedResourceIdAndSecret("filesId", "filesSecret") + private val SUBMITTED_RESOURCES = SubmittedResources(JARS_RESOURCE, FILES_RESOURCE) + private val BOOTSTRAPPED_POD_ANNOTATION = "bootstrapped" + private val TRUE = "true" + private val APP_NAME = "spark-test" + private val APP_ID = "spark-app-id" + private val CUSTOM_LABEL_KEY = "customLabel" + private val CUSTOM_LABEL_VALUE = "customLabelValue" + private val ALL_EXPECTED_LABELS = Map( + CUSTOM_LABEL_KEY -> CUSTOM_LABEL_VALUE, + SPARK_APP_ID_LABEL -> APP_ID, + SPARK_APP_NAME_LABEL -> APP_NAME) + private val CUSTOM_ANNOTATION_KEY = "customAnnotation" + private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" + private val SECRET_NAME = "secret" + private val SECRET_KEY = "secret-key" + private val SECRET_DATA = "secret-data" + private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" + private val APP_ARGS = Array("3", "20") private val SPARK_JARS = Seq( - "local:///app/jars/spark-helper.jar", "file:///var/data/spark-local-helper.jar") + "hdfs://localhost:9000/app/jars/jar1.jar", "file:///app/jars/jar2.jar") private val RESOLVED_SPARK_JARS = Seq( - "local:///app/jars/spark-helper.jar", - "file:///var/data/spark-downloaded/spark-local-helper.jar") + "hdfs://localhost:9000/app/jars/jar1.jar", "file:///var/data/spark-jars/jar2.jar") + private val RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS = Seq( + "/var/data/spark-jars/jar1.jar", "/var/data/spark-jars/jar2.jar") private val SPARK_FILES = Seq( - "local:///app/files/spark-file.txt", "file:///var/data/spark-local-file.txt") + "hdfs://localhost:9000/app/files/file1.txt", "file:///app/files/file2.txt") private val RESOLVED_SPARK_FILES = Seq( - "local:///app/files/spark-file.txt", "file:///var/data/spark-downloaded/spark-local-file.txt") - private val DRIVER_EXTRA_CLASSPATH = "/app/jars/extra-jar1.jar:/app/jars/extra-jars2.jar" - private val DRIVER_DOCKER_IMAGE_VALUE = "spark-driver:latest" - private val DRIVER_MEMORY_OVERHEARD_MB = 128L - private val DRIVER_MEMORY_MB = 512L - private val NAMESPACE = "namespace" - private val DOWNLOAD_JARS_RESOURCE_IDENTIFIER = StagedResourceIdentifier("jarsId", "jarsSecret") - private val DOWNLOAD_FILES_RESOURCE_IDENTIFIER = StagedResourceIdentifier( - "filesId", "filesSecret") - private val MOUNTED_FILES_ANNOTATION_KEY = "mountedFiles" - - private var sparkConf: SparkConf = _ - private var submissionKubernetesClientProvider: SubmissionKubernetesClientProvider = _ - private var submissionKubernetesClient: 
KubernetesClient = _ - private type PODS = MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] - private type RESOURCES = NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable[ - HasMetadata, Boolean] - private var podOperations: PODS = _ - private var resourceListOperations: RESOURCES = _ - private var mountedDependencyManagerProvider: MountedDependencyManagerProvider = _ - private var mountedDependencyManager: MountedDependencyManager = _ - private var captureCreatedPodAnswer: SelfArgumentCapturingAnswer[Pod] = _ - private var captureCreatedResourcesAnswer: AllArgumentsCapturingAnswer[HasMetadata, RESOURCES] = _ + "hdfs://localhost:9000/app/files/file1.txt", "file:///var/data/spark-files/file2.txt") + private val INIT_CONTAINER_SECRET = new SecretBuilder() + .withNewMetadata() + .withName(SECRET_NAME) + .endMetadata() + .addToData(SECRET_KEY, SECRET_DATA) + .build() + private val CONFIG_MAP_NAME = "config-map" + private val CONFIG_MAP_KEY = "config-map-key" + private val CONFIG_MAP_DATA = "config-map-data" + private val CUSTOM_JAVA_OPTION_KEY = "myappoption" + private val CUSTOM_JAVA_OPTION_VALUE = "myappoptionvalue" + private val DRIVER_JAVA_OPTIONS = s"-D$CUSTOM_JAVA_OPTION_KEY=$CUSTOM_JAVA_OPTION_VALUE" + private val DRIVER_EXTRA_CLASSPATH = "/var/data/spark-app-custom/custom-jar.jar" + private val INIT_CONTAINER_CONFIG_MAP = new ConfigMapBuilder() + .withNewMetadata() + .withName(CONFIG_MAP_NAME) + .endMetadata() + .addToData(CONFIG_MAP_KEY, CONFIG_MAP_DATA) + .build() + private val CUSTOM_DRIVER_IMAGE = "spark-custom-driver:latest" + private val DRIVER_MEMORY_MB = 512 + private val DRIVER_MEMORY_OVERHEAD_MB = 128 + private val SPARK_CONF = new SparkConf(true) + .set(DRIVER_DOCKER_IMAGE, CUSTOM_DRIVER_IMAGE) + .set(org.apache.spark.internal.config.DRIVER_MEMORY, DRIVER_MEMORY_MB.toLong) + .set(KUBERNETES_DRIVER_MEMORY_OVERHEAD, DRIVER_MEMORY_OVERHEAD_MB.toLong) + .set(KUBERNETES_DRIVER_LABELS, s"$CUSTOM_LABEL_KEY=$CUSTOM_LABEL_VALUE") + .set(KUBERNETES_DRIVER_ANNOTATIONS, s"$CUSTOM_ANNOTATION_KEY=$CUSTOM_ANNOTATION_VALUE") + .set(org.apache.spark.internal.config.DRIVER_CLASS_PATH, DRIVER_EXTRA_CLASSPATH) + .set(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS, DRIVER_JAVA_OPTIONS) + private val EXECUTOR_INIT_CONF_KEY = "executor-init-conf" + private val SPARK_CONF_WITH_EXECUTOR_INIT_CONF = SPARK_CONF.clone() + .set(EXECUTOR_INIT_CONF_KEY, TRUE) + private val DRIVER_POD_UID = "driver-pod-uid" + private val DRIVER_POD_KIND = "pod" + private val DRIVER_POD_API_VERSION = "v1" + @Mock + private var initContainerConfigMapBuilder: SparkInitContainerConfigMapBuilder = _ + @Mock + private var containerLocalizedFilesResolver: ContainerLocalizedFilesResolver = _ + @Mock + private var executorInitContainerConfiguration: ExecutorInitContainerConfiguration = _ + @Mock + private var submittedDependencyUploader: SubmittedDependencyUploader = _ + @Mock + private var submittedDependenciesSecretBuilder: SubmittedDependencySecretBuilder = _ + @Mock + private var initContainerBootstrap: SparkPodInitContainerBootstrap = _ + @Mock + private var initContainerComponentsProvider: DriverInitContainerComponentsProvider = _ + @Mock + private var kubernetesClientProvider: SubmissionKubernetesClientProvider = _ + @Mock + private var kubernetesClient: KubernetesClient = _ + @Mock + private var podOps: MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] = _ + private type ResourceListOps = NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable[ + HasMetadata, 
java.lang.Boolean] + @Mock + private var resourceListOps: ResourceListOps = _ before { - sparkConf = new SparkConf(true) - .set("spark.app.name", APP_NAME) - .set("spark.master", "k8s://https://localhost:443") - .set(DRIVER_DOCKER_IMAGE, DRIVER_DOCKER_IMAGE_VALUE) - .set(KUBERNETES_DRIVER_MEMORY_OVERHEAD, DRIVER_MEMORY_OVERHEARD_MB) - .set(KUBERNETES_NAMESPACE, NAMESPACE) - .set(org.apache.spark.internal.config.DRIVER_MEMORY, DRIVER_MEMORY_MB) - submissionKubernetesClientProvider = mock[SubmissionKubernetesClientProvider] - submissionKubernetesClient = mock[KubernetesClient] - podOperations = mock[PODS] - resourceListOperations = mock[RESOURCES] - mountedDependencyManagerProvider = mock[MountedDependencyManagerProvider] - mountedDependencyManager = mock[MountedDependencyManager] - when(submissionKubernetesClientProvider.get).thenReturn(submissionKubernetesClient) - when(submissionKubernetesClient.pods()).thenReturn(podOperations) - captureCreatedPodAnswer = new SelfArgumentCapturingAnswer[Pod] - captureCreatedResourcesAnswer = new AllArgumentsCapturingAnswer[HasMetadata, RESOURCES]( - resourceListOperations) - when(podOperations.create(any())).thenAnswer(captureCreatedPodAnswer) - when(submissionKubernetesClient.resourceList(anyVararg[HasMetadata])) - .thenAnswer(captureCreatedResourcesAnswer) - } - - // Tests w/o local dependencies, or behave independently to that configuration. - test("Simple properties and environment set on the driver pod.") { - sparkConf.set(org.apache.spark.internal.config.DRIVER_CLASS_PATH, DRIVER_EXTRA_CLASSPATH) - val createdDriverPod = createAndGetDriverPod() - val maybeDriverContainer = getDriverContainer(createdDriverPod) - maybeDriverContainer.foreach { driverContainer => - assert(driverContainer.getName === DRIVER_CONTAINER_NAME) - assert(driverContainer.getImage === DRIVER_DOCKER_IMAGE_VALUE) - assert(driverContainer.getImagePullPolicy === "IfNotPresent") - val envs = driverContainer.getEnv.asScala.map { env => - (env.getName, env.getValue) - }.toMap - assert(envs(ENV_DRIVER_MEMORY) === s"${DRIVER_MEMORY_MB + DRIVER_MEMORY_OVERHEARD_MB}m") - assert(envs(ENV_DRIVER_MAIN_CLASS) === MAIN_CLASS) - assert(envs(ENV_DRIVER_ARGS) === APP_ARGS.mkString(" ")) - assert(envs(ENV_SUBMIT_EXTRA_CLASSPATH) === DRIVER_EXTRA_CLASSPATH) - } - } - - test("Created pod should apply custom annotations and labels") { - sparkConf.set(KUBERNETES_DRIVER_LABELS, - "label1=label1value,label2=label2value") - sparkConf.set(KUBERNETES_DRIVER_ANNOTATIONS, - "annotation1=annotation1value,annotation2=annotation2value") - val createdDriverPod = createAndGetDriverPod() - val labels = createdDriverPod.getMetadata.getLabels.asScala - assert(labels.size === 4) - // App ID is non-deterministic, but just check if it's set and is prefixed with the app name - val appIdLabel = labels(SPARK_APP_ID_LABEL) - assert(appIdLabel != null && appIdLabel.startsWith(APP_NAME) && appIdLabel != APP_NAME) - val appNameLabel = labels(SPARK_APP_NAME_LABEL) - assert(appNameLabel != null && appNameLabel == APP_NAME) - assert(labels("label1") === "label1value") - assert(labels("label2") === "label2value") - val annotations = createdDriverPod.getMetadata.getAnnotations.asScala - val expectedAnnotations = Map( - "annotation1" -> "annotation1value", "annotation2" -> "annotation2value") - assert(annotations === expectedAnnotations) - } - - test("Driver JVM Options should be set in the environment.") { - sparkConf.set(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS, "-Dopt1=opt1value") - sparkConf.set("spark.logConf", 
"true") - val createdDriverPod = createAndGetDriverPod() - val maybeDriverContainer = getDriverContainer(createdDriverPod) - maybeDriverContainer.foreach { driverContainer => - val maybeJvmOptionsEnv = driverContainer.getEnv - .asScala - .find(_.getName == ENV_DRIVER_JAVA_OPTS) - assert(maybeJvmOptionsEnv.isDefined) - maybeJvmOptionsEnv.foreach { jvmOptionsEnv => - val jvmOptions = jvmOptionsEnv.getValue.split(" ") - jvmOptions.foreach { opt => assert(opt.startsWith("-D")) } - val optionKeyValues = jvmOptions.map { option => - val withoutDashDPrefix = option.stripPrefix("-D") - val split = withoutDashDPrefix.split('=') - assert(split.length == 2) - (split(0), split(1)) - }.toMap - assert(optionKeyValues("opt1") === "opt1value") - assert(optionKeyValues.contains("spark.app.id")) - assert(optionKeyValues("spark.jars") === MAIN_APP_RESOURCE) - assert(optionKeyValues(KUBERNETES_DRIVER_POD_NAME.key).startsWith(APP_NAME)) - assert(optionKeyValues("spark.app.name") === APP_NAME) - assert(optionKeyValues("spark.logConf") === "true") + MockitoAnnotations.initMocks(this) + when(initContainerComponentsProvider.provideInitContainerBootstrap()) + .thenReturn(initContainerBootstrap) + when(submittedDependencyUploader.uploadJars()).thenReturn(JARS_RESOURCE) + when(submittedDependencyUploader.uploadFiles()).thenReturn(FILES_RESOURCE) + when(initContainerBootstrap + .bootstrapInitContainerAndVolumes(mockitoEq(DRIVER_CONTAINER_NAME), any())) + .thenAnswer(new Answer[PodBuilder] { + override def answer(invocationOnMock: InvocationOnMock): PodBuilder = { + invocationOnMock.getArgumentAt(1, classOf[PodBuilder]).editMetadata() + .addToAnnotations(BOOTSTRAPPED_POD_ANNOTATION, TRUE) + .endMetadata() + } + }) + when(initContainerComponentsProvider.provideContainerLocalizedFilesResolver()) + .thenReturn(containerLocalizedFilesResolver) + when(initContainerComponentsProvider.provideExecutorInitContainerConfiguration()) + .thenReturn(executorInitContainerConfiguration) + when(submittedDependenciesSecretBuilder.build()) + .thenReturn(INIT_CONTAINER_SECRET) + when(initContainerConfigMapBuilder.build()) + .thenReturn(INIT_CONTAINER_CONFIG_MAP) + when(kubernetesClientProvider.get).thenReturn(kubernetesClient) + when(kubernetesClient.pods()).thenReturn(podOps) + when(podOps.create(any())).thenAnswer(new Answer[Pod] { + override def answer(invocation: InvocationOnMock): Pod = { + new PodBuilder(invocation.getArgumentAt(0, classOf[Pod])) + .editMetadata() + .withUid(DRIVER_POD_UID) + .endMetadata() + .withKind(DRIVER_POD_KIND) + .withApiVersion(DRIVER_POD_API_VERSION) + .build() } - } + }) + when(containerLocalizedFilesResolver.resolveSubmittedAndRemoteSparkJars()) + .thenReturn(RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS) + when(containerLocalizedFilesResolver.resolveSubmittedSparkJars()) + .thenReturn(RESOLVED_SPARK_JARS) + when(containerLocalizedFilesResolver.resolveSubmittedSparkFiles()) + .thenReturn(RESOLVED_SPARK_FILES) + when(executorInitContainerConfiguration.configureSparkConfForExecutorInitContainer(SPARK_CONF)) + .thenReturn(SPARK_CONF_WITH_EXECUTOR_INIT_CONF) + when(kubernetesClient.resourceList(anyVararg[HasMetadata]())).thenReturn(resourceListOps) } - // Tests with local dependencies with the mounted dependency manager. 
- test("Uploading local dependencies should create Kubernetes secrets and config map") { - val initContainerConfigMap = getInitContainerConfigMap() - val initContainerSecret = getInitContainerSecret() - runWithMountedDependencies(initContainerConfigMap, initContainerSecret) - val driverPod = captureCreatedPodAnswer.capturedArgument - assert(captureCreatedResourcesAnswer.capturedArguments != null) - assert(captureCreatedResourcesAnswer.capturedArguments.size === 2) - assert(captureCreatedResourcesAnswer.capturedArguments.toSet === - Set(initContainerSecret, initContainerConfigMap)) - captureCreatedResourcesAnswer.capturedArguments.foreach { resource => - val driverPodOwnerReferences = resource.getMetadata.getOwnerReferences - assert(driverPodOwnerReferences.size === 1) - val driverPodOwnerReference = driverPodOwnerReferences.asScala.head - assert(driverPodOwnerReference.getName === driverPod.getMetadata.getName) - assert(driverPodOwnerReference.getApiVersion === driverPod.getApiVersion) - assert(driverPodOwnerReference.getUid === driverPod.getMetadata.getUid) - assert(driverPodOwnerReference.getKind === driverPod.getKind) - assert(driverPodOwnerReference.getController) - } + test("Run with dependency uploader") { + when(initContainerComponentsProvider + .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) + .thenReturn(Some(submittedDependencyUploader)) + when(initContainerComponentsProvider + .provideSubmittedDependenciesSecretBuilder(Some(SUBMITTED_RESOURCES.secrets()))) + .thenReturn(Some(submittedDependenciesSecretBuilder)) + when(initContainerComponentsProvider + .provideInitContainerConfigMapBuilder(Some(SUBMITTED_RESOURCES.ids()))) + .thenReturn(initContainerConfigMapBuilder) + runAndVerifyDriverPodHasCorrectProperties() + val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) + verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) + val createdResources = resourceListArgumentCaptor.getAllValues.asScala + assert(createdResources.size === 2) + verifyCreatedResourcesHaveOwnerReferences(createdResources) + assert(createdResources.exists { + case secret: Secret => + val expectedSecretData = Map(SECRET_KEY -> SECRET_DATA) + secret.getMetadata.getName == SECRET_NAME && secret.getData.asScala == expectedSecretData + case _ => false + }) + verifyConfigMapWasCreated(createdResources) + verify(submittedDependencyUploader).uploadJars() + verify(submittedDependencyUploader).uploadFiles() + verify(initContainerComponentsProvider) + .provideInitContainerConfigMapBuilder(Some(SUBMITTED_RESOURCES.ids())) + verify(initContainerComponentsProvider) + .provideSubmittedDependenciesSecretBuilder(Some(SUBMITTED_RESOURCES.secrets())) } - test("Uploading local resources should set classpath environment variables") { - val initContainerConfigMap = getInitContainerConfigMap() - val initContainerSecret = getInitContainerSecret() - runWithMountedDependencies(initContainerConfigMap, initContainerSecret) - val driverPod = captureCreatedPodAnswer.capturedArgument - val maybeDriverContainer = getDriverContainer(driverPod) - maybeDriverContainer.foreach { driverContainer => - val envs = driverContainer.getEnv - .asScala - .map { env => (env.getName, env.getValue) } - .toMap - val classPathEntries = envs(ENV_MOUNTED_CLASSPATH).split(File.pathSeparator).toSet - val expectedClassPathEntries = RESOLVED_SPARK_JARS - .map(Utils.resolveURI) - .map(_.getPath) - .toSet - assert(classPathEntries === expectedClassPathEntries) - } + test("Run without dependency 
uploader") { + when(initContainerComponentsProvider + .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) + .thenReturn(None) + when(initContainerComponentsProvider + .provideSubmittedDependenciesSecretBuilder(None)) + .thenReturn(None) + when(initContainerComponentsProvider + .provideInitContainerConfigMapBuilder(None)) + .thenReturn(initContainerConfigMapBuilder) + runAndVerifyDriverPodHasCorrectProperties() + val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) + verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) + val createdResources = resourceListArgumentCaptor.getAllValues.asScala + assert(createdResources.size === 1) + verifyCreatedResourcesHaveOwnerReferences(createdResources) + verifyConfigMapWasCreated(createdResources) + verify(submittedDependencyUploader, times(0)).uploadJars() + verify(submittedDependencyUploader, times(0)).uploadFiles() + verify(initContainerComponentsProvider) + .provideInitContainerConfigMapBuilder(None) + verify(initContainerComponentsProvider) + .provideSubmittedDependenciesSecretBuilder(None) } - private def getInitContainerSecret(): Secret = { - new SecretBuilder() - .withNewMetadata().withName(s"$APP_NAME-init-container-secret").endMetadata() - .addToData( - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY, DOWNLOAD_JARS_RESOURCE_IDENTIFIER.resourceSecret) - .addToData(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY, - DOWNLOAD_FILES_RESOURCE_IDENTIFIER.resourceSecret) - .build() + private def verifyCreatedResourcesHaveOwnerReferences( + createdResources: mutable.Buffer[HasMetadata]): Unit = { + assert(createdResources.forall { resource => + val owners = resource.getMetadata.getOwnerReferences.asScala + owners.size === 1 && + owners.head.getController && + owners.head.getKind == DRIVER_POD_KIND && + owners.head.getUid == DRIVER_POD_UID && + owners.head.getName == APP_ID && + owners.head.getApiVersion == DRIVER_POD_API_VERSION + }) } - private def getInitContainerConfigMap(): ConfigMap = { - new ConfigMapBuilder() - .withNewMetadata().withName(s"$APP_NAME-init-container-conf").endMetadata() - .addToData("key", "configuration") - .build() + private def verifyConfigMapWasCreated(createdResources: mutable.Buffer[HasMetadata]): Unit = { + assert(createdResources.exists { + case configMap: ConfigMap => + val expectedConfigMapData = Map(CONFIG_MAP_KEY -> CONFIG_MAP_DATA) + configMap.getMetadata.getName == CONFIG_MAP_NAME && + configMap.getData.asScala == expectedConfigMapData + case _ => false + }) } - private def runWithMountedDependencies( - initContainerConfigMap: ConfigMap, initContainerSecret: Secret): Unit = { - sparkConf.set(RESOURCE_STAGING_SERVER_URI, STAGING_SERVER_URI) - .setJars(SPARK_JARS) - .set("spark.files", SPARK_FILES.mkString(",")) - val labelsMatcher = new BaseMatcher[Map[String, String]] { - override def matches(maybeLabels: scala.Any) = { - maybeLabels match { - case labels: Map[String, String] => - labels(SPARK_APP_ID_LABEL).startsWith(APP_NAME) && - labels(SPARK_APP_NAME_LABEL) == APP_NAME - case _ => false + private def runAndVerifyDriverPodHasCorrectProperties(): Unit = { + new Client( + APP_NAME, + APP_ID, + MAIN_CLASS, + SPARK_CONF, + APP_ARGS, + SPARK_JARS, + SPARK_FILES, + kubernetesClientProvider, + initContainerComponentsProvider).run() + val podMatcher = new BaseMatcher[Pod] { + override def matches(o: scala.Any): Boolean = { + o match { + case p: Pod => + Option(p) + .filter(_.getMetadata.getName == APP_ID) + .filter(podHasCorrectAnnotations) + 
.filter(_.getMetadata.getLabels.asScala == ALL_EXPECTED_LABELS) + .filter(containerHasCorrectBasicContainerConfiguration) + .filter(containerHasCorrectBasicEnvs) + .filter(containerHasCorrectMountedClasspath) + .exists(containerHasCorrectJvmOptions) + case _ => + false } } - override def describeTo(description: Description) = { - description.appendText("Checks if the labels contain the app ID and app name.") - } + override def describeTo(description: Description): Unit = {} } - when(mountedDependencyManagerProvider.getMountedDependencyManager( - startsWith(APP_NAME), - mockitoEq(STAGING_SERVER_URI), - argThat(labelsMatcher), - mockitoEq(NAMESPACE), - mockitoEq(SPARK_JARS ++ Seq(MAIN_APP_RESOURCE)), - mockitoEq(SPARK_FILES))).thenReturn(mountedDependencyManager) - when(mountedDependencyManager.uploadJars()).thenReturn(DOWNLOAD_JARS_RESOURCE_IDENTIFIER) - when(mountedDependencyManager.uploadFiles()).thenReturn(DOWNLOAD_FILES_RESOURCE_IDENTIFIER) - when(mountedDependencyManager.buildInitContainerSecret( - DOWNLOAD_JARS_RESOURCE_IDENTIFIER.resourceSecret, - DOWNLOAD_FILES_RESOURCE_IDENTIFIER.resourceSecret)) - .thenReturn(initContainerSecret) - when(mountedDependencyManager.buildInitContainerConfigMap( - DOWNLOAD_JARS_RESOURCE_IDENTIFIER.resourceId, DOWNLOAD_FILES_RESOURCE_IDENTIFIER.resourceId)) - .thenReturn(initContainerConfigMap) - when(mountedDependencyManager.resolveSparkJars()).thenReturn(RESOLVED_SPARK_JARS) - when(mountedDependencyManager.resolveSparkFiles()).thenReturn(RESOLVED_SPARK_FILES) - when(mountedDependencyManager.configurePodToMountLocalDependencies( - mockitoEq(DRIVER_CONTAINER_NAME), - mockitoEq(initContainerSecret), - mockitoEq(initContainerConfigMap), - any())).thenAnswer(new Answer[PodBuilder] { - override def answer(invocationOnMock: InvocationOnMock): PodBuilder = { - val basePod = invocationOnMock.getArgumentAt(3, classOf[PodBuilder]) - basePod.editMetadata().addToAnnotations(MOUNTED_FILES_ANNOTATION_KEY, "true").endMetadata() - } - }) - val clientUnderTest = createClient() - clientUnderTest.run() + verify(podOps).create(argThat(podMatcher)) } - private def getDriverContainer(driverPod: Pod): Option[Container] = { - val maybeDriverContainer = driverPod.getSpec - .getContainers - .asScala - .find(_.getName == DRIVER_CONTAINER_NAME) - assert(maybeDriverContainer.isDefined) - maybeDriverContainer + private def containerHasCorrectJvmOptions(pod: Pod): Boolean = { + val driverContainer = pod.getSpec.getContainers.asScala.head + val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)) + envs.toMap.get(ENV_DRIVER_JAVA_OPTS).exists { javaOptions => + val splitOptions = javaOptions.split(" ") + val expectedOptions = SPARK_CONF.getAll + .filterNot(_._1 == org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS.key) + .toMap ++ + Map( + "spark.app.id" -> APP_ID, + KUBERNETES_DRIVER_POD_NAME.key -> APP_ID, + EXECUTOR_INIT_CONF_KEY -> TRUE, + CUSTOM_JAVA_OPTION_KEY -> CUSTOM_JAVA_OPTION_VALUE, + "spark.jars" -> RESOLVED_SPARK_JARS.mkString(","), + "spark.files" -> RESOLVED_SPARK_FILES.mkString(",")) + splitOptions.forall(_.startsWith("-D")) && + splitOptions.map { option => + val withoutPrefix = option.substring(2) + (withoutPrefix.split("=", 2)(0), withoutPrefix.split("=", 2)(1)) + }.toMap == expectedOptions + } } - private def createAndGetDriverPod(): Pod = { - val clientUnderTest = createClient() - clientUnderTest.run() - val createdDriverPod = captureCreatedPodAnswer.capturedArgument - assert(createdDriverPod != null) - createdDriverPod + private def 
containerHasCorrectMountedClasspath(pod: Pod): Boolean = { + val driverContainer = pod.getSpec.getContainers.asScala.head + val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)) + envs.toMap.get(ENV_MOUNTED_CLASSPATH).exists { classpath => + val mountedClasspathEntities = classpath.split(File.pathSeparator) + mountedClasspathEntities.toSet == RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS.toSet + } } - private def createClient(): Client = { - new Client( - MAIN_CLASS, - sparkConf, - APP_ARGS, - MAIN_APP_RESOURCE, - submissionKubernetesClientProvider, - mountedDependencyManagerProvider) + private def containerHasCorrectBasicEnvs(pod: Pod): Boolean = { + val driverContainer = pod.getSpec.getContainers.asScala.head + val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)) + val expectedBasicEnvs = Map( + ENV_SUBMIT_EXTRA_CLASSPATH -> DRIVER_EXTRA_CLASSPATH, + ENV_DRIVER_MEMORY -> s"${DRIVER_MEMORY_MB + DRIVER_MEMORY_OVERHEAD_MB}m", + ENV_DRIVER_MAIN_CLASS -> MAIN_CLASS, + ENV_DRIVER_ARGS -> APP_ARGS.mkString(" ")) + expectedBasicEnvs.toSet.subsetOf(envs.toSet) } - private class SelfArgumentCapturingAnswer[T: ClassTag] extends Answer[T] { - var capturedArgument: T = _ - - override def answer(invocationOnMock: InvocationOnMock): T = { - val argumentClass = implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] - val argument = invocationOnMock.getArgumentAt(0, argumentClass) - this.capturedArgument = argument - argument - } + private def containerHasCorrectBasicContainerConfiguration(pod: Pod): Boolean = { + val containers = pod.getSpec.getContainers.asScala + containers.size == 1 && + containers.head.getName == DRIVER_CONTAINER_NAME && + containers.head.getImage == CUSTOM_DRIVER_IMAGE && + containers.head.getImagePullPolicy == "IfNotPresent" } - private class AllArgumentsCapturingAnswer[I, T](returnValue: T) extends Answer[T] { - var capturedArguments: Seq[I] = _ - - override def answer(invocationOnMock: InvocationOnMock): T = { - capturedArguments = invocationOnMock.getArguments.map(_.asInstanceOf[I]).toSeq - returnValue - } + private def podHasCorrectAnnotations(pod: Pod): Boolean = { + val expectedAnnotations = Map( + CUSTOM_ANNOTATION_KEY -> CUSTOM_ANNOTATION_VALUE, + BOOTSTRAPPED_POD_ANNOTATION -> TRUE) + pod.getMetadata.getAnnotations.asScala == expectedAnnotations } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolverSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolverSuite.scala new file mode 100644 index 0000000000000..6804f0010b6a5 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolverSuite.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import org.apache.spark.SparkFunSuite + +class ContainerLocalizedFilesResolverSuite extends SparkFunSuite { + private val SPARK_JARS = Seq( + "hdfs://localhost:9000/app/jars/jar1.jar", + "file:///app/jars/jar2.jar", + "local:///app/jars/jar3.jar", + "http://app/jars/jar4.jar") + private val SPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.txt", + "file:///app/files/file2.txt", + "local:///app/files/file3.txt", + "http://app/files/file4.txt") + private val JARS_DOWNLOAD_PATH = "/var/data/spark-jars" + private val FILES_DOWNLOAD_PATH = "/var/data/spark-files" + private val localizedFilesResolver = new ContainerLocalizedFilesResolverImpl( + SPARK_JARS, + SPARK_FILES, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH) + + test("Submitted and remote Spark jars should resolve non-local uris to download path.") { + val resolvedJars = localizedFilesResolver.resolveSubmittedAndRemoteSparkJars() + val expectedResolvedJars = Seq( + s"$JARS_DOWNLOAD_PATH/jar1.jar", + s"$JARS_DOWNLOAD_PATH/jar2.jar", + "/app/jars/jar3.jar", + s"$JARS_DOWNLOAD_PATH/jar4.jar") + assert(resolvedJars === expectedResolvedJars) + } + + test("Submitted Spark jars should resolve to the download path.") { + val resolvedJars = localizedFilesResolver.resolveSubmittedSparkJars() + val expectedResolvedJars = Seq( + "hdfs://localhost:9000/app/jars/jar1.jar", + s"$JARS_DOWNLOAD_PATH/jar2.jar", + "local:///app/jars/jar3.jar", + "http://app/jars/jar4.jar") + assert(resolvedJars === expectedResolvedJars) + } + + test("Submitted Spark files should resolve to the download path.") { + val resolvedFiles = localizedFilesResolver.resolveSubmittedSparkFiles() + val expectedResolvedFiles = Seq( + "hdfs://localhost:9000/app/files/file1.txt", + s"$FILES_DOWNLOAD_PATH/file2.txt", + "local:///app/files/file3.txt", + "http://app/files/file4.txt") + assert(resolvedFiles === expectedResolvedFiles) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfigurationSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfigurationSuite.scala new file mode 100644 index 0000000000000..62bfd127d17e2 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfigurationSuite.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ + +class ExecutorInitContainerConfigurationSuite extends SparkFunSuite { + + private val SECRET_NAME = "init-container-secret" + private val SECRET_MOUNT_DIR = "/mnt/secrets/spark" + private val CONFIG_MAP_NAME = "spark-config-map" + private val CONFIG_MAP_KEY = "spark-config-map-key" + + test("Not passing a secret name should not set the secret value.") { + val baseSparkConf = new SparkConf(false) + val configurationUnderTest = new ExecutorInitContainerConfigurationImpl( + None, + SECRET_MOUNT_DIR, + CONFIG_MAP_NAME, + CONFIG_MAP_KEY) + val resolvedSparkConf = configurationUnderTest + .configureSparkConfForExecutorInitContainer(baseSparkConf) + assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_CONFIG_MAP).contains(CONFIG_MAP_NAME)) + assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY).contains(CONFIG_MAP_KEY)) + assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR) + .contains(SECRET_MOUNT_DIR)) + assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET).isEmpty) + } + + test("Passing a secret name should set the secret value.") { + val baseSparkConf = new SparkConf(false) + val configurationUnderTest = new ExecutorInitContainerConfigurationImpl( + Some(SECRET_NAME), + SECRET_MOUNT_DIR, + CONFIG_MAP_NAME, + CONFIG_MAP_KEY) + val resolvedSparkConf = configurationUnderTest + .configureSparkConfForExecutorInitContainer(baseSparkConf) + assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET).contains(SECRET_NAME)) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala deleted file mode 100644 index 321fe1b3fd889..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.deploy.kubernetes.submit.v2 - -import java.io.{ByteArrayOutputStream, File, StringReader} -import java.util.{Properties, UUID} - -import com.fasterxml.jackson.databind.ObjectMapper -import com.fasterxml.jackson.module.scala.DefaultScalaModule -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, Files} -import io.fabric8.kubernetes.api.model.{ConfigMapBuilder, Container, Pod, PodBuilder, SecretBuilder} -import okhttp3.RequestBody -import okio.Okio -import org.mockito.invocation.InvocationOnMock -import org.mockito.stubbing.Answer -import org.mockito.Matchers.any -import org.mockito.Mockito -import org.scalatest.BeforeAndAfter -import org.scalatest.mock.MockitoSugar._ -import retrofit2.{Call, Response} -import scala.collection.JavaConverters._ - -import org.apache.spark.{SparkFunSuite, SSLOptions} -import org.apache.spark.deploy.kubernetes.CompressionUtils -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory, StagedResourceIdentifier} -import org.apache.spark.util.Utils - -private[spark] class MountedDependencyManagerSuite extends SparkFunSuite with BeforeAndAfter { - import MountedDependencyManagerSuite.createTempFile - - private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) - private val APP_ID = "app-id" - private val LABELS = Map("label1" -> "label1value", "label2" -> "label2value") - private val NAMESPACE = "namespace" - private val STAGING_SERVER_URI = "http://localhost:8000" - private val INIT_CONTAINER_IMAGE = "spark-driver-init:latest" - private val JARS_DOWNLOAD_PATH = DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION.defaultValue.get - private val FILES_DOWNLOAD_PATH = DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION.defaultValue.get - private val DOWNLOAD_TIMEOUT_MINUTES = 5 - private val LOCAL_JARS = Seq(createTempFile("jar"), createTempFile("jar")) - private val JARS = Seq("hdfs://localhost:9000/jars/jar1.jar", - s"file://${LOCAL_JARS.head}", - LOCAL_JARS(1)) - private val LOCAL_FILES = Seq(createTempFile("txt")) - private val FILES = Seq("hdfs://localhost:9000/files/file1.txt", - LOCAL_FILES.head) - private val TRUSTSTORE_FILE = new File(createTempFile(".jks")) - private val TRUSTSTORE_PASSWORD = "trustStorePassword" - private val TRUSTSTORE_TYPE = "jks" - private val STAGING_SERVER_SSL_OPTIONS = SSLOptions( - enabled = true, - trustStore = Some(TRUSTSTORE_FILE), - trustStorePassword = Some(TRUSTSTORE_PASSWORD), - trustStoreType = Some(TRUSTSTORE_TYPE)) - private val JARS_RESOURCE_ID = "jarsId" - private val JARS_SECRET = "jarsSecret" - private val FILES_RESOURCE_ID = "filesId" - private val FILES_SECRET = "filesSecret" - private var retrofitClientFactory: RetrofitClientFactory = _ - private var retrofitClient: ResourceStagingServiceRetrofit = _ - - private var dependencyManagerUnderTest: MountedDependencyManager = _ - - before { - retrofitClientFactory = mock[RetrofitClientFactory] - retrofitClient = mock[ResourceStagingServiceRetrofit] - Mockito.when( - retrofitClientFactory.createRetrofitClient( - STAGING_SERVER_URI, classOf[ResourceStagingServiceRetrofit], STAGING_SERVER_SSL_OPTIONS)) - .thenReturn(retrofitClient) - dependencyManagerUnderTest = new MountedDependencyManagerImpl( - APP_ID, - LABELS, - NAMESPACE, - STAGING_SERVER_URI, - INIT_CONTAINER_IMAGE, - JARS_DOWNLOAD_PATH, - FILES_DOWNLOAD_PATH, - DOWNLOAD_TIMEOUT_MINUTES, - JARS, - FILES, - 
STAGING_SERVER_SSL_OPTIONS, - retrofitClientFactory) - } - - test("Uploading jars should contact the staging server with the appropriate parameters") { - val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( - StagedResourceIdentifier("resourceId", "resourceSecret")) - Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) - .thenAnswer(capturingArgumentsAnswer) - dependencyManagerUnderTest.uploadJars() - testUploadSendsCorrectFiles(LOCAL_JARS, capturingArgumentsAnswer) - } - - test("Uploading files should contact the staging server with the appropriate parameters") { - val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( - StagedResourceIdentifier("resourceId", "resourceSecret")) - Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) - .thenAnswer(capturingArgumentsAnswer) - dependencyManagerUnderTest.uploadFiles() - testUploadSendsCorrectFiles(LOCAL_FILES, capturingArgumentsAnswer) - } - - test("Init container secret should contain jars, files, and trustStore") { - val jarsSecretBase64 = BaseEncoding.base64().encode(JARS_SECRET.getBytes(Charsets.UTF_8)) - val filesSecretBase64 = BaseEncoding.base64().encode(FILES_SECRET.getBytes(Charsets.UTF_8)) - val trustStoreBase64 = BaseEncoding.base64().encode(Files.toByteArray(TRUSTSTORE_FILE)) - val secret = dependencyManagerUnderTest.buildInitContainerSecret("jarsSecret", "filesSecret") - assert(secret.getMetadata.getName === s"$APP_ID-spark-init") - val expectedSecrets = Map( - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY -> jarsSecretBase64, - INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY -> filesSecretBase64, - INIT_CONTAINER_TRUSTSTORE_SECRET_KEY -> trustStoreBase64) - assert(secret.getData.asScala === expectedSecrets) - } - - test("Init container config map should contain parameters for downloading from staging server") { - val configMap = dependencyManagerUnderTest.buildInitContainerConfigMap( - JARS_RESOURCE_ID, FILES_RESOURCE_ID) - assert(configMap.getMetadata.getName === s"$APP_ID-init-properties") - val propertiesRawString = configMap.getData.get(INIT_CONTAINER_CONFIG_MAP_KEY) - assert(propertiesRawString != null) - val propertiesReader = new StringReader(propertiesRawString) - val properties = new Properties() - properties.load(propertiesReader) - val propertiesMap = properties.stringPropertyNames().asScala.map { prop => - (prop, properties.getProperty(prop)) - }.toMap - val expectedProperties = Map[String, String]( - RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, - DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION.key -> JARS_DOWNLOAD_PATH, - DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION.key -> FILES_DOWNLOAD_PATH, - INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> JARS_RESOURCE_ID, - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH, - INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> FILES_RESOURCE_ID, - INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> - INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH, - DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT.key -> s"${DOWNLOAD_TIMEOUT_MINUTES}m", - RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key -> INIT_CONTAINER_TRUSTSTORE_PATH, - RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> "true", - RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key -> TRUSTSTORE_PASSWORD, - RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE) - assert(propertiesMap === expectedProperties) - } - - test("Resolving jars should map local paths to their mounted counterparts") { - val resolvedJars = 
dependencyManagerUnderTest.resolveSparkJars() - val expectedResolvedJars = Seq( - "hdfs://localhost:9000/jars/jar1.jar", - s"file://$JARS_DOWNLOAD_PATH/${new File(JARS(1)).getName}", - s"file://$JARS_DOWNLOAD_PATH/${new File(JARS(2)).getName}") - assert(resolvedJars === expectedResolvedJars) - } - - test("Resolving files should map local paths to their mounted counterparts") { - val resolvedFiles = dependencyManagerUnderTest.resolveSparkFiles() - val expectedResolvedFiles = Seq( - "hdfs://localhost:9000/files/file1.txt", - s"file://$FILES_DOWNLOAD_PATH/${new File(FILES(1)).getName}") - assert(resolvedFiles === expectedResolvedFiles) - } - - test("Downloading init container should be added to pod") { - val driverPod = configureDriverPod() - val podAnnotations = driverPod.getMetadata.getAnnotations - assert(podAnnotations.size === 1) - val initContainerRawAnnotation = podAnnotations.get(INIT_CONTAINER_ANNOTATION) - val initContainers = OBJECT_MAPPER.readValue( - initContainerRawAnnotation, classOf[Array[Container]]) - assert(initContainers.size === 1) - val initContainer = initContainers.head - assert(initContainer.getName === "spark-driver-init") - assert(initContainer.getImage === INIT_CONTAINER_IMAGE) - assert(initContainer.getImagePullPolicy === "IfNotPresent") - val volumeMounts = initContainer.getVolumeMounts - .asScala - .map(mount => (mount.getName, mount.getMountPath)) - .toMap - val expectedVolumeMounts = Map[String, String]( - DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, - DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH, - INIT_CONTAINER_PROPERTIES_FILE_VOLUME -> INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH, - INIT_CONTAINER_SECRETS_VOLUME_NAME -> INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH) - assert(volumeMounts === expectedVolumeMounts) - } - - test("Driver pod should have added volumes and volume mounts for file downloads") { - val driverPod = configureDriverPod() - val volumes = driverPod.getSpec.getVolumes.asScala.map(volume => (volume.getName, volume)).toMap - val initContainerPropertiesVolume = volumes(INIT_CONTAINER_PROPERTIES_FILE_VOLUME).getConfigMap - assert(initContainerPropertiesVolume != null) - assert(initContainerPropertiesVolume.getName === "config") - assert(initContainerPropertiesVolume.getItems.asScala.exists { keyToPath => - keyToPath.getKey == INIT_CONTAINER_CONFIG_MAP_KEY && - keyToPath.getPath == INIT_CONTAINER_PROPERTIES_FILE_NAME - }) - val jarsVolume = volumes(DOWNLOAD_JARS_VOLUME_NAME) - assert(jarsVolume.getEmptyDir != null) - val filesVolume = volumes(DOWNLOAD_FILES_VOLUME_NAME) - assert(filesVolume.getEmptyDir != null) - val initContainerSecretVolume = volumes(INIT_CONTAINER_SECRETS_VOLUME_NAME) - assert(initContainerSecretVolume.getSecret != null) - assert(initContainerSecretVolume.getSecret.getSecretName === "secret") - val driverContainer = driverPod.getSpec - .getContainers - .asScala - .find(_.getName == "driver-container").get - val driverContainerVolumeMounts = driverContainer.getVolumeMounts - .asScala - .map(mount => (mount.getName, mount.getMountPath)) - .toMap - val expectedVolumeMountNamesAndPaths = Map[String, String]( - DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, - DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH) - assert(driverContainerVolumeMounts === expectedVolumeMountNamesAndPaths) - val envs = driverContainer.getEnv - assert(envs.size() === 1) - assert(envs.asScala.head.getName === ENV_UPLOADED_JARS_DIR) - assert(envs.asScala.head.getValue === JARS_DOWNLOAD_PATH) - } - - private def configureDriverPod(): Pod = { - val 
initContainerSecret = new SecretBuilder() - .withNewMetadata().withName("secret").endMetadata() - .addToData("datakey", "datavalue") - .build() - val initContainerConfigMap = new ConfigMapBuilder() - .withNewMetadata().withName("config").endMetadata() - .addToData("datakey", "datavalue") - .build() - val basePod = new PodBuilder() - .withNewMetadata() - .withName("driver-pod") - .endMetadata() - .withNewSpec() - .addNewContainer() - .withName("driver-container") - .withImage("spark-driver:latest") - .endContainer() - .endSpec() - val adjustedPod = dependencyManagerUnderTest.configurePodToMountLocalDependencies( - "driver-container", - initContainerSecret, - initContainerConfigMap, - basePod).build() - adjustedPod - } - - private def testUploadSendsCorrectFiles( - expectedFiles: Seq[String], - capturingArgumentsAnswer: UploadDependenciesArgumentsCapturingAnswer) = { - val requestLabelsBytes = requestBodyBytes(capturingArgumentsAnswer.podLabelsArg) - val requestLabelsString = new String(requestLabelsBytes, Charsets.UTF_8) - val requestLabelsMap = OBJECT_MAPPER.readValue( - requestLabelsString, classOf[Map[String, String]]) - assert(requestLabelsMap === LABELS) - val requestNamespaceBytes = requestBodyBytes(capturingArgumentsAnswer.podNamespaceArg) - val requestNamespaceString = new String(requestNamespaceBytes, Charsets.UTF_8) - assert(requestNamespaceString === NAMESPACE) - val localJarsTarStream = new ByteArrayOutputStream() - CompressionUtils.writeTarGzipToStream(localJarsTarStream, expectedFiles) - val requestResourceBytes = requestBodyBytes(capturingArgumentsAnswer.podResourcesArg) - assert(requestResourceBytes.sameElements(localJarsTarStream.toByteArray)) - } - - private def requestBodyBytes(requestBody: RequestBody): Array[Byte] = { - Utils.tryWithResource(new ByteArrayOutputStream()) { outputStream => - Utils.tryWithResource(Okio.sink(outputStream)) { sink => - Utils.tryWithResource(Okio.buffer(sink)) { bufferedSink => - requestBody.writeTo(bufferedSink) - } - } - outputStream.toByteArray - } - } -} - -private class UploadDependenciesArgumentsCapturingAnswer(returnValue: StagedResourceIdentifier) - extends Answer[Call[StagedResourceIdentifier]] { - - var podLabelsArg: RequestBody = _ - var podNamespaceArg: RequestBody = _ - var podResourcesArg: RequestBody = _ - var kubernetesCredentialsArg: RequestBody = _ - - override def answer(invocationOnMock: InvocationOnMock): Call[StagedResourceIdentifier] = { - podLabelsArg = invocationOnMock.getArgumentAt(0, classOf[RequestBody]) - podNamespaceArg = invocationOnMock.getArgumentAt(1, classOf[RequestBody]) - podResourcesArg = invocationOnMock.getArgumentAt(2, classOf[RequestBody]) - kubernetesCredentialsArg = invocationOnMock.getArgumentAt(3, classOf[RequestBody]) - val responseCall = mock[Call[StagedResourceIdentifier]] - Mockito.when(responseCall.execute()).thenReturn(Response.success(returnValue)) - responseCall - } -} - -private object MountedDependencyManagerSuite { - def createTempFile(extension: String): String = { - val dir = Utils.createTempDir() - val file = new File(dir, s"${UUID.randomUUID().toString}.$extension") - Files.write(UUID.randomUUID().toString, file, Charsets.UTF_8) - file.getAbsolutePath - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilderSuite.scala new file mode 100644 index 
0000000000000..7c6fbf5ce6da2 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilderSuite.scala @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.StringReader +import java.util.Properties + +import com.google.common.collect.Maps +import org.mockito.Mockito.{verify, when} +import org.scalatest.BeforeAndAfter +import org.scalatest.mock.MockitoSugar._ +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.kubernetes.config._ + +class SparkInitContainerConfigMapBuilderSuite extends SparkFunSuite with BeforeAndAfter { + + private val JARS = Seq( + "hdfs://localhost:9000/app/jars/jar1.jar", + "file:///app/jars/jar2.jar", + "http://localhost:9000/app/jars/jar3.jar", + "local:///app/jars/jar4.jar") + private val FILES = Seq( + "hdfs://localhost:9000/app/files/file1.txt", + "file:///app/files/file2.txt", + "http://localhost:9000/app/files/file3.txt", + "local:///app/files/file4.txt") + private val JARS_DOWNLOAD_PATH = "/var/data/jars" + private val FILES_DOWNLOAD_PATH = "/var/data/files" + private val CONFIG_MAP_NAME = "config-map" + private val CONFIG_MAP_KEY = "config-map-key" + + test("Config map without submitted dependencies sets remote download configurations") { + val configMap = new SparkInitContainerConfigMapBuilderImpl( + JARS, + FILES, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + CONFIG_MAP_NAME, + CONFIG_MAP_KEY, + None).build() + assert(configMap.getMetadata.getName === CONFIG_MAP_NAME) + val maybeConfigValue = configMap.getData.asScala.get(CONFIG_MAP_KEY) + assert(maybeConfigValue.isDefined) + maybeConfigValue.foreach { configValue => + val propertiesStringReader = new StringReader(configValue) + val properties = new Properties() + properties.load(propertiesStringReader) + val propertiesMap = Maps.fromProperties(properties).asScala + val remoteJarsString = propertiesMap.get(INIT_CONTAINER_REMOTE_JARS.key) + assert(remoteJarsString.isDefined) + val remoteJars = remoteJarsString.map(_.split(",")).toSet.flatten + assert(remoteJars === + Set("hdfs://localhost:9000/app/jars/jar1.jar", "http://localhost:9000/app/jars/jar3.jar")) + val remoteFilesString = propertiesMap.get(INIT_CONTAINER_REMOTE_FILES.key) + assert(remoteFilesString.isDefined) + val remoteFiles = remoteFilesString.map(_.split(",")).toSet.flatten + assert(remoteFiles === + Set("hdfs://localhost:9000/app/files/file1.txt", + "http://localhost:9000/app/files/file3.txt")) + assert(propertiesMap(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION.key) === JARS_DOWNLOAD_PATH) + assert(propertiesMap(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION.key) === 
FILES_DOWNLOAD_PATH) + } + } + + test("Config map with submitted dependencies adds configurations from plugin") { + val submittedDependenciesPlugin = mock[SubmittedDependencyInitContainerConfigPlugin] + when(submittedDependenciesPlugin.configurationsToFetchSubmittedDependencies()) + .thenReturn(Map("customConf" -> "customConfValue")) + val configMap = new SparkInitContainerConfigMapBuilderImpl( + JARS, + FILES, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + CONFIG_MAP_NAME, + CONFIG_MAP_KEY, + Some(submittedDependenciesPlugin)).build() + val configValue = configMap.getData.asScala(CONFIG_MAP_KEY) + val propertiesStringReader = new StringReader(configValue) + val properties = new Properties() + properties.load(propertiesStringReader) + val propertiesMap = Maps.fromProperties(properties).asScala + assert(propertiesMap("customConf") === "customConfValue") + verify(submittedDependenciesPlugin).configurationsToFetchSubmittedDependencies() + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala new file mode 100644 index 0000000000000..11a671085c201 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.File + +import org.apache.spark.{SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.config._ + +class SubmittedDependencyInitContainerConfigPluginSuite extends SparkFunSuite { + private val STAGING_SERVER_URI = "http://localhost:9000" + private val JARS_RESOURCE_ID = "jars-id" + private val FILES_RESOURCE_ID = "files-id" + private val JARS_SECRET_KEY = "jars" + private val FILES_SECRET_KEY = "files" + private val TRUSTSTORE_SECRET_KEY = "trustStore" + private val SECRETS_VOLUME_MOUNT_PATH = "/var/data/" + private val TRUSTSTORE_PASSWORD = "trustStore" + private val TRUSTSTORE_FILE = "/mnt/secrets/trustStore.jks" + private val TRUSTSTORE_TYPE = "jks" + private val RESOURCE_STAGING_SERVICE_SSL_OPTIONS = SSLOptions( + enabled = true, + trustStore = Some(new File(TRUSTSTORE_FILE)), + trustStorePassword = Some(TRUSTSTORE_PASSWORD), + trustStoreType = Some(TRUSTSTORE_TYPE)) + + test("Plugin should provide configuration for fetching uploaded dependencies") { + val configPluginUnderTest = new SubmittedDependencyInitContainerConfigPluginImpl( + STAGING_SERVER_URI, + JARS_RESOURCE_ID, + FILES_RESOURCE_ID, + JARS_SECRET_KEY, + FILES_SECRET_KEY, + TRUSTSTORE_SECRET_KEY, + SECRETS_VOLUME_MOUNT_PATH, + SSLOptions()) + val addedConfigurations = configPluginUnderTest.configurationsToFetchSubmittedDependencies() + val expectedConfigurations = Map( + RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> JARS_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> FILES_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> + s"$SECRETS_VOLUME_MOUNT_PATH/$JARS_SECRET_KEY", + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> + s"$SECRETS_VOLUME_MOUNT_PATH/$FILES_SECRET_KEY", + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> "false") + assert(addedConfigurations === expectedConfigurations) + } + + test("Plugin should set up SSL with the appropriate trustStore if it's provided.") { + val configPluginUnderTest = new SubmittedDependencyInitContainerConfigPluginImpl( + STAGING_SERVER_URI, + JARS_RESOURCE_ID, + FILES_RESOURCE_ID, + JARS_SECRET_KEY, + FILES_SECRET_KEY, + TRUSTSTORE_SECRET_KEY, + SECRETS_VOLUME_MOUNT_PATH, + RESOURCE_STAGING_SERVICE_SSL_OPTIONS) + val addedConfigurations = configPluginUnderTest.configurationsToFetchSubmittedDependencies() + val expectedSslConfigurations = Map( + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> "true", + RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key -> + s"$SECRETS_VOLUME_MOUNT_PATH/$TRUSTSTORE_SECRET_KEY", + RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key -> TRUSTSTORE_PASSWORD, + RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE) + assert(expectedSslConfigurations.toSet.subsetOf(addedConfigurations.toSet)) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala new file mode 100644 index 0000000000000..189d87e27a28a --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.File + +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.Secret +import scala.collection.JavaConverters._ +import scala.collection.Map + +import org.apache.spark.{SparkFunSuite, SSLOptions} +import org.apache.spark.util.Utils + +class SubmittedDependencySecretBuilderSuite extends SparkFunSuite { + + private val SECRET_NAME = "submitted-dependency-secret" + private val JARS_SECRET = "jars-secret" + private val FILES_SECRET = "files-secret" + private val JARS_SECRET_KEY = "jars-secret-key" + private val FILES_SECRET_KEY = "files-secret-key" + private val TRUSTSTORE_SECRET_KEY = "truststore-secret-key" + private val TRUSTSTORE_STRING_CONTENTS = "trustStore-contents" + + test("Building the secret without a trustStore") { + val builder = new SubmittedDependencySecretBuilderImpl( + SECRET_NAME, + JARS_SECRET, + FILES_SECRET, + JARS_SECRET_KEY, + FILES_SECRET_KEY, + TRUSTSTORE_SECRET_KEY, + SSLOptions()) + val secret = builder.build() + assert(secret.getMetadata.getName === SECRET_NAME) + val secretDecodedData = decodeSecretData(secret) + val expectedSecretData = Map(JARS_SECRET_KEY -> JARS_SECRET, FILES_SECRET_KEY -> FILES_SECRET) + assert(secretDecodedData === expectedSecretData) + } + + private def decodeSecretData(secret: Secret): Map[String, String] = { + val secretData = secret.getData.asScala + secretData.mapValues(encoded => + new String(BaseEncoding.base64().decode(encoded), Charsets.UTF_8)) + } + + test("Building the secret with a trustStore") { + val tempTrustStoreDir = Utils.createTempDir(namePrefix = "temp-truststores") + try { + val trustStoreFile = new File(tempTrustStoreDir, "trustStore.jks") + Files.write(TRUSTSTORE_STRING_CONTENTS, trustStoreFile, Charsets.UTF_8) + val builder = new SubmittedDependencySecretBuilderImpl( + SECRET_NAME, + JARS_SECRET, + FILES_SECRET, + JARS_SECRET_KEY, + FILES_SECRET_KEY, + TRUSTSTORE_SECRET_KEY, + SSLOptions(trustStore = Some(trustStoreFile))) + val secret = builder.build() + val secretDecodedData = decodeSecretData(secret) + assert(secretDecodedData(TRUSTSTORE_SECRET_KEY) === TRUSTSTORE_STRING_CONTENTS) + } finally { + tempTrustStoreDir.delete() + } + } + +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderSuite.scala new file mode 100644 index 0000000000000..7b259aa2c3a0c --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderSuite.scala @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File} +import java.util.UUID + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.google.common.base.Charsets +import com.google.common.io.Files +import okhttp3.RequestBody +import okio.Okio +import org.mockito.Matchers.any +import org.mockito.Mockito +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.BeforeAndAfter +import org.scalatest.mock.MockitoSugar._ +import retrofit2.{Call, Response} + +import org.apache.spark.{SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory} +import org.apache.spark.util.Utils + +private[spark] class SubmittedDependencyUploaderSuite extends SparkFunSuite with BeforeAndAfter { + import SubmittedDependencyUploaderSuite.createTempFile + + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + private val APP_ID = "app-id" + private val LABELS = Map("label1" -> "label1value", "label2" -> "label2value") + private val NAMESPACE = "namespace" + private val STAGING_SERVER_URI = "http://localhost:8000" + private val LOCAL_JARS = Seq(createTempFile("jar"), createTempFile("jar")) + private val JARS = Seq("hdfs://localhost:9000/jars/jar1.jar", + s"file://${LOCAL_JARS.head}", + LOCAL_JARS(1)) + private val LOCAL_FILES = Seq(createTempFile("txt")) + private val FILES = Seq("hdfs://localhost:9000/files/file1.txt", + LOCAL_FILES.head) + private val TRUSTSTORE_FILE = new File(createTempFile(".jks")) + private val TRUSTSTORE_PASSWORD = "trustStorePassword" + private val TRUSTSTORE_TYPE = "jks" + private val STAGING_SERVER_SSL_OPTIONS = SSLOptions( + enabled = true, + trustStore = Some(TRUSTSTORE_FILE), + trustStorePassword = Some(TRUSTSTORE_PASSWORD), + trustStoreType = Some(TRUSTSTORE_TYPE)) + private var retrofitClientFactory: RetrofitClientFactory = _ + private var retrofitClient: ResourceStagingServiceRetrofit = _ + + private var dependencyUploaderUnderTest: SubmittedDependencyUploader = _ + + before { + retrofitClientFactory = mock[RetrofitClientFactory] + retrofitClient = mock[ResourceStagingServiceRetrofit] + Mockito.when( + retrofitClientFactory.createRetrofitClient( + STAGING_SERVER_URI, classOf[ResourceStagingServiceRetrofit], STAGING_SERVER_SSL_OPTIONS)) + .thenReturn(retrofitClient) + dependencyUploaderUnderTest = new SubmittedDependencyUploaderImpl( + APP_ID, + LABELS, + NAMESPACE, + STAGING_SERVER_URI, + JARS, + FILES, + STAGING_SERVER_SSL_OPTIONS, + retrofitClientFactory) + } + + test("Uploading jars 
should contact the staging server with the appropriate parameters") { + val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( + SubmittedResourceIdAndSecret("resourceId", "resourceSecret")) + Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) + .thenAnswer(capturingArgumentsAnswer) + dependencyUploaderUnderTest.uploadJars() + testUploadSendsCorrectFiles(LOCAL_JARS, capturingArgumentsAnswer) + } + + test("Uploading files should contact the staging server with the appropriate parameters") { + val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( + SubmittedResourceIdAndSecret("resourceId", "resourceSecret")) + Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) + .thenAnswer(capturingArgumentsAnswer) + dependencyUploaderUnderTest.uploadFiles() + testUploadSendsCorrectFiles(LOCAL_FILES, capturingArgumentsAnswer) + } + + private def testUploadSendsCorrectFiles( + expectedFiles: Seq[String], + capturingArgumentsAnswer: UploadDependenciesArgumentsCapturingAnswer) = { + val requestLabelsBytes = requestBodyBytes(capturingArgumentsAnswer.podLabelsArg) + val requestLabelsString = new String(requestLabelsBytes, Charsets.UTF_8) + val requestLabelsMap = OBJECT_MAPPER.readValue( + requestLabelsString, classOf[Map[String, String]]) + assert(requestLabelsMap === LABELS) + val requestNamespaceBytes = requestBodyBytes(capturingArgumentsAnswer.podNamespaceArg) + val requestNamespaceString = new String(requestNamespaceBytes, Charsets.UTF_8) + assert(requestNamespaceString === NAMESPACE) + + val unpackedFilesDir = Utils.createTempDir(namePrefix = "test-unpacked-files") + val compressedBytesInput = new ByteArrayInputStream( + requestBodyBytes(capturingArgumentsAnswer.podResourcesArg)) + CompressionUtils.unpackTarStreamToDirectory(compressedBytesInput, unpackedFilesDir) + val writtenFiles = unpackedFilesDir.listFiles + assert(writtenFiles.size === expectedFiles.size) + + expectedFiles.map(new File(_)).foreach { expectedFile => + val maybeWrittenFile = writtenFiles.find(_.getName == expectedFile.getName) + assert(maybeWrittenFile.isDefined) + maybeWrittenFile.foreach { writtenFile => + val writtenFileBytes = Files.toByteArray(writtenFile) + val expectedFileBytes = Files.toByteArray(expectedFile) + assert(expectedFileBytes.toSeq === writtenFileBytes.toSeq) + } + } + } + + private def requestBodyBytes(requestBody: RequestBody): Array[Byte] = { + Utils.tryWithResource(new ByteArrayOutputStream()) { outputStream => + Utils.tryWithResource(Okio.sink(outputStream)) { sink => + Utils.tryWithResource(Okio.buffer(sink)) { bufferedSink => + try { + requestBody.writeTo(bufferedSink) + } finally { + bufferedSink.flush() + } + } + } + outputStream.toByteArray + } + } +} + +private class UploadDependenciesArgumentsCapturingAnswer(returnValue: SubmittedResourceIdAndSecret) + extends Answer[Call[SubmittedResourceIdAndSecret]] { + + var podLabelsArg: RequestBody = _ + var podNamespaceArg: RequestBody = _ + var podResourcesArg: RequestBody = _ + var kubernetesCredentialsArg: RequestBody = _ + + override def answer(invocationOnMock: InvocationOnMock): Call[SubmittedResourceIdAndSecret] = { + podLabelsArg = invocationOnMock.getArgumentAt(0, classOf[RequestBody]) + podNamespaceArg = invocationOnMock.getArgumentAt(1, classOf[RequestBody]) + podResourcesArg = invocationOnMock.getArgumentAt(2, classOf[RequestBody]) + kubernetesCredentialsArg = invocationOnMock.getArgumentAt(3, classOf[RequestBody]) + val responseCall = 
mock[Call[SubmittedResourceIdAndSecret]] + Mockito.when(responseCall.execute()).thenReturn(Response.success(returnValue)) + responseCall + } +} + +private object SubmittedDependencyUploaderSuite { + def createTempFile(extension: String): String = { + val dir = Utils.createTempDir() + val file = new File(dir, s"${UUID.randomUUID().toString}.$extension") + Files.write(UUID.randomUUID().toString, file, Charsets.UTF_8) + file.getAbsolutePath + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala index 77eb7f2b9f49c..6ab37185b8d07 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala @@ -24,6 +24,7 @@ import com.google.common.base.Charsets import com.google.common.io.Files import okhttp3.{MediaType, ResponseBody} import org.mockito.Matchers.any +import org.mockito.Mockito import org.mockito.Mockito.{doAnswer, when} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer @@ -31,7 +32,7 @@ import org.scalatest.BeforeAndAfter import org.scalatest.mock.MockitoSugar._ import retrofit2.{Call, Callback, Response} -import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkFunSuite, SSLOptions} import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.util.Utils @@ -55,7 +56,6 @@ class KubernetesSparkDependencyDownloadInitContainerSuite private val JARS_RESOURCE_ID = "jarsId" private val FILES_RESOURCE_ID = "filesId" - private var sparkConf: SparkConf = _ private var downloadJarsDir: File = _ private var downloadFilesDir: File = _ private var downloadJarsSecretValue: String = _ @@ -64,7 +64,7 @@ class KubernetesSparkDependencyDownloadInitContainerSuite private var filesCompressedBytes: Array[Byte] = _ private var retrofitClientFactory: RetrofitClientFactory = _ private var retrofitClient: ResourceStagingServiceRetrofit = _ - private var initContainerUnderTest: KubernetesSparkDependencyDownloadInitContainer = _ + private var fileFetcher: FileFetcher = _ override def beforeAll(): Unit = { jarsCompressedBytes = compressPathsToBytes(JARS) @@ -80,24 +80,10 @@ class KubernetesSparkDependencyDownloadInitContainerSuite downloadFilesDir = Utils.createTempDir() retrofitClientFactory = mock[RetrofitClientFactory] retrofitClient = mock[ResourceStagingServiceRetrofit] - sparkConf = new SparkConf(true) - .set(RESOURCE_STAGING_SERVER_URI, STAGING_SERVER_URI) - .set(INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER, JARS_RESOURCE_ID) - .set(INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION, DOWNLOAD_JARS_SECRET_LOCATION) - .set(INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER, FILES_RESOURCE_ID) - .set(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION, DOWNLOAD_FILES_SECRET_LOCATION) - .set(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION, downloadJarsDir.getAbsolutePath) - .set(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION, downloadFilesDir.getAbsolutePath) - .set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) - 
.set(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE, TRUSTSTORE_FILE.getAbsolutePath) - .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD, TRUSTSTORE_PASSWORD) - .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE, TRUSTSTORE_TYPE) - + fileFetcher = mock[FileFetcher] when(retrofitClientFactory.createRetrofitClient( STAGING_SERVER_URI, classOf[ResourceStagingServiceRetrofit], STAGING_SERVER_SSL_OPTIONS)) .thenReturn(retrofitClient) - initContainerUnderTest = new KubernetesSparkDependencyDownloadInitContainer( - sparkConf, retrofitClientFactory) } after { @@ -105,9 +91,15 @@ class KubernetesSparkDependencyDownloadInitContainerSuite downloadFilesDir.delete() } - test("Downloads should unpack response body streams to directories") { + test("Downloads from resource staging server should unpack response body to directories") { val downloadJarsCall = mock[Call[ResponseBody]] val downloadFilesCall = mock[Call[ResponseBody]] + val sparkConf = getSparkConfForResourceStagingServerDownloads + val initContainerUnderTest = new KubernetesSparkDependencyDownloadInitContainer( + sparkConf, + retrofitClientFactory, + fileFetcher, + securityManager = new SparkSecurityManager(sparkConf)) when(retrofitClient.downloadResources(JARS_RESOURCE_ID, downloadJarsSecretValue)) .thenReturn(downloadJarsCall) when(retrofitClient.downloadResources(FILES_RESOURCE_ID, downloadFilesSecretValue)) @@ -125,6 +117,46 @@ class KubernetesSparkDependencyDownloadInitContainerSuite initContainerUnderTest.run() checkWrittenFilesAreTheSameAsOriginal(JARS, downloadJarsDir) checkWrittenFilesAreTheSameAsOriginal(FILES, downloadFilesDir) + Mockito.verifyZeroInteractions(fileFetcher) + } + + test("Downloads from remote server should invoke the file fetcher") { + val sparkConf = getSparkConfForRemoteFileDownloads + val initContainerUnderTest = new KubernetesSparkDependencyDownloadInitContainer( + sparkConf, + retrofitClientFactory, + fileFetcher, + securityManager = new SparkSecurityManager(sparkConf)) + initContainerUnderTest.run() + Mockito.verify(fileFetcher).fetchFile("http://localhost:9000/jar1.jar", downloadJarsDir) + Mockito.verify(fileFetcher).fetchFile("hdfs://localhost:9000/jar2.jar", downloadJarsDir) + Mockito.verify(fileFetcher).fetchFile("http://localhost:9000/file.txt", downloadFilesDir) + + } + + private def getSparkConfForResourceStagingServerDownloads: SparkConf = { + new SparkConf(true) + .set(RESOURCE_STAGING_SERVER_URI, STAGING_SERVER_URI) + .set(INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER, JARS_RESOURCE_ID) + .set(INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION, DOWNLOAD_JARS_SECRET_LOCATION) + .set(INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER, FILES_RESOURCE_ID) + .set(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION, DOWNLOAD_FILES_SECRET_LOCATION) + .set(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION, downloadJarsDir.getAbsolutePath) + .set(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION, downloadFilesDir.getAbsolutePath) + .set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) + .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE, TRUSTSTORE_FILE.getAbsolutePath) + .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD, TRUSTSTORE_PASSWORD) + .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE, TRUSTSTORE_TYPE) + } + + private def getSparkConfForRemoteFileDownloads: SparkConf = { + new SparkConf(true) + .set(INIT_CONTAINER_REMOTE_JARS, + "http://localhost:9000/jar1.jar,hdfs://localhost:9000/jar2.jar") + .set(INIT_CONTAINER_REMOTE_FILES, + "http://localhost:9000/file.txt") + .set(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION, downloadJarsDir.getAbsolutePath) + 
.set(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION, downloadFilesDir.getAbsolutePath)
   }
 
   private def checkWrittenFilesAreTheSameAsOriginal(
diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile
index 23c6751f1b3ed..c5f1c43ff7cf4 100644
--- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile
+++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile
@@ -37,4 +37,7 @@ ENV SPARK_HOME /opt/spark
 WORKDIR /opt/spark
 
 # TODO support spark.executor.extraClassPath
-CMD exec ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp ${SPARK_HOME}/jars/\* org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP
+CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \
+    if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \
+    if ! [ -z ${SPARK_EXECUTOR_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXECUTOR_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \
+    exec ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP
diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml
index c94893cbce410..02904c0e5fe21 100644
--- a/resource-managers/kubernetes/integration-tests/pom.xml
+++ b/resource-managers/kubernetes/integration-tests/pom.xml
@@ -200,6 +200,28 @@
+      <plugin>
+        <artifactId>maven-resources-plugin</artifactId>
+        <version>3.0.2</version>
+        <executions>
+          <execution>
+            <id>copy-integration-test-http-server-dockerfile</id>
+            <phase>pre-integration-test</phase>
+            <goals>
+              <goal>copy-resources</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>${project.build.directory}/docker/dockerfiles</outputDirectory>
+              <resources>
+                <resource>
+                  <directory>src/main/docker</directory>
+                  <filtering>true</filtering>
+                </resource>
+              </resources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
       <plugin>
         <groupId>com.googlecode.maven-download-plugin</groupId>
         <artifactId>download-maven-plugin</artifactId>
diff --git a/resource-managers/kubernetes/integration-tests/src/main/docker/integration-test-asset-server/Dockerfile b/resource-managers/kubernetes/integration-tests/src/main/docker/integration-test-asset-server/Dockerfile
new file mode 100644
index 0000000000000..e26d207cf4397
--- /dev/null
+++ b/resource-managers/kubernetes/integration-tests/src/main/docker/integration-test-asset-server/Dockerfile
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# + +# Simple asset server that can provide the integration test jars over HTTP. +FROM trinitronx/python-simplehttpserver:travis-12 + +ADD examples/integration-tests-jars /var/www diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala index ae02de7937c6a..3be4507ac105a 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala @@ -18,23 +18,19 @@ package org.apache.spark.deploy.kubernetes.integrationtest import java.util.UUID -import scala.collection.JavaConverters._ - -import com.google.common.collect.ImmutableList import io.fabric8.kubernetes.client.internal.readiness.Readiness import org.scalatest.{BeforeAndAfter, DoNotDiscover} import org.scalatest.concurrent.Eventually +import scala.collection.JavaConverters._ -import org.apache.spark._ +import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} import org.apache.spark.deploy.kubernetes.SSLUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND -import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 -import org.apache.spark.deploy.kubernetes.submit.v1.Client -import org.apache.spark.deploy.kubernetes.submit.v2.{MountedDependencyManagerProviderImpl, SubmissionKubernetesClientProviderImpl} -import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} +import org.apache.spark.deploy.kubernetes.submit.v2.Client +import org.apache.spark.launcher.SparkLauncher @DoNotDiscover private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) @@ -44,11 +40,14 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) private var kubernetesTestComponents: KubernetesTestComponents = _ private var sparkConf: SparkConf = _ private var resourceStagingServerLauncher: ResourceStagingServerLauncher = _ + private var staticAssetServerLauncher: StaticAssetServerLauncher = _ override def beforeAll(): Unit = { kubernetesTestComponents = new KubernetesTestComponents(testBackend.getKubernetesClient) resourceStagingServerLauncher = new ResourceStagingServerLauncher( kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) + staticAssetServerLauncher = new StaticAssetServerLauncher( + kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) } before { @@ -98,7 +97,6 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) assume(testBackend.name == MINIKUBE_TEST_BACKEND) sparkConf.setJars(Seq( - KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) runSparkPiAndVerifyCompletion(KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE) } @@ -118,6 +116,25 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) runSparkGroupByTestAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) } + test("Use remote 
resources without the resource staging server.") { + val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() + sparkConf.setJars(Seq( + s"$assetServerUri/${KubernetesSuite.EXAMPLES_JAR_FILE.getName}", + s"$assetServerUri/${KubernetesSuite.HELPER_JAR_FILE.getName}" + )) + runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) + } + + test("Mix remote resources with submitted ones.") { + launchStagingServer(SSLOptions()) + val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() + sparkConf.setJars(Seq( + KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + s"$assetServerUri/${KubernetesSuite.HELPER_JAR_FILE.getName}" + )) + runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) + } + private def launchStagingServer(resourceStagingServerSslOptions: SSLOptions): Unit = { assume(testBackend.name == MINIKUBE_TEST_BACKEND) @@ -134,16 +151,7 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) } private def runSparkPiAndVerifyCompletion(appResource: String): Unit = { - val client = new org.apache.spark.deploy.kubernetes.submit.v2.Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - appArgs = Array.empty[String], - mainAppResource = appResource, - kubernetesClientProvider = - new SubmissionKubernetesClientProviderImpl(sparkConf), - mountedDependencyManagerProvider = - new MountedDependencyManagerProviderImpl(sparkConf)) - client.run() + Client.run(sparkConf, appResource, KubernetesSuite.SPARK_PI_MAIN_CLASS, Array.empty[String]) val driverPod = kubernetesTestComponents.kubernetesClient .pods() .withLabel("spark-app-locator", APP_LOCATOR_LABEL) @@ -160,16 +168,11 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) } private def runSparkGroupByTestAndVerifyCompletion(appResource: String): Unit = { - val client = new org.apache.spark.deploy.kubernetes.submit.v2.Client( + Client.run( sparkConf = sparkConf, - mainClass = KubernetesSuite.GROUP_BY_MAIN_CLASS, appArgs = Array.empty[String], - mainAppResource = appResource, - kubernetesClientProvider = - new SubmissionKubernetesClientProviderImpl(sparkConf), - mountedDependencyManagerProvider = - new MountedDependencyManagerProviderImpl(sparkConf)) - client.run() + mainClass = KubernetesSuite.GROUP_BY_MAIN_CLASS, + mainAppResource = appResource) val driverPod = kubernetesTestComponents.kubernetesClient .pods() .withLabel("spark-app-locator", APP_LOCATOR_LABEL) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala index ca549fa27d630..3a99f907d15fd 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala @@ -43,7 +43,6 @@ private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesC private val PROPERTIES_FILE_NAME = "staging-server.properties" private val PROPERTIES_DIR = "/var/data/spark-staging-server" private val PROPERTIES_FILE_PATH = s"$PROPERTIES_DIR/$PROPERTIES_FILE_NAME" - private var activeResources = Seq.empty[HasMetadata] // Returns the NodePort the staging server is listening on def 
launchStagingServer(sslOptions: SSLOptions): Int = { @@ -146,8 +145,8 @@ private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesC .endPort() .endSpec() .build() - val stagingServerPodReadyWatcher = new ReadinessWatcher[Pod] - val serviceReadyWatcher = new ReadinessWatcher[Endpoints] + val stagingServerPodReadyWatcher = new SparkReadinessWatcher[Pod] + val serviceReadyWatcher = new SparkReadinessWatcher[Endpoints] val allResources = Seq( stagingServerService, stagingServerConfigMap, @@ -159,9 +158,7 @@ private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesC Utils.tryWithResource(kubernetesClient.endpoints() .withName(stagingServerService.getMetadata.getName) .watch(serviceReadyWatcher)) { _ => - activeResources = kubernetesClient.resourceList(allResources: _*) - .createOrReplace() - .asScala + kubernetesClient.resourceList(allResources: _*).createOrReplace() stagingServerPodReadyWatcher.waitUntilReady() serviceReadyWatcher.waitUntilReady() } @@ -172,25 +169,4 @@ private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesC .get(0) .getNodePort } - - def tearDownStagingServer(): Unit = { - kubernetesClient.resourceList(activeResources: _*).delete() - activeResources = Seq.empty[HasMetadata] - } - - private class ReadinessWatcher[T <: HasMetadata] extends Watcher[T] { - - private val signal = SettableFuture.create[Boolean] - - override def eventReceived(action: Action, resource: T): Unit = { - if ((action == Action.MODIFIED || action == Action.ADDED) && - Readiness.isReady(resource)) { - signal.set(true) - } - } - - override def onClose(cause: KubernetesClientException): Unit = {} - - def waitUntilReady(): Boolean = signal.get(30, TimeUnit.SECONDS) - } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/SparkReadinessWatcher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/SparkReadinessWatcher.scala new file mode 100644 index 0000000000000..20517eb2fc2a6 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/SparkReadinessWatcher.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.integrationtest + +import java.util.concurrent.TimeUnit + +import com.google.common.util.concurrent.SettableFuture +import io.fabric8.kubernetes.api.model.HasMetadata +import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action +import io.fabric8.kubernetes.client.internal.readiness.Readiness + +private[spark] class SparkReadinessWatcher[T <: HasMetadata] extends Watcher[T] { + + private val signal = SettableFuture.create[Boolean] + + override def eventReceived(action: Action, resource: T): Unit = { + if ((action == Action.MODIFIED || action == Action.ADDED) && + Readiness.isReady(resource)) { + signal.set(true) + } + } + + override def onClose(cause: KubernetesClientException): Unit = {} + + def waitUntilReady(): Boolean = signal.get(30, TimeUnit.SECONDS) +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/StaticAssetServerLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/StaticAssetServerLauncher.scala new file mode 100644 index 0000000000000..6b483769f5254 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/StaticAssetServerLauncher.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest + +import io.fabric8.kubernetes.api.model.{HTTPGetActionBuilder, Pod} +import io.fabric8.kubernetes.client.KubernetesClient + +import org.apache.spark.util.Utils + +/** + * Launches a simple HTTP server which provides jars that can be downloaded by Spark applications + * in integration tests. + */ +private[spark] class StaticAssetServerLauncher(kubernetesClient: KubernetesClient) { + + // Returns the HTTP Base URI of the server. 
+ def launchStaticAssetServer(): String = { + val readinessWatcher = new SparkReadinessWatcher[Pod] + val probePingHttpGet = new HTTPGetActionBuilder() + .withNewPort(8080) + .withScheme("HTTP") + .withPath("/") + .build() + Utils.tryWithResource(kubernetesClient + .pods() + .withName("integration-test-static-assets") + .watch(readinessWatcher)) { _ => + val pod = kubernetesClient.pods().createNew() + .withNewMetadata() + .withName("integration-test-static-assets") + .endMetadata() + .withNewSpec() + .addNewContainer() + .withName("static-asset-server-container") + .withImage("spark-integration-test-asset-server:latest") + .withImagePullPolicy("IfNotPresent") + .withNewReadinessProbe() + .withHttpGet(probePingHttpGet) + .endReadinessProbe() + .endContainer() + .endSpec() + .done() + readinessWatcher.waitUntilReady() + val podIP = kubernetesClient.pods().withName(pod.getMetadata.getName).get() + .getStatus + .getPodIP + s"http://$podIP:8080" + } + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala index 52b8c7d7359a6..0692cf55db848 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala @@ -34,6 +34,8 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, private val SHUFFLE_SERVICE_DOCKER_FILE = "dockerfiles/shuffle-service/Dockerfile" private val DRIVER_INIT_DOCKER_FILE = "dockerfiles/driver-init/Dockerfile" private val STAGING_SERVER_DOCKER_FILE = "dockerfiles/resource-staging-server/Dockerfile" + private val STATIC_ASSET_SERVER_DOCKER_FILE = + "dockerfiles/integration-test-asset-server/Dockerfile" private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) private val dockerHost = dockerEnv.getOrElse("DOCKER_HOST", @@ -65,6 +67,7 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, buildImage("spark-driver-v2", DRIVER_V2_DOCKER_FILE) buildImage("spark-resource-staging-server", STAGING_SERVER_DOCKER_FILE) buildImage("spark-driver-init", DRIVER_INIT_DOCKER_FILE) + buildImage("spark-integration-test-asset-server", STATIC_ASSET_SERVER_DOCKER_FILE) } private def buildImage(name: String, dockerFile: String): Unit = { From e071ad9c0e8b6a0099de9907def520af6e159caf Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Wed, 17 May 2017 16:33:55 -0700 Subject: [PATCH 107/156] Scalastyle fixes (#278) --- .../integrationtest/KubernetesTestComponents.scala | 2 +- .../deploy/kubernetes/integrationtest/ProcessUtils.scala | 6 +++--- .../spark/deploy/kubernetes/integrationtest/constants.scala | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala index 8cdacee655c05..677c0db606a47 100644 --- 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala @@ -95,4 +95,4 @@ private[spark] class KubernetesTestComponents(defaultClient: DefaultKubernetesCl val trustManager = SSLUtils.trustManagers(kubernetesConf)(0).asInstanceOf[X509TrustManager] HttpClientUtil.createClient[T](Set(url), 5, sslContext.getSocketFactory, trustManager) } -} \ No newline at end of file +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala index d0bfac3085487..4008007b72fc4 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala @@ -26,9 +26,9 @@ import org.apache.spark.util.Utils object ProcessUtils extends Logging { /** - * executeProcess is used to run a command and return the output if it - * completes within timeout seconds. - */ + * executeProcess is used to run a command and return the output if it + * completes within timeout seconds. + */ def executeProcess(fullCommand: Array[String], timeout: Long): Seq[String] = { val pb = new ProcessBuilder().command(fullCommand: _*) pb.redirectErrorStream(true) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala index 8207198b529d2..bfded1003fc25 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala @@ -19,4 +19,4 @@ package org.apache.spark.deploy.kubernetes.integrationtest package object constants { val MINIKUBE_TEST_BACKEND = "minikube" val GCE_TEST_BACKEND = "gce" -} \ No newline at end of file +} From 6882a1bf0e91cec325b947e1bc9ef7718cc5bf52 Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Thu, 18 May 2017 00:25:17 -0500 Subject: [PATCH 108/156] Exit properly when the k8s cluster is not available. (#256) * Exit properly when the k8s cluster is not available. * add jetty to k8s module dependency so we can use only rebuild the k8s module. * CR * Fixed single thread scheduler. * Fixed scalastyle check. 
* CR --- resource-managers/kubernetes/core/pom.xml | 1 - .../spark/deploy/kubernetes/submit/v1/Client.scala | 1 + .../submit/v1/LoggingPodStatusWatcher.scala | 13 +++++++++---- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 70c252009c9b4..aa429f73a5627 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -133,4 +133,3 @@ - diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index 0f1e7886a1ba2..8f1e356bec8ca 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -155,6 +155,7 @@ private[spark] class Client( .pods() .withName(kubernetesDriverPodName) .watch(loggingWatch)) { _ => + loggingWatch.start() val resourceCleanShutdownHook = ShutdownHookManager.addShutdownHook(() => kubernetesResourceCleaner.deleteAllRegisteredResourcesFromKubernetes(kubernetesClient)) val cleanupServiceManagerHook = ShutdownHookManager.addShutdownHook( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala index 7be334194d9d7..537bcccaa1458 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala @@ -24,6 +24,7 @@ import io.fabric8.kubernetes.client.Watcher.Action import scala.collection.JavaConverters._ import org.apache.spark.internal.Logging +import org.apache.spark.util.ThreadUtils /** * A monitor for the running Kubernetes pod of a Spark application. 
Status logging occurs on @@ -40,19 +41,23 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL extends Watcher[Pod] with Logging { // start timer for periodic logging - private val scheduler = Executors.newScheduledThreadPool(1) + private val scheduler = + ThreadUtils.newDaemonSingleThreadScheduledExecutor("logging-pod-status-watcher") private val logRunnable: Runnable = new Runnable { override def run() = logShortStatus() } - if (interval > 0) { - scheduler.scheduleWithFixedDelay(logRunnable, 0, interval, TimeUnit.MILLISECONDS) - } private var pod: Option[Pod] = Option.empty private def phase: String = pod.map(_.getStatus().getPhase()).getOrElse("unknown") private def status: String = pod.map(_.getStatus().getContainerStatuses().toString()) .getOrElse("unknown") + def start(): Unit = { + if (interval > 0) { + scheduler.scheduleAtFixedRate(logRunnable, 0, interval, TimeUnit.MILLISECONDS) + } + } + override def eventReceived(action: Action, pod: Pod): Unit = { this.pod = Option(pod) action match { From 9d6665cde35b897498aa0a9ffdbb89b3b903caf1 Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 18 May 2017 14:27:31 -0700 Subject: [PATCH 109/156] Support driver pod kubernetes credentials mounting in V2 submission (#246) --- .../kubernetes/KubernetesCredentials.scala | 23 +++ .../spark/deploy/kubernetes/config.scala | 20 +- .../spark/deploy/kubernetes/constants.scala | 16 ++ ...iverPodKubernetesCredentialsProvider.scala | 11 +- .../deploy/kubernetes/submit/v1/Client.scala | 6 +- .../deploy/kubernetes/submit/v2/Client.scala | 26 ++- ...riverPodKubernetesCredentialsMounter.scala | 175 ++++++++++++++++++ ...KubernetesCredentialsMounterProvider.scala | 46 +++++ .../v2/SubmittedDependencyUploaderImpl.scala | 3 +- .../v1/KubernetesRestProtocolMessages.scala | 7 +- .../v1/KubernetesSparkRestServer.scala | 7 +- .../v2/ResourceStagingService.scala | 2 +- .../v2/ResourceStagingServiceImpl.scala | 2 +- .../DriverPodKubernetesClientProvider.scala | 16 +- .../kubernetes/KubernetesClientBuilder.scala | 97 ---------- .../kubernetes/submit/v2/ClientV2Suite.scala | 171 ++++++++++++----- ...PodKubernetesCredentialsMounterSuite.scala | 167 +++++++++++++++++ .../v2/ResourceStagingServerSuite.scala | 3 +- .../v2/ResourceStagingServiceImplSuite.scala | 2 +- .../integrationtest/KubernetesV2Suite.scala | 13 ++ 20 files changed, 632 insertions(+), 181 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesCredentials.scala rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v1 => }/DriverPodKubernetesCredentialsProvider.scala (88%) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounter.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterProvider.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterSuite.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesCredentials.scala 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesCredentials.scala new file mode 100644 index 0000000000000..aba94e6969529 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesCredentials.scala @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +case class KubernetesCredentials( + oauthTokenBase64: Option[String], + caCertDataBase64: Option[String], + clientKeyDataBase64: Option[String], + clientCertDataBase64: Option[String]) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index f0a39fe359227..45e5a46a26258 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -120,14 +120,20 @@ package object config extends Logging { private[spark] val KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE = ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.caCertFile") .doc("Path on the driver pod's disk containing the CA cert file to use when authenticating" + - " against Kubernetes.") + " against Kubernetes. Typically this is configured by spark-submit from mounting a" + + " secret from the submitting machine into the pod, and hence this configuration is marked" + + " as internal, but this can also be set manually to use a certificate that is mounted" + + " into the driver pod via other means.") .stringConf .createOptional private[spark] val KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE = ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.clientKeyFile") .doc("Path on the driver pod's disk containing the client key file to use when" + - " authenticating against Kubernetes.") + " authenticating against Kubernetes. Typically this is configured by spark-submit from" + + " mounting a secret from the submitting machine into the pod, and hence this" + + " configuration is marked as internal, but this can also be set manually to" + + " use a key file that is mounted into the driver pod via other means.") .internal() .stringConf .createOptional @@ -135,7 +141,10 @@ package object config extends Logging { private[spark] val KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE = ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.clientCertFile") .doc("Path on the driver pod's disk containing the client cert file to use when" + - " authenticating against Kubernetes.") + " authenticating against Kubernetes. 
Typically this is configured by spark-submit from" + + " mounting a secret from the submitting machine into the pod, and hence this" + + " configuration is marked as internal, but this can also be set manually to" + + " use a certificate that is mounted into the driver pod via other means.") .internal() .stringConf .createOptional @@ -143,7 +152,10 @@ package object config extends Logging { private[spark] val KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN = ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.oauthTokenFile") .doc("Path on the driver pod's disk containing the OAuth token file to use when" + - " authenticating against Kubernetes.") + " authenticating against Kubernetes. Typically this is configured by spark-submit from" + + " mounting a secret from the submitting machine into the pod, and hence this" + + " configuration is marked as internal, but this can also be set manually to" + + " use a token that is mounted into the driver pod via other means.") .internal() .stringConf .createOptional diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 4c4f7b9fc3b23..8d0965078aaa8 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -38,6 +38,22 @@ package object constants { private[spark] val SUBMISSION_SSL_SECRETS_VOLUME_NAME = "spark-submission-server-ssl-secrets" private[spark] val SUBMISSION_SSL_KEY_PEM_SECRET_NAME = "spark-submission-server-key-pem" private[spark] val SUBMISSION_SSL_CERT_PEM_SECRET_NAME = "spark-submission-server-cert-pem" + private[spark] val DRIVER_CREDENTIALS_SECRETS_BASE_DIR = + "/mnt/secrets/spark-kubernetes-credentials" + private[spark] val DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME = "ca-cert" + private[spark] val DRIVER_CREDENTIALS_CA_CERT_PATH = + s"$DRIVER_CREDENTIALS_SECRETS_BASE_DIR/$DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME" + private[spark] val DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME = "client-key" + private[spark] val DRIVER_CREDENTIALS_CLIENT_KEY_PATH = + s"$DRIVER_CREDENTIALS_SECRETS_BASE_DIR/$DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME" + private[spark] val DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME = "client-cert" + private[spark] val DRIVER_CREDENTIALS_CLIENT_CERT_PATH = + s"$DRIVER_CREDENTIALS_SECRETS_BASE_DIR/$DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME" + private[spark] val DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME = "oauth-token" + private[spark] val DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH = + s"$DRIVER_CREDENTIALS_SECRETS_BASE_DIR/$DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME" + private[spark] val DRIVER_CREDENTIALS_SECRET_VOLUME_NAME = "kubernetes-credentials" + // Default and fixed ports private[spark] val SUBMISSION_SERVER_PORT = 7077 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala similarity index 88% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala rename to 
resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala index 112226dbe3fc1..404741520c059 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala @@ -14,15 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v1 +package org.apache.spark.deploy.kubernetes.submit import java.io.File +import com.google.common.base.Charsets import com.google.common.io.{BaseEncoding, Files} import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.KubernetesCredentials import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.internal.config.OptionalConfigEntry private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf) { @@ -38,7 +39,9 @@ private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf require(sparkConf.get(KUBERNETES_DRIVER_CLIENT_CERT_FILE).isEmpty, "Cannot specify both a service account and a driver pod client cert file.") } - val oauthToken = sparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN) + val oauthTokenBase64 = sparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).map { token => + BaseEncoding.base64().encode(token.getBytes(Charsets.UTF_8)) + } val caCertDataBase64 = safeFileConfToBase64(KUBERNETES_DRIVER_CA_CERT_FILE, s"Driver CA cert file provided at %s does not exist or is not a file.") val clientKeyDataBase64 = safeFileConfToBase64(KUBERNETES_DRIVER_CLIENT_KEY_FILE, @@ -46,7 +49,7 @@ private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf val clientCertDataBase64 = safeFileConfToBase64(KUBERNETES_DRIVER_CLIENT_CERT_FILE, s"Driver client cert file provided at %s does not exist or is not a file.") KubernetesCredentials( - oauthToken = oauthToken, + oauthTokenBase64 = oauthTokenBase64, caCertDataBase64 = caCertDataBase64, clientKeyDataBase64 = clientKeyDataBase64, clientCertDataBase64 = clientCertDataBase64) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index 8f1e356bec8ca..fa3c97c6957b5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -30,11 +30,11 @@ import org.apache.commons.codec.binary.Base64 import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils -import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesCredentials, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} +import 
org.apache.spark.deploy.kubernetes.submit.{DriverPodKubernetesCredentialsProvider, KubernetesFileUtils} +import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} import org.apache.spark.internal.Logging import org.apache.spark.util.{ShutdownHookManager, Utils} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala index a403a91840bd6..da08e17dee85b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala @@ -48,7 +48,9 @@ private[spark] class Client( sparkJars: Seq[String], sparkFiles: Seq[String], kubernetesClientProvider: SubmissionKubernetesClientProvider, - initContainerComponentsProvider: DriverInitContainerComponentsProvider) extends Logging { + initContainerComponentsProvider: DriverInitContainerComponentsProvider, + kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider) + extends Logging { private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) .getOrElse(kubernetesAppId) @@ -133,9 +135,6 @@ private[spark] class Client( .provideInitContainerBootstrap() .bootstrapInitContainerAndVolumes(driverContainer.getName, basePod) - val driverOwnedResources = Seq(initContainerConfigMap) ++ - maybeSubmittedDependenciesSecret.toSeq - val containerLocalizedFilesResolver = initContainerComponentsProvider .provideContainerLocalizedFilesResolver() val resolvedSparkJars = containerLocalizedFilesResolver.resolveSubmittedSparkJars() @@ -143,8 +142,15 @@ private[spark] class Client( val executorInitContainerConfiguration = initContainerComponentsProvider .provideExecutorInitContainerConfiguration() - val resolvedSparkConf = executorInitContainerConfiguration + val sparkConfWithExecutorInit = executorInitContainerConfiguration .configureSparkConfForExecutorInitContainer(sparkConf) + val credentialsMounter = kubernetesCredentialsMounterProvider + .getDriverPodKubernetesCredentialsMounter() + val credentialsSecret = credentialsMounter.createCredentialsSecret() + val podWithInitContainerAndMountedCreds = credentialsMounter.mountDriverKubernetesCredentials( + podWithInitContainer, driverContainer.getName, credentialsSecret) + val resolvedSparkConf = credentialsMounter.setDriverPodKubernetesCredentialLocations( + sparkConfWithExecutorInit) if (resolvedSparkJars.nonEmpty) { resolvedSparkConf.set("spark.jars", resolvedSparkJars.mkString(",")) } @@ -166,7 +172,7 @@ private[spark] class Client( val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { case (confKey, confValue) => s"-D$confKey=$confValue" }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") - val resolvedDriverPod = podWithInitContainer.editSpec() + val resolvedDriverPod = podWithInitContainerAndMountedCreds.editSpec() .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainer.getName)) .addNewEnv() .withName(ENV_MOUNTED_CLASSPATH) @@ -181,6 +187,9 @@ private[spark] class Client( .build() val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) try { + val driverOwnedResources = Seq(initContainerConfigMap) ++ + maybeSubmittedDependenciesSecret.toSeq ++ + 
credentialsSecret.toSeq val driverPodOwnerReference = new OwnerReferenceBuilder() .withName(createdDriverPod.getMetadata.getName) .withApiVersion(createdDriverPod.getApiVersion) @@ -261,6 +270,8 @@ private[spark] object Client { val initContainerComponentsProvider = new DriverInitContainerComponentsProviderImpl( sparkConf, kubernetesAppId, sparkJars, sparkFiles) val kubernetesClientProvider = new SubmissionKubernetesClientProviderImpl(sparkConf) + val kubernetesCredentialsMounterProvider = + new DriverPodKubernetesCredentialsMounterProviderImpl(sparkConf, kubernetesAppId) new Client( appName, kubernetesAppId, @@ -270,6 +281,7 @@ private[spark] object Client { sparkJars, sparkFiles, kubernetesClientProvider, - initContainerComponentsProvider).run() + initContainerComponentsProvider, + kubernetesCredentialsMounterProvider).run() } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounter.scala new file mode 100644 index 0000000000000..9759669335774 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounter.scala @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import io.fabric8.kubernetes.api.model.{PodBuilder, Secret, SecretBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.KubernetesCredentials +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.internal.config.OptionalConfigEntry + +private[spark] trait DriverPodKubernetesCredentialsMounter { + + /** + * Set fields on the Spark configuration that indicate where the driver pod is + * to find its Kubernetes credentials for requesting executors. + */ + def setDriverPodKubernetesCredentialLocations(sparkConf: SparkConf): SparkConf + + /** + * Create the Kubernetes secret object that correspond to the driver's credentials + * that have to be created and mounted into the driver pod. The single Secret + * object contains all of the data entries for the driver pod's Kubernetes + * credentials. Returns empty if no secrets are to be mounted. + */ + def createCredentialsSecret(): Option[Secret] + + /** + * Mount any Kubernetes credentials from the submitting machine's disk into the driver pod. The + * secret that is passed in here should have been created from createCredentialsSecret so that + * the implementation does not need to hold its state. 
+ */ + def mountDriverKubernetesCredentials( + originalPodSpec: PodBuilder, + driverContainerName: String, + credentialsSecret: Option[Secret]): PodBuilder +} + +private[spark] class DriverPodKubernetesCredentialsMounterImpl( + kubernetesAppId: String, + submitterLocalDriverPodKubernetesCredentials: KubernetesCredentials, + maybeUserSpecifiedMountedClientKeyFile: Option[String], + maybeUserSpecifiedMountedClientCertFile: Option[String], + maybeUserSpecifiedMountedOAuthTokenFile: Option[String], + maybeUserSpecifiedMountedCaCertFile: Option[String]) + extends DriverPodKubernetesCredentialsMounter { + + override def setDriverPodKubernetesCredentialLocations(sparkConf: SparkConf): SparkConf = { + val resolvedMountedClientKeyFile = resolveSecretLocation( + maybeUserSpecifiedMountedClientKeyFile, + submitterLocalDriverPodKubernetesCredentials.clientKeyDataBase64, + DRIVER_CREDENTIALS_CLIENT_KEY_PATH) + val resolvedMountedClientCertFile = resolveSecretLocation( + maybeUserSpecifiedMountedClientCertFile, + submitterLocalDriverPodKubernetesCredentials.clientCertDataBase64, + DRIVER_CREDENTIALS_CLIENT_CERT_PATH) + val resolvedMountedCaCertFile = resolveSecretLocation( + maybeUserSpecifiedMountedCaCertFile, + submitterLocalDriverPodKubernetesCredentials.caCertDataBase64, + DRIVER_CREDENTIALS_CA_CERT_PATH) + val resolvedMountedOAuthTokenFile = resolveSecretLocation( + maybeUserSpecifiedMountedOAuthTokenFile, + submitterLocalDriverPodKubernetesCredentials.oauthTokenBase64, + DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH) + val sparkConfWithCredentialLocations = sparkConf.clone() + .setOption(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE, resolvedMountedCaCertFile) + .setOption(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE, resolvedMountedClientKeyFile) + .setOption(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE, resolvedMountedClientCertFile) + .setOption(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN, resolvedMountedOAuthTokenFile) + sparkConfWithCredentialLocations.get(KUBERNETES_DRIVER_OAUTH_TOKEN).foreach { _ => + sparkConfWithCredentialLocations.set(KUBERNETES_DRIVER_OAUTH_TOKEN, "") + } + sparkConfWithCredentialLocations.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { _ => + sparkConfWithCredentialLocations.set(KUBERNETES_SUBMIT_OAUTH_TOKEN, "") + } + sparkConfWithCredentialLocations + } + + override def createCredentialsSecret(): Option[Secret] = { + val allSecretData = + resolveSecretData( + maybeUserSpecifiedMountedClientKeyFile, + submitterLocalDriverPodKubernetesCredentials.clientKeyDataBase64, + DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME) ++ + resolveSecretData( + maybeUserSpecifiedMountedClientCertFile, + submitterLocalDriverPodKubernetesCredentials.clientCertDataBase64, + DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME) ++ + resolveSecretData( + maybeUserSpecifiedMountedCaCertFile, + submitterLocalDriverPodKubernetesCredentials.caCertDataBase64, + DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME) ++ + resolveSecretData( + maybeUserSpecifiedMountedOAuthTokenFile, + submitterLocalDriverPodKubernetesCredentials.oauthTokenBase64, + DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME) + if (allSecretData.isEmpty) { + None + } else { + Some(new SecretBuilder() + .withNewMetadata().withName(s"$kubernetesAppId-kubernetes-credentials").endMetadata() + .withData(allSecretData.asJava) + .build()) + } + } + + override def mountDriverKubernetesCredentials( + originalPodSpec: PodBuilder, + driverContainerName: String, + credentialsSecret: Option[Secret]): PodBuilder = { + credentialsSecret.map { secret => + originalPodSpec.editSpec() + .addNewVolume() + 
.withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) + .withNewSecret().withSecretName(secret.getMetadata.getName).endSecret() + .endVolume() + .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainerName)) + .addNewVolumeMount() + .withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) + .withMountPath(DRIVER_CREDENTIALS_SECRETS_BASE_DIR) + .endVolumeMount() + .endContainer() + .endSpec() + }.getOrElse(originalPodSpec) + } + + private def resolveSecretLocation( + mountedUserSpecified: Option[String], + valueMountedFromSubmitter: Option[String], + mountedCanonicalLocation: String): Option[String] = { + mountedUserSpecified.orElse(valueMountedFromSubmitter.map( _ => { + mountedCanonicalLocation + })) + } + + private def resolveSecretData( + mountedUserSpecified: Option[String], + valueMountedFromSubmitter: Option[String], + secretName: String): Map[String, String] = { + mountedUserSpecified.map { _ => Map.empty[String, String]} + .getOrElse { + valueMountedFromSubmitter.map { valueBase64 => + Map(secretName -> valueBase64) + }.getOrElse(Map.empty[String, String]) + } + } + + private implicit def augmentSparkConf(sparkConf: SparkConf): OptionSettableSparkConf = { + new OptionSettableSparkConf(sparkConf) + } +} + +private class OptionSettableSparkConf(sparkConf: SparkConf) { + def setOption[T](configEntry: OptionalConfigEntry[T], option: Option[T]): SparkConf = { + option.map( opt => { + sparkConf.set(configEntry, opt) + }).getOrElse(sparkConf) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterProvider.scala new file mode 100644 index 0000000000000..e981c54d23a9d --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterProvider.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
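The mounter above is designed to be driven in three steps while the submission client assembles the driver pod: build the optional credentials secret, mount it into the driver container, and point the driver's Spark configuration at the resulting credential locations. A minimal sketch of that sequence follows; the application id, container name, and base64 payloads are illustrative placeholders rather than values taken from this change.

package org.apache.spark.deploy.kubernetes.submit.v2

import io.fabric8.kubernetes.api.model.{HasMetadata, PodBuilder}

import org.apache.spark.SparkConf
import org.apache.spark.deploy.kubernetes.KubernetesCredentials
import org.apache.spark.deploy.kubernetes.config._

object DriverCredentialsMountingSketch {
  def main(args: Array[String]): Unit = {
    val kubernetesAppId = "spark-pi-1492000000000"        // placeholder application id
    val driverContainerName = "spark-kubernetes-driver"   // placeholder driver container name
    val mounter = new DriverPodKubernetesCredentialsMounterImpl(
      kubernetesAppId = kubernetesAppId,
      submitterLocalDriverPodKubernetesCredentials = KubernetesCredentials(
        oauthTokenBase64 = None,
        caCertDataBase64 = None,
        clientKeyDataBase64 = Some("PLACEHOLDER_BASE64_KEY_DATA"),
        clientCertDataBase64 = Some("PLACEHOLDER_BASE64_CERT_DATA")),
      maybeUserSpecifiedMountedClientKeyFile = None,
      maybeUserSpecifiedMountedClientCertFile = None,
      maybeUserSpecifiedMountedOAuthTokenFile = None,
      maybeUserSpecifiedMountedCaCertFile = None)

    // Step 1: gather the submitter-local credential data into a single optional Secret.
    val credentialsSecret = mounter.createCredentialsSecret()

    // Step 2: mount that secret as a volume in the driver container of the pod being built.
    val basePod = new PodBuilder()
      .withNewMetadata().withName(kubernetesAppId).endMetadata()
      .withNewSpec()
        .addNewContainer().withName(driverContainerName).endContainer()
      .endSpec()
    val podWithCredentials = mounter.mountDriverKubernetesCredentials(
      basePod, driverContainerName, credentialsSecret)

    // Step 3: rewrite the Spark configuration so the driver reads the credentials from the
    // canonical in-pod locations (or from user-specified pre-mounted files, when given).
    val resolvedConf = mounter.setDriverPodKubernetesCredentialLocations(new SparkConf(false))

    // The secret, when present, is created alongside the driver pod and owned by it.
    val driverOwnedResources: Seq[HasMetadata] = credentialsSecret.toSeq
    println(s"Pod ${podWithCredentials.build().getMetadata.getName} owns " +
      s"${driverOwnedResources.size} credential secret(s); client key location: " +
      s"${resolvedConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE)}")
  }
}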
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.submit.DriverPodKubernetesCredentialsProvider + +private[spark] trait DriverPodKubernetesCredentialsMounterProvider { + + def getDriverPodKubernetesCredentialsMounter() + : DriverPodKubernetesCredentialsMounter +} + +private[spark] class DriverPodKubernetesCredentialsMounterProviderImpl( + sparkConf: SparkConf, + kubernetesAppId: String) + extends DriverPodKubernetesCredentialsMounterProvider { + + override def getDriverPodKubernetesCredentialsMounter() + : DriverPodKubernetesCredentialsMounter = { + val submitterLocalDriverPodKubernetesCredentials = + new DriverPodKubernetesCredentialsProvider(sparkConf).get() + new DriverPodKubernetesCredentialsMounterImpl( + kubernetesAppId, + submitterLocalDriverPodKubernetesCredentials, + sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE), + sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE), + sparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN), + sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE)) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala index f22759d463cb7..5f98facfb691f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala @@ -25,9 +25,8 @@ import okhttp3.RequestBody import retrofit2.Call import org.apache.spark.{SparkException, SSLOptions} -import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils -import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory} import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala index cd1f9dcdf5879..bdd4a85da8f85 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala @@ -19,15 +19,10 @@ package org.apache.spark.deploy.rest.kubernetes.v1 import com.fasterxml.jackson.annotation.{JsonIgnore, JsonSubTypes, JsonTypeInfo} import org.apache.spark.SPARK_VERSION +import org.apache.spark.deploy.kubernetes.KubernetesCredentials import org.apache.spark.deploy.rest.{SubmitRestProtocolRequest, SubmitRestProtocolResponse} import org.apache.spark.util.Utils -case class KubernetesCredentials( - oauthToken: Option[String], - caCertDataBase64: Option[String], - clientKeyDataBase64: Option[String], - clientCertDataBase64: Option[String]) - case class KubernetesCreateSubmissionRequest( appResource: AppResource, mainClass: String, 
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala index 7847ba2546594..52ca3ef956a79 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala @@ -33,7 +33,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils import org.apache.spark.deploy.rest._ @@ -306,7 +306,10 @@ private[spark] class KubernetesSparkRestServer( + resolvedDirectory.getAbsolutePath) } val oauthTokenFile = writeRawStringCredentialAndGetConf("oauth-token.txt", resolvedDirectory, - KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN, kubernetesCredentials.oauthToken) + KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN, + kubernetesCredentials.oauthTokenBase64.map { base64 => + new String(BaseEncoding.base64().decode(base64), Charsets.UTF_8) + }) val caCertFile = writeBase64CredentialAndGetConf("ca.crt", resolvedDirectory, KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE, kubernetesCredentials.caCertDataBase64) val clientKeyFile = writeBase64CredentialAndGetConf("key.key", resolvedDirectory, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala index b7c6c4fb913da..5dbe55b72bd8b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala @@ -22,8 +22,8 @@ import javax.ws.rs.core.{MediaType, StreamingOutput} import org.glassfish.jersey.media.multipart.FormDataParam +import org.apache.spark.deploy.kubernetes.KubernetesCredentials import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret -import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials /** * Service that receives application data that can be retrieved later on. 
This is primarily used diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala index 3dfa83c85e6dd..34c3192ae6780 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala @@ -26,8 +26,8 @@ import com.google.common.io.{BaseEncoding, ByteStreams, Files} import scala.collection.concurrent.TrieMap import org.apache.spark.SparkException +import org.apache.spark.deploy.kubernetes.KubernetesCredentials import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret -import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.internal.Logging import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala index b8c2b0c91bbeb..50f2c218c22c4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala @@ -21,10 +21,13 @@ import java.io.File import com.google.common.base.Charsets import com.google.common.io.Files import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient} +import io.fabric8.kubernetes.client.utils.HttpClientUtils +import okhttp3.Dispatcher import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.util.ThreadUtils private[spark] class DriverPodKubernetesClientProvider(sparkConf: SparkConf, namespace: String) { private val SERVICE_ACCOUNT_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) @@ -78,6 +81,17 @@ private[spark] class DriverPodKubernetesClientProvider(sparkConf: SparkConf, nam } serviceAccountConfigBuilder } - new DefaultKubernetesClient(configBuilder.build) + // Disable the ping thread that is not daemon, in order to allow + // the driver main thread to shut down upon errors. Otherwise, the driver + // will hang indefinitely. + val config = configBuilder + .withWebsocketPingInterval(0) + .build() + val httpClient = HttpClientUtils.createHttpClient(config).newBuilder() + // Use a Dispatcher with a custom executor service that creates daemon threads. The default + // executor service used by Dispatcher creates non-daemon threads. 
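// A self-contained sketch of the client construction performed by this provider: the websocket
// ping interval is zeroed out and the OkHttp Dispatcher is backed by daemon threads so that the
// driver JVM is not kept alive after its main thread exits. The in-cluster master URL below is
// an assumed placeholder for the constant used by the real provider.
package org.apache.spark.scheduler.cluster.kubernetes

import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient}
import io.fabric8.kubernetes.client.utils.HttpClientUtils
import okhttp3.Dispatcher

import org.apache.spark.util.ThreadUtils

object DaemonThreadKubernetesClientSketch {
  def buildClient(namespace: String): DefaultKubernetesClient = {
    val config = new ConfigBuilder()
      .withApiVersion("v1")
      .withMasterUrl("https://kubernetes.default.svc")  // assumed in-cluster API server address
      .withNamespace(namespace)
      .withWebsocketPingInterval(0)
      .build()
    val httpClient = HttpClientUtils.createHttpClient(config).newBuilder()
      // Daemon threads here mean the dispatcher never blocks JVM shutdown.
      .dispatcher(new Dispatcher(ThreadUtils.newDaemonCachedThreadPool("kubernetes-client-sketch")))
      .build()
    new DefaultKubernetesClient(httpClient, config)
  }
}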
+ .dispatcher(new Dispatcher(ThreadUtils.newDaemonCachedThreadPool("spark-on-k8s"))) + .build() + new DefaultKubernetesClient(httpClient, config) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala deleted file mode 100644 index 31c6eda77d058..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.scheduler.cluster.kubernetes - -import java.io.File - -import com.google.common.base.Charsets -import com.google.common.io.Files -import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient} -import io.fabric8.kubernetes.client.utils.HttpClientUtils -import okhttp3.Dispatcher - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.util.ThreadUtils - -private[spark] class KubernetesClientBuilder(sparkConf: SparkConf, namespace: String) { - private val SERVICE_ACCOUNT_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) - private val SERVICE_ACCOUNT_CA_CERT = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) - private val oauthTokenFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN) - private val caCertFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE) - private val clientKeyFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE) - private val clientCertFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE) - - /** - * Creates a {@link KubernetesClient}, expecting to be from within the context of a pod. When - * doing so, service account token files can be picked up from canonical locations. 
- */ - def buildFromWithinPod(): DefaultKubernetesClient = { - val baseClientConfigBuilder = new ConfigBuilder() - .withApiVersion("v1") - .withMasterUrl(KUBERNETES_MASTER_INTERNAL_URL) - .withNamespace(namespace) - - val configBuilder = oauthTokenFile - .orElse(caCertFile) - .orElse(clientKeyFile) - .orElse(clientCertFile) - .map { _ => - var mountedAuthConfigBuilder = baseClientConfigBuilder - oauthTokenFile.foreach { tokenFilePath => - val tokenFile = new File(tokenFilePath) - mountedAuthConfigBuilder = mountedAuthConfigBuilder - .withOauthToken(Files.toString(tokenFile, Charsets.UTF_8)) - } - caCertFile.foreach { caFile => - mountedAuthConfigBuilder = mountedAuthConfigBuilder.withCaCertFile(caFile) - } - clientKeyFile.foreach { keyFile => - mountedAuthConfigBuilder = mountedAuthConfigBuilder.withClientKeyFile(keyFile) - } - clientCertFile.foreach { certFile => - mountedAuthConfigBuilder = mountedAuthConfigBuilder.withClientCertFile(certFile) - } - mountedAuthConfigBuilder - }.getOrElse { - var serviceAccountConfigBuilder = baseClientConfigBuilder - if (SERVICE_ACCOUNT_CA_CERT.isFile) { - serviceAccountConfigBuilder = serviceAccountConfigBuilder.withCaCertFile( - SERVICE_ACCOUNT_CA_CERT.getAbsolutePath) - } - - if (SERVICE_ACCOUNT_TOKEN.isFile) { - serviceAccountConfigBuilder = serviceAccountConfigBuilder.withOauthToken( - Files.toString(SERVICE_ACCOUNT_TOKEN, Charsets.UTF_8)) - } - serviceAccountConfigBuilder - } - // Disable the ping thread that is not daemon, in order to allow - // the driver main thread to shut down upon errors. Otherwise, the driver - // will hang indefinitely. - val config = configBuilder - .withWebsocketPingInterval(0) - .build() - val httpClient = HttpClientUtils.createHttpClient(config).newBuilder() - // Use a Dispatcher with a custom executor service that creates daemon threads. The default - // executor service used by Dispatcher creates non-daemon threads. 
- .dispatcher(new Dispatcher(ThreadUtils.newDaemonCachedThreadPool("spark-on-k8s"))) - .build() - new DefaultKubernetesClient(httpClient, config) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala index 4dc1e2e44980a..f0282dbb6d31a 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala @@ -22,7 +22,7 @@ import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, DoneablePod import io.fabric8.kubernetes.client.KubernetesClient import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource} import org.hamcrest.{BaseMatcher, Description} -import org.mockito.{ArgumentCaptor, Mock, MockitoAnnotations} +import org.mockito.{AdditionalAnswers, ArgumentCaptor, Mock, MockitoAnnotations} import org.mockito.Matchers.{any, anyVararg, argThat, eq => mockitoEq} import org.mockito.Mockito.{times, verify, when} import org.mockito.invocation.InvocationOnMock @@ -37,7 +37,6 @@ import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { - private val JARS_RESOURCE = SubmittedResourceIdAndSecret("jarsId", "jarsSecret") private val FILES_RESOURCE = SubmittedResourceIdAndSecret("filesId", "filesSecret") private val SUBMITTED_RESOURCES = SubmittedResources(JARS_RESOURCE, FILES_RESOURCE) @@ -53,9 +52,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { SPARK_APP_NAME_LABEL -> APP_NAME) private val CUSTOM_ANNOTATION_KEY = "customAnnotation" private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" - private val SECRET_NAME = "secret" - private val SECRET_KEY = "secret-key" - private val SECRET_DATA = "secret-data" + private val INIT_CONTAINER_SECRET_NAME = "init-container-secret" + private val INIT_CONTAINER_SECRET_DATA = Map("secret-key" -> "secret-data") private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" private val APP_ARGS = Array("3", "20") private val SPARK_JARS = Seq( @@ -70,22 +68,21 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { "hdfs://localhost:9000/app/files/file1.txt", "file:///var/data/spark-files/file2.txt") private val INIT_CONTAINER_SECRET = new SecretBuilder() .withNewMetadata() - .withName(SECRET_NAME) + .withName(INIT_CONTAINER_SECRET_NAME) .endMetadata() - .addToData(SECRET_KEY, SECRET_DATA) + .withData(INIT_CONTAINER_SECRET_DATA.asJava) .build() - private val CONFIG_MAP_NAME = "config-map" - private val CONFIG_MAP_KEY = "config-map-key" - private val CONFIG_MAP_DATA = "config-map-data" private val CUSTOM_JAVA_OPTION_KEY = "myappoption" private val CUSTOM_JAVA_OPTION_VALUE = "myappoptionvalue" private val DRIVER_JAVA_OPTIONS = s"-D$CUSTOM_JAVA_OPTION_KEY=$CUSTOM_JAVA_OPTION_VALUE" private val DRIVER_EXTRA_CLASSPATH = "/var/data/spark-app-custom/custom-jar.jar" + private val CONFIG_MAP_NAME = "config-map" + private val CONFIG_MAP_DATA = Map("config-map-key" -> "config-map-data") private val INIT_CONTAINER_CONFIG_MAP = new ConfigMapBuilder() .withNewMetadata() .withName(CONFIG_MAP_NAME) .endMetadata() - .addToData(CONFIG_MAP_KEY, CONFIG_MAP_DATA) + .withData(CONFIG_MAP_DATA.asJava) 
.build() private val CUSTOM_DRIVER_IMAGE = "spark-custom-driver:latest" private val DRIVER_MEMORY_MB = 512 @@ -104,6 +101,17 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val DRIVER_POD_UID = "driver-pod-uid" private val DRIVER_POD_KIND = "pod" private val DRIVER_POD_API_VERSION = "v1" + private val CREDENTIALS_SECRET_NAME = "credentials-secret" + private val CREDENTIALS_SECRET_DATA = Map("credentials-secret-key" -> "credentials-secret-value") + private val CREDENTIALS_SECRET = new SecretBuilder() + .withNewMetadata() + .withName(CREDENTIALS_SECRET_NAME) + .endMetadata() + .withData(CREDENTIALS_SECRET_DATA.asJava) + .build() + private val CREDENTIALS_SET_CONF = "spark.kubernetes.driverCredentials.provided" + private val CREDENTIALS_SET_ANNOTATION = "credentials-set" + @Mock private var initContainerConfigMapBuilder: SparkInitContainerConfigMapBuilder = _ @Mock @@ -128,6 +136,10 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { HasMetadata, java.lang.Boolean] @Mock private var resourceListOps: ResourceListOps = _ + @Mock + private var credentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider = _ + @Mock + private var credentialsMounter: DriverPodKubernetesCredentialsMounter = _ before { MockitoAnnotations.initMocks(this) @@ -174,9 +186,12 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { when(executorInitContainerConfiguration.configureSparkConfForExecutorInitContainer(SPARK_CONF)) .thenReturn(SPARK_CONF_WITH_EXECUTOR_INIT_CONF) when(kubernetesClient.resourceList(anyVararg[HasMetadata]())).thenReturn(resourceListOps) + when(credentialsMounterProvider.getDriverPodKubernetesCredentialsMounter()) + .thenReturn(credentialsMounter) } test("Run with dependency uploader") { + expectationsForNoMountedCredentials() when(initContainerComponentsProvider .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) .thenReturn(Some(submittedDependencyUploader)) @@ -194,8 +209,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { verifyCreatedResourcesHaveOwnerReferences(createdResources) assert(createdResources.exists { case secret: Secret => - val expectedSecretData = Map(SECRET_KEY -> SECRET_DATA) - secret.getMetadata.getName == SECRET_NAME && secret.getData.asScala == expectedSecretData + secret.getMetadata.getName == INIT_CONTAINER_SECRET_NAME && + secret.getData.asScala == INIT_CONTAINER_SECRET_DATA case _ => false }) verifyConfigMapWasCreated(createdResources) @@ -208,15 +223,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { } test("Run without dependency uploader") { - when(initContainerComponentsProvider - .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) - .thenReturn(None) - when(initContainerComponentsProvider - .provideSubmittedDependenciesSecretBuilder(None)) - .thenReturn(None) - when(initContainerComponentsProvider - .provideInitContainerConfigMapBuilder(None)) - .thenReturn(initContainerConfigMapBuilder) + expectationsForNoMountedCredentials() + expectationsForNoDependencyUploader() runAndVerifyDriverPodHasCorrectProperties() val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) @@ -232,6 +240,65 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .provideSubmittedDependenciesSecretBuilder(None) } + test("Run with mounted credentials") { + expectationsForNoDependencyUploader() + 
when(credentialsMounter.createCredentialsSecret()).thenReturn(Some(CREDENTIALS_SECRET)) + when(credentialsMounter.mountDriverKubernetesCredentials( + any(), mockitoEq(DRIVER_CONTAINER_NAME), mockitoEq(Some(CREDENTIALS_SECRET)))) + .thenAnswer(new Answer[PodBuilder] { + override def answer(invocation: InvocationOnMock): PodBuilder = { + invocation.getArgumentAt(0, classOf[PodBuilder]).editMetadata() + .addToAnnotations(CREDENTIALS_SET_ANNOTATION, TRUE) + .endMetadata() + } + }) + when(credentialsMounter.setDriverPodKubernetesCredentialLocations(any())) + .thenAnswer(new Answer[SparkConf] { + override def answer(invocation: InvocationOnMock): SparkConf = { + invocation.getArgumentAt(0, classOf[SparkConf]).clone().set(CREDENTIALS_SET_CONF, TRUE) + } + }) + runAndVerifyPodMatchesPredicate { p => + Option(p) + .filter(pod => containerHasCorrectJvmOptions(pod, _(CREDENTIALS_SET_CONF) == TRUE)) + .exists { pod => + pod.getMetadata.getAnnotations.asScala(CREDENTIALS_SET_ANNOTATION) == TRUE + } + } + val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) + verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) + val createdResources = resourceListArgumentCaptor.getAllValues.asScala + assert(createdResources.size === 2) + verifyCreatedResourcesHaveOwnerReferences(createdResources) + assert(createdResources.exists { + case secret: Secret => + secret.getMetadata.getName == CREDENTIALS_SECRET_NAME && + secret.getData.asScala == CREDENTIALS_SECRET_DATA + case _ => false + }) + } + + private def expectationsForNoDependencyUploader(): Unit = { + when(initContainerComponentsProvider + .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) + .thenReturn(None) + when(initContainerComponentsProvider + .provideSubmittedDependenciesSecretBuilder(None)) + .thenReturn(None) + when(initContainerComponentsProvider + .provideInitContainerConfigMapBuilder(None)) + .thenReturn(initContainerConfigMapBuilder) + } + + private def expectationsForNoMountedCredentials(): Unit = { + when(credentialsMounter.setDriverPodKubernetesCredentialLocations(any())) + .thenAnswer(AdditionalAnswers.returnsFirstArg()) + when(credentialsMounter.createCredentialsSecret()).thenReturn(None) + when(credentialsMounter.mountDriverKubernetesCredentials( + any(), mockitoEq(DRIVER_CONTAINER_NAME), mockitoEq(None))) + .thenAnswer(AdditionalAnswers.returnsFirstArg()) + } + private def verifyCreatedResourcesHaveOwnerReferences( createdResources: mutable.Buffer[HasMetadata]): Unit = { assert(createdResources.forall { resource => @@ -248,14 +315,36 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private def verifyConfigMapWasCreated(createdResources: mutable.Buffer[HasMetadata]): Unit = { assert(createdResources.exists { case configMap: ConfigMap => - val expectedConfigMapData = Map(CONFIG_MAP_KEY -> CONFIG_MAP_DATA) configMap.getMetadata.getName == CONFIG_MAP_NAME && - configMap.getData.asScala == expectedConfigMapData + configMap.getData.asScala == CONFIG_MAP_DATA case _ => false }) } private def runAndVerifyDriverPodHasCorrectProperties(): Unit = { + val expectedOptions = SPARK_CONF.getAll + .filterNot(_._1 == org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS.key) + .toMap ++ + Map( + "spark.app.id" -> APP_ID, + KUBERNETES_DRIVER_POD_NAME.key -> APP_ID, + EXECUTOR_INIT_CONF_KEY -> TRUE, + CUSTOM_JAVA_OPTION_KEY -> CUSTOM_JAVA_OPTION_VALUE, + "spark.jars" -> RESOLVED_SPARK_JARS.mkString(","), + "spark.files" -> RESOLVED_SPARK_FILES.mkString(",")) + 
runAndVerifyPodMatchesPredicate { p => + Option(p) + .filter(_.getMetadata.getName == APP_ID) + .filter(podHasCorrectAnnotations) + .filter(_.getMetadata.getLabels.asScala == ALL_EXPECTED_LABELS) + .filter(containerHasCorrectBasicContainerConfiguration) + .filter(containerHasCorrectBasicEnvs) + .filter(containerHasCorrectMountedClasspath) + .exists(pod => containerHasCorrectJvmOptions(pod, _ == expectedOptions)) + } + } + + private def runAndVerifyPodMatchesPredicate(pred: (Pod => Boolean)): Unit = { new Client( APP_NAME, APP_ID, @@ -265,49 +354,31 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { SPARK_JARS, SPARK_FILES, kubernetesClientProvider, - initContainerComponentsProvider).run() + initContainerComponentsProvider, + credentialsMounterProvider).run() val podMatcher = new BaseMatcher[Pod] { override def matches(o: scala.Any): Boolean = { o match { - case p: Pod => - Option(p) - .filter(_.getMetadata.getName == APP_ID) - .filter(podHasCorrectAnnotations) - .filter(_.getMetadata.getLabels.asScala == ALL_EXPECTED_LABELS) - .filter(containerHasCorrectBasicContainerConfiguration) - .filter(containerHasCorrectBasicEnvs) - .filter(containerHasCorrectMountedClasspath) - .exists(containerHasCorrectJvmOptions) - case _ => - false + case p: Pod => pred(p) + case _ => false } } - override def describeTo(description: Description): Unit = {} } verify(podOps).create(argThat(podMatcher)) } - private def containerHasCorrectJvmOptions(pod: Pod): Boolean = { + private def containerHasCorrectJvmOptions( + pod: Pod, optionsCorrectnessPredicate: (Map[String, String] => Boolean)): Boolean = { val driverContainer = pod.getSpec.getContainers.asScala.head val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)) envs.toMap.get(ENV_DRIVER_JAVA_OPTS).exists { javaOptions => val splitOptions = javaOptions.split(" ") - val expectedOptions = SPARK_CONF.getAll - .filterNot(_._1 == org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS.key) - .toMap ++ - Map( - "spark.app.id" -> APP_ID, - KUBERNETES_DRIVER_POD_NAME.key -> APP_ID, - EXECUTOR_INIT_CONF_KEY -> TRUE, - CUSTOM_JAVA_OPTION_KEY -> CUSTOM_JAVA_OPTION_VALUE, - "spark.jars" -> RESOLVED_SPARK_JARS.mkString(","), - "spark.files" -> RESOLVED_SPARK_FILES.mkString(",")) splitOptions.forall(_.startsWith("-D")) && - splitOptions.map { option => + optionsCorrectnessPredicate(splitOptions.map { option => val withoutPrefix = option.substring(2) (withoutPrefix.split("=", 2)(0), withoutPrefix.split("=", 2)(1)) - }.toMap == expectedOptions + }.toMap) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterSuite.scala new file mode 100644 index 0000000000000..d4413076fb092 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterSuite.scala @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import io.fabric8.kubernetes.api.model.{PodBuilder, SecretBuilder} +import org.scalatest.prop.TableDrivenPropertyChecks +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.KubernetesCredentials + +class DriverPodKubernetesCredentialsMounterSuite + extends SparkFunSuite with TableDrivenPropertyChecks { + + private val CLIENT_KEY_DATA = "client-key-data" + private val CLIENT_CERT_DATA = "client-cert-data" + private val OAUTH_TOKEN_DATA = "oauth-token" + private val CA_CERT_DATA = "ca-cert-data" + private val SUBMITTER_LOCAL_DRIVER_KUBERNETES_CREDENTIALS = KubernetesCredentials( + caCertDataBase64 = Some(CA_CERT_DATA), + clientKeyDataBase64 = Some(CLIENT_KEY_DATA), + clientCertDataBase64 = Some(CLIENT_CERT_DATA), + oauthTokenBase64 = Some(OAUTH_TOKEN_DATA)) + private val APP_ID = "app-id" + private val USER_SPECIFIED_CLIENT_KEY_FILE = Some("/var/data/client-key.pem") + private val USER_SPECIFIED_CLIENT_CERT_FILE = Some("/var/data/client-cert.pem") + private val USER_SPECIFIED_OAUTH_TOKEN_FILE = Some("/var/data/token.txt") + private val USER_SPECIFIED_CA_CERT_FILE = Some("/var/data/ca.pem") + + // Different configurations of credentials mounters + private val credentialsMounterWithPreMountedFiles = + new DriverPodKubernetesCredentialsMounterImpl( + kubernetesAppId = APP_ID, + submitterLocalDriverPodKubernetesCredentials = SUBMITTER_LOCAL_DRIVER_KUBERNETES_CREDENTIALS, + maybeUserSpecifiedMountedClientKeyFile = USER_SPECIFIED_CLIENT_KEY_FILE, + maybeUserSpecifiedMountedClientCertFile = USER_SPECIFIED_CLIENT_CERT_FILE, + maybeUserSpecifiedMountedOAuthTokenFile = USER_SPECIFIED_OAUTH_TOKEN_FILE, + maybeUserSpecifiedMountedCaCertFile = USER_SPECIFIED_CA_CERT_FILE) + private val credentialsMounterWithoutPreMountedFiles = + new DriverPodKubernetesCredentialsMounterImpl( + kubernetesAppId = APP_ID, + submitterLocalDriverPodKubernetesCredentials = SUBMITTER_LOCAL_DRIVER_KUBERNETES_CREDENTIALS, + maybeUserSpecifiedMountedClientKeyFile = None, + maybeUserSpecifiedMountedClientCertFile = None, + maybeUserSpecifiedMountedOAuthTokenFile = None, + maybeUserSpecifiedMountedCaCertFile = None) + private val credentialsMounterWithoutAnyDriverCredentials = + new DriverPodKubernetesCredentialsMounterImpl( + APP_ID, KubernetesCredentials(None, None, None, None), None, None, None, None) + + // Test matrices + private val TEST_MATRIX_EXPECTED_SPARK_CONFS = Table( + ("Credentials Mounter Implementation", + "Expected client key file", + "Expected client cert file", + "Expected CA Cert file", + "Expected OAuth Token File"), + (credentialsMounterWithoutAnyDriverCredentials, + None, + None, + None, + None), + (credentialsMounterWithoutPreMountedFiles, + Some(DRIVER_CREDENTIALS_CLIENT_KEY_PATH), + Some(DRIVER_CREDENTIALS_CLIENT_CERT_PATH), + Some(DRIVER_CREDENTIALS_CA_CERT_PATH), + Some(DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH)), + 
(credentialsMounterWithPreMountedFiles, + USER_SPECIFIED_CLIENT_KEY_FILE, + USER_SPECIFIED_CLIENT_CERT_FILE, + USER_SPECIFIED_CA_CERT_FILE, + USER_SPECIFIED_OAUTH_TOKEN_FILE)) + + private val TEST_MATRIX_EXPECTED_CREDENTIALS_SECRET = Table( + ("Credentials Mounter Implementation", "Expected Credentials Secret Data"), + (credentialsMounterWithoutAnyDriverCredentials, None), + (credentialsMounterWithoutPreMountedFiles, + Some(KubernetesSecretNameAndData( + data = Map[String, String]( + DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME -> CLIENT_KEY_DATA, + DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME -> CLIENT_CERT_DATA, + DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME -> CA_CERT_DATA, + DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME -> OAUTH_TOKEN_DATA + ), + name = s"$APP_ID-kubernetes-credentials"))), + (credentialsMounterWithPreMountedFiles, None)) + + test("Credentials mounter should set the driver's Kubernetes credentials locations") { + forAll(TEST_MATRIX_EXPECTED_SPARK_CONFS) { + case (credentialsMounter, + expectedClientKeyFile, + expectedClientCertFile, + expectedCaCertFile, + expectedOAuthTokenFile) => + val baseSparkConf = new SparkConf() + val resolvedSparkConf = + credentialsMounter.setDriverPodKubernetesCredentialLocations(baseSparkConf) + assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE) === + expectedClientKeyFile) + assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE) === + expectedClientCertFile) + assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE) === + expectedCaCertFile) + assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN) === + expectedOAuthTokenFile) + } + } + + test("Credentials mounter should create the correct credentials secret.") { + forAll(TEST_MATRIX_EXPECTED_CREDENTIALS_SECRET) { + case (credentialsMounter, expectedSecretNameAndData) => + val builtSecret = credentialsMounter.createCredentialsSecret() + val secretNameAndData = builtSecret.map { secret => + KubernetesSecretNameAndData(secret.getMetadata.getName, secret.getData.asScala.toMap) + } + assert(secretNameAndData === expectedSecretNameAndData) + } + } + + test("When credentials secret is provided, driver pod should mount the secret volume.") { + val credentialsSecret = new SecretBuilder() + .withNewMetadata().withName("secret").endMetadata() + .addToData("secretKey", "secretValue") + .build() + val originalPodSpec = new PodBuilder() + .withNewMetadata().withName("pod").endMetadata() + .withNewSpec() + .addNewContainer() + .withName("container") + .endContainer() + .endSpec() + val podSpecWithMountedDriverKubernetesCredentials = + credentialsMounterWithoutPreMountedFiles.mountDriverKubernetesCredentials( + originalPodSpec, "container", Some(credentialsSecret)).build() + val volumes = podSpecWithMountedDriverKubernetesCredentials.getSpec.getVolumes.asScala + assert(volumes.exists(_.getName == DRIVER_CREDENTIALS_SECRET_VOLUME_NAME)) + volumes.find(_.getName == DRIVER_CREDENTIALS_SECRET_VOLUME_NAME).foreach { secretVolume => + assert(secretVolume.getSecret != null && secretVolume.getSecret.getSecretName == "secret") + } + } + + test("When credentials secret is absent, driver pod should not be changed.") { + val originalPodSpec = new PodBuilder() + val nonAdjustedPodSpec = + credentialsMounterWithoutAnyDriverCredentials.mountDriverKubernetesCredentials( + originalPodSpec, "driver", None) + assert(nonAdjustedPodSpec === originalPodSpec) + } +} + +private case class KubernetesSecretNameAndData(name: String, data: Map[String, String]) diff --git 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala index 08be8af30b3bc..4ef12e8686bb0 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala @@ -27,8 +27,7 @@ import org.scalatest.BeforeAndAfter import retrofit2.Call import org.apache.spark.{SparkFunSuite, SSLOptions} -import org.apache.spark.deploy.kubernetes.SSLUtils -import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials +import org.apache.spark.deploy.kubernetes.{KubernetesCredentials, SSLUtils} import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala index 60850bb877540..9677d12681a16 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala @@ -22,7 +22,7 @@ import java.nio.file.Paths import com.google.common.io.Files import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials +import org.apache.spark.deploy.kubernetes.KubernetesCredentials import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala index 3be4507ac105a..ba9d088bfcfcc 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala @@ -135,6 +135,19 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) } + test("Use client key and client cert file when requesting executors") { + sparkConf.setJars(Seq( + KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, + KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) + sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, + kubernetesTestComponents.clientConfig.getClientKeyFile) + sparkConf.set(KUBERNETES_DRIVER_CLIENT_CERT_FILE, + kubernetesTestComponents.clientConfig.getClientCertFile) + sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, + kubernetesTestComponents.clientConfig.getCaCertFile) + runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) + } + private def launchStagingServer(resourceStagingServerSslOptions: SSLOptions): Unit = { assume(testBackend.name == MINIKUBE_TEST_BACKEND) From 88306b2c2b566b0fbad8e924fb9c8e79c37d19d8 Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 19 May 2017 12:18:33 -0700 Subject: [PATCH 110/156] Allow client certificate PEM for resource staging server. 
(#257) --- .../spark/deploy/kubernetes/config.scala | 7 +- .../deploy/kubernetes/submit/v2/Client.scala | 4 +- ...riverInitContainerComponentsProvider.scala | 7 +- .../v2/SubmittedDependencySecretBuilder.scala | 1 - .../v1/KubernetesSparkRestServer.scala | 26 +++---- .../v1/PemsToKeyStoreConverter.scala | 22 ++++-- ...SparkDependencyDownloadInitContainer.scala | 13 ++-- ...ourceStagingServerSslOptionsProvider.scala | 72 +++++++++++++++---- .../spark/deploy/kubernetes/SSLUtils.scala | 9 +-- .../kubernetes/submit/v2/SSLFilePairs.scala | 23 ++++++ ...DependencyDownloadInitContainerSuite.scala | 4 +- ...StagingServerSslOptionsProviderSuite.scala | 40 +++++++++-- .../v2/ResourceStagingServerSuite.scala | 6 +- .../integrationtest/KubernetesV1Suite.scala | 19 ++--- .../integrationtest/KubernetesV2Suite.scala | 43 +++++++---- .../ResourceStagingServerLauncher.scala | 54 +++++++++----- .../minikube/MinikubeTestBackend.scala | 2 - 17 files changed, 256 insertions(+), 96 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SSLFilePairs.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 45e5a46a26258..ab442131ad271 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -364,10 +364,15 @@ package object config extends Logging { private[spark] val RESOURCE_STAGING_SERVER_SSL_NAMESPACE = "kubernetes.resourceStagingServer" private[spark] val RESOURCE_STAGING_SERVER_CERT_PEM = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.serverCertPem") - .doc("Certificate PEM file to use when having the Kubernetes dependency server" + + .doc("Certificate PEM file to use when having the resource staging server" + " listen on TLS.") .stringConf .createOptional + private[spark] val RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM = + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.clientCertPem") + .doc("Certificate PEM file to use when the client contacts the resource staging server.") + .stringConf + .createOptional private[spark] val RESOURCE_STAGING_SERVER_KEYSTORE_PASSWORD_FILE = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.keyStorePasswordFile") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala index da08e17dee85b..23e3e09834372 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala @@ -25,6 +25,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.rest.kubernetes.v2.ResourceStagingServerSslOptionsProviderImpl import org.apache.spark.internal.Logging import org.apache.spark.launcher.SparkLauncher import org.apache.spark.util.Utils @@ -267,8 +268,9 @@ private[spark] object Client { val appName = sparkConf.getOption("spark.app.name") 
.getOrElse("spark") val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + val sslOptionsProvider = new ResourceStagingServerSslOptionsProviderImpl(sparkConf) val initContainerComponentsProvider = new DriverInitContainerComponentsProviderImpl( - sparkConf, kubernetesAppId, sparkJars, sparkFiles) + sparkConf, kubernetesAppId, sparkJars, sparkFiles, sslOptionsProvider.getSslOptions) val kubernetesClientProvider = new SubmissionKubernetesClientProviderImpl(sparkConf) val kubernetesCredentialsMounterProvider = new DriverPodKubernetesCredentialsMounterProviderImpl(sparkConf, kubernetesAppId) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala index 5b649735f2b3d..7f6ae2ec47675 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.deploy.kubernetes.submit.v2 -import org.apache.spark.{SecurityManager, SparkConf} +import org.apache.spark.{SparkConf, SSLOptions} import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ @@ -46,12 +46,11 @@ private[spark] class DriverInitContainerComponentsProviderImpl( sparkConf: SparkConf, kubernetesAppId: String, sparkJars: Seq[String], - sparkFiles: Seq[String]) + sparkFiles: Seq[String], + resourceStagingServerSslOptions: SSLOptions) extends DriverInitContainerComponentsProvider { private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) - private val resourceStagingServerSslOptions = new SecurityManager(sparkConf) - .getSSLOptions(RESOURCE_STAGING_SERVER_SSL_NAMESPACE) private val jarsDownloadPath = sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) private val filesDownloadPath = sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION) private val maybeSecretName = maybeResourceStagingServerUri.map { _ => diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala index 1853b2ecce6d2..b8fa43d0573f7 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala @@ -22,7 +22,6 @@ import io.fabric8.kubernetes.api.model.{Secret, SecretBuilder} import scala.collection.JavaConverters._ import org.apache.spark.SSLOptions -import org.apache.spark.deploy.kubernetes.constants._ private[spark] trait SubmittedDependencySecretBuilder { /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala index 52ca3ef956a79..5cd24a8f9b75e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala @@ -414,18 +414,20 @@ private[spark] object KubernetesSparkRestServer { // If keystore password isn't set but we're using PEM files, generate a password .orElse(parsedArguments.keyPemFile.map(_ => randomPassword())) val resolvedKeyStore = parsedArguments.keyStoreFile.map(new File(_)).orElse( - parsedArguments.keyPemFile.map(keyPemFile => { - parsedArguments.certPemFile.map(certPemFile => { - PemsToKeyStoreConverter.convertPemsToTempKeyStoreFile( - new File(keyPemFile), - new File(certPemFile), - "provided-key", - keyStorePassword, - keyPassword, - parsedArguments.keyStoreType) - }) - }).getOrElse(throw new SparkException("When providing PEM files to set up TLS for the" + - " submission server, both the key and the certificate must be specified."))) + for { + keyPemFile <- parsedArguments.keyPemFile + certPemFile <- parsedArguments.certPemFile + resolvedKeyStorePassword <- keyStorePassword + resolvedKeyPassword <- keyPassword + } yield { + PemsToKeyStoreConverter.convertPemsToTempKeyStoreFile( + new File(keyPemFile), + new File(certPemFile), + "provided-key", + resolvedKeyStorePassword, + resolvedKeyPassword, + parsedArguments.keyStoreType) + }) new SSLOptions( enabled = true, keyStore = resolvedKeyStore, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala index 2c68b150baf91..178956a136d1c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala @@ -43,8 +43,8 @@ private[spark] object PemsToKeyStoreConverter { keyPemFile: File, certPemFile: File, keyAlias: String, - keyStorePassword: Option[String], - keyPassword: Option[String], + keyStorePassword: String, + keyPassword: String, keyStoreType: Option[String]): File = { require(keyPemFile.isFile, s"Key PEM file provided at ${keyPemFile.getAbsolutePath}" + " does not exist or is not a file.") @@ -58,12 +58,12 @@ private[spark] object PemsToKeyStoreConverter { keyStore.setKeyEntry( keyAlias, privateKey, - keyPassword.map(_.toCharArray).orNull, + keyPassword.toCharArray, certificates) val keyStoreDir = Utils.createTempDir("temp-keystores") val keyStoreFile = new File(keyStoreDir, s"keystore-${UUID.randomUUID()}.$resolvedKeyStoreType") Utils.tryWithResource(new FileOutputStream(keyStoreFile)) { storeStream => - keyStore.store(storeStream, keyStorePassword.map(_.toCharArray).orNull) + keyStore.store(storeStream, keyStorePassword.toCharArray) } keyStoreFile } @@ -81,6 +81,20 @@ private[spark] object PemsToKeyStoreConverter { trustStore } + def convertCertPemToTempTrustStoreFile( + certPemFile: File, + trustStorePassword: String, + trustStoreType: Option[String]): File = { + val trustStore = convertCertPemToTrustStore(certPemFile, trustStoreType) + val tempTrustStoreDir = Utils.createTempDir(namePrefix = "temp-trustStore") 
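+ // Persist the trust store under a fresh temporary directory. The file name records the
+ // requested store type (falling back to the JVM's default KeyStore type), and the store is
+ // written with the resolved trust store password so callers can reopen it later.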
+ val tempTrustStoreFile = new File(tempTrustStoreDir, + s"trustStore.${trustStoreType.getOrElse(KeyStore.getDefaultType)}") + Utils.tryWithResource(new FileOutputStream(tempTrustStoreFile)) { + trustStore.store(_, trustStorePassword.toCharArray) + } + tempTrustStoreFile + } + private def withPemParsedFromFile[T](pemFile: File)(f: (PEMParser => T)): T = { Utils.tryWithResource(new FileInputStream(pemFile)) { pemStream => Utils.tryWithResource(new InputStreamReader(pemStream, Charsets.UTF_8)) { pemReader => diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala index 67caa176930ea..7f21087159145 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala @@ -28,7 +28,7 @@ import retrofit2.{Call, Callback, Response} import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.Duration -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf} +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.CompressionUtils @@ -95,7 +95,7 @@ private[spark] class KubernetesSparkDependencyDownloadInitContainer( sparkConf: SparkConf, retrofitClientFactory: RetrofitClientFactory, fileFetcher: FileFetcher, - securityManager: SparkSecurityManager) extends Logging { + resourceStagingServerSslOptions: SSLOptions) extends Logging { private implicit val downloadExecutor = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("download-executor")) @@ -177,9 +177,10 @@ private[spark] class KubernetesSparkDependencyDownloadInitContainer( maybeResourceId.foreach { resourceId => require(resourceSecretLocation.isFile, errMessageOnSecretNotAFile) require(resourceDownloadDir.isDirectory, errMessageOnDownloadDirNotADirectory) - val sslOptions = securityManager.getSSLOptions("kubernetes.resourceStagingServer") val service = retrofitClientFactory.createRetrofitClient( - resourceStagingServerUri, classOf[ResourceStagingServiceRetrofit], sslOptions) + resourceStagingServerUri, + classOf[ResourceStagingServiceRetrofit], + resourceStagingServerSslOptions) val resourceSecret = Files.toString(resourceSecretLocation, Charsets.UTF_8) val downloadResourceCallback = new DownloadTarGzCallback(resourceDownloadDir) logInfo(downloadStartMessage) @@ -219,12 +220,14 @@ object KubernetesSparkDependencyDownloadInitContainer extends Logging { new SparkConf(true) } val securityManager = new SparkSecurityManager(sparkConf) + val resourceStagingServerSslOptions = + new ResourceStagingServerSslOptionsProviderImpl(sparkConf).getSslOptions val fileFetcher = new FileFetcherImpl(sparkConf, securityManager) new KubernetesSparkDependencyDownloadInitContainer( sparkConf, RetrofitClientFactoryImpl, fileFetcher, - securityManager).run() + resourceStagingServerSslOptions).run() logInfo("Finished downloading application dependencies.") } } diff --git 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala index 2744ed0a74616..6b88426d00e72 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala @@ -17,9 +17,11 @@ package org.apache.spark.deploy.rest.kubernetes.v2 import java.io.File +import java.security.SecureRandom import com.google.common.base.Charsets import com.google.common.io.Files +import org.apache.commons.lang3.RandomStringUtils import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.kubernetes.config._ @@ -32,20 +34,29 @@ private[spark] trait ResourceStagingServerSslOptionsProvider { private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: SparkConf) extends ResourceStagingServerSslOptionsProvider with Logging { + + private val SECURE_RANDOM = new SecureRandom() + def getSslOptions: SSLOptions = { val baseSslOptions = new SparkSecurityManager(sparkConf) - .getSSLOptions("kubernetes.resourceStagingServer") + .getSSLOptions(RESOURCE_STAGING_SERVER_SSL_NAMESPACE) val maybeKeyPem = sparkConf.get(RESOURCE_STAGING_SERVER_KEY_PEM) - val maybeCertPem = sparkConf.get(RESOURCE_STAGING_SERVER_CERT_PEM) + val maybeServerCertPem = sparkConf.get(RESOURCE_STAGING_SERVER_CERT_PEM) val maybeKeyStorePasswordFile = sparkConf.get(RESOURCE_STAGING_SERVER_KEYSTORE_PASSWORD_FILE) val maybeKeyPasswordFile = sparkConf.get(RESOURCE_STAGING_SERVER_KEYSTORE_KEY_PASSWORD_FILE) + val maybeClientCertPem = sparkConf.get(RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM) logSslConfigurations( - baseSslOptions, maybeKeyPem, maybeCertPem, maybeKeyStorePasswordFile, maybeKeyPasswordFile) + baseSslOptions, + maybeKeyPem, + maybeServerCertPem, + maybeKeyStorePasswordFile, + maybeKeyPasswordFile, + maybeClientCertPem) requireNandDefined(baseSslOptions.keyStore, maybeKeyPem, "Shouldn't provide both key PEM and keyStore files for TLS.") - requireNandDefined(baseSslOptions.keyStore, maybeCertPem, + requireNandDefined(baseSslOptions.keyStore, maybeServerCertPem, "Shouldn't provide both certificate PEM and keyStore files for TLS.") requireNandDefined(baseSslOptions.keyStorePassword, maybeKeyStorePasswordFile, "Shouldn't provide both the keyStore password value and the keyStore password file.") @@ -53,42 +64,68 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar "Shouldn't provide both the keyStore key password value and the keyStore key password file.") requireBothOrNeitherDefined( maybeKeyPem, - maybeCertPem, + maybeServerCertPem, "When providing a certificate PEM file, the key PEM file must also be provided.", "When providing a key PEM file, the certificate PEM file must also be provided.") + requireNandDefined(baseSslOptions.trustStore, maybeClientCertPem, + "Shouldn't provide both the trustStore and a client certificate PEM file.") val resolvedKeyStorePassword = baseSslOptions.keyStorePassword .orElse(maybeKeyStorePasswordFile.map { keyStorePasswordFile => safeFileToString(keyStorePasswordFile, "KeyStore password file") }) + .orElse(maybeKeyPem.map { _ => randomPassword()}) val 
resolvedKeyStoreKeyPassword = baseSslOptions.keyPassword .orElse(maybeKeyPasswordFile.map { keyPasswordFile => safeFileToString(keyPasswordFile, "KeyStore key password file") }) - val resolvedKeyStore = baseSslOptions.keyStore - .orElse(maybeKeyPem.map { keyPem => + .orElse(maybeKeyPem.map { _ => randomPassword()}) + val resolvedKeyStore = baseSslOptions.keyStore.orElse { + for { + keyPem <- maybeKeyPem + certPem <- maybeServerCertPem + keyStorePassword <- resolvedKeyStorePassword + keyPassword <- resolvedKeyStoreKeyPassword + } yield { val keyPemFile = new File(keyPem) - val certPemFile = new File(maybeCertPem.get) + val certPemFile = new File(certPem) PemsToKeyStoreConverter.convertPemsToTempKeyStoreFile( keyPemFile, certPemFile, "key", - resolvedKeyStorePassword, - resolvedKeyStoreKeyPassword, + keyStorePassword, + keyPassword, baseSslOptions.keyStoreType) - }) + } + } + val resolvedTrustStorePassword = baseSslOptions.trustStorePassword + .orElse(maybeClientCertPem.map( _ => "defaultTrustStorePassword")) + val resolvedTrustStore = baseSslOptions.trustStore.orElse { + for { + clientCertPem <- maybeClientCertPem + trustStorePassword <- resolvedTrustStorePassword + } yield { + val certPemFile = new File(clientCertPem) + PemsToKeyStoreConverter.convertCertPemToTempTrustStoreFile( + certPemFile, + trustStorePassword, + baseSslOptions.trustStoreType) + } + } baseSslOptions.copy( keyStore = resolvedKeyStore, keyStorePassword = resolvedKeyStorePassword, - keyPassword = resolvedKeyStoreKeyPassword) + keyPassword = resolvedKeyStoreKeyPassword, + trustStore = resolvedTrustStore) } private def logSslConfigurations( baseSslOptions: SSLOptions, maybeKeyPem: Option[String], - maybeCertPem: Option[String], + maybeServerCertPem: Option[String], maybeKeyStorePasswordFile: Option[String], - maybeKeyPasswordFile: Option[String]) = { + maybeKeyPasswordFile: Option[String], + maybeClientCertPem: Option[String]) = { logDebug("The following SSL configurations were provided for the resource staging server:") logDebug(s"KeyStore File: ${baseSslOptions.keyStore.map(_.getAbsolutePath).getOrElse("N/A")}") logDebug("KeyStore Password: " + @@ -99,7 +136,8 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar logDebug(s"Key Password File: ${maybeKeyPasswordFile.getOrElse("N/A")}") logDebug(s"KeyStore Type: ${baseSslOptions.keyStoreType.getOrElse("N/A")}") logDebug(s"Key PEM: ${maybeKeyPem.getOrElse("N/A")}") - logDebug(s"Certificate PEM: ${maybeCertPem.getOrElse("N/A")}") + logDebug(s"Server-side certificate PEM: ${maybeServerCertPem.getOrElse("N/A")}") + logDebug(s"Client-side certificate PEM: ${maybeClientCertPem.getOrElse("N/A")}") } private def requireBothOrNeitherDefined( @@ -130,4 +168,8 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar } Files.toString(file, Charsets.UTF_8) } + + private def randomPassword(): String = { + RandomStringUtils.random(1024, 0, Integer.MAX_VALUE, false, false, null, SECURE_RANDOM) + } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala index 0cb056dcf5493..886484ffb4692 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala @@ -30,6 +30,7 @@ import 
org.bouncycastle.cert.jcajce.{JcaX509CertificateConverter, JcaX509v3Certi import org.bouncycastle.openssl.jcajce.JcaPEMWriter import org.bouncycastle.operator.jcajce.JcaContentSignerBuilder +import org.apache.spark.deploy.kubernetes.submit.v2.{KeyAndCertPem, KeyStoreAndTrustStore} import org.apache.spark.util.Utils private[spark] object SSLUtils { @@ -38,7 +39,7 @@ private[spark] object SSLUtils { ipAddress: String, keyStorePassword: String, keyPassword: String, - trustStorePassword: String): (File, File) = { + trustStorePassword: String): KeyStoreAndTrustStore = { val keyPairGenerator = KeyPairGenerator.getInstance("RSA") keyPairGenerator.initialize(512) val keyPair = keyPairGenerator.generateKeyPair() @@ -60,10 +61,10 @@ private[spark] object SSLUtils { Utils.tryWithResource(new FileOutputStream(trustStoreFile)) { trustStore.store(_, trustStorePassword.toCharArray) } - (keyStoreFile, trustStoreFile) + KeyStoreAndTrustStore(keyStoreFile, trustStoreFile) } - def generateKeyCertPemPair(ipAddress: String): (File, File) = { + def generateKeyCertPemPair(ipAddress: String): KeyAndCertPem = { val keyPairGenerator = KeyPairGenerator.getInstance("RSA") keyPairGenerator.initialize(512) val keyPair = keyPairGenerator.generateKeyPair() @@ -90,7 +91,7 @@ private[spark] object SSLUtils { } } } - (keyPemFile, certPemFile) + KeyAndCertPem(keyPemFile, certPemFile) } private def generateCertificate(ipAddress: String, keyPair: KeyPair): X509Certificate = { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SSLFilePairs.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SSLFilePairs.scala new file mode 100644 index 0000000000000..3d3ff7ad7011a --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SSLFilePairs.scala @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.File + +case class KeyAndCertPem(keyPem: File, certPem: File) + +case class KeyStoreAndTrustStore(keyStore: File, trustStore: File) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala index 6ab37185b8d07..c551fbc01d060 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala @@ -99,7 +99,7 @@ class KubernetesSparkDependencyDownloadInitContainerSuite sparkConf, retrofitClientFactory, fileFetcher, - securityManager = new SparkSecurityManager(sparkConf)) + resourceStagingServerSslOptions = STAGING_SERVER_SSL_OPTIONS) when(retrofitClient.downloadResources(JARS_RESOURCE_ID, downloadJarsSecretValue)) .thenReturn(downloadJarsCall) when(retrofitClient.downloadResources(FILES_RESOURCE_ID, downloadFilesSecretValue)) @@ -126,7 +126,7 @@ class KubernetesSparkDependencyDownloadInitContainerSuite sparkConf, retrofitClientFactory, fileFetcher, - securityManager = new SparkSecurityManager(sparkConf)) + resourceStagingServerSslOptions = STAGING_SERVER_SSL_OPTIONS) initContainerUnderTest.run() Mockito.verify(fileFetcher).fetchFile("http://localhost:9000/jar1.jar", downloadJarsDir) Mockito.verify(fileFetcher).fetchFile("hdfs://localhost:9000/jar2.jar", downloadJarsDir) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala index 10aced9000bf8..c33d8beb2c397 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala @@ -66,10 +66,12 @@ class ResourceStagingServerSslOptionsProviderSuite extends SparkFunSuite with Be } test("Setting key and certificate pem files should write an appropriate keyStore.") { - val (keyPemFile, certPemFile) = SSLUtils.generateKeyCertPemPair("127.0.0.1") + val keyAndCertPem = SSLUtils.generateKeyCertPemPair("127.0.0.1") sparkConf.set("spark.ssl.kubernetes.resourceStagingServer.enabled", "true") - .set("spark.ssl.kubernetes.resourceStagingServer.keyPem", keyPemFile.getAbsolutePath) - .set("spark.ssl.kubernetes.resourceStagingServer.serverCertPem", certPemFile.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyPem", + keyAndCertPem.keyPem.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.serverCertPem", + keyAndCertPem.certPem.getAbsolutePath) .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", "keyStorePassword") .set("spark.ssl.kubernetes.resourceStagingServer.keyPassword", "keyPassword") val sslOptions = sslOptionsProvider.getSslOptions @@ -81,9 +83,37 @@ class ResourceStagingServerSslOptionsProviderSuite extends SparkFunSuite with Be keyStore.load(_, 
"keyStorePassword".toCharArray) } val key = keyStore.getKey("key", "keyPassword".toCharArray) - compareJcaPemObjectToFileString(key, keyPemFile) + compareJcaPemObjectToFileString(key, keyAndCertPem.keyPem) val certificate = keyStore.getCertificateChain("key")(0) - compareJcaPemObjectToFileString(certificate, certPemFile) + compareJcaPemObjectToFileString(certificate, keyAndCertPem.certPem) + } + } + + test("Setting pem files without setting passwords should use random passwords.") { + val keyAndCertPem = SSLUtils.generateKeyCertPemPair("127.0.0.1") + sparkConf.set("spark.ssl.kubernetes.resourceStagingServer.enabled", "true") + .set("spark.ssl.kubernetes.resourceStagingServer.keyPem", + keyAndCertPem.keyPem.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.serverCertPem", + keyAndCertPem.certPem.getAbsolutePath) + val sslOptions = sslOptionsProvider.getSslOptions + assert(sslOptions.enabled, "SSL should be enabled.") + assert(sslOptions.keyStore.isDefined, "KeyStore should be defined.") + assert(sslOptions.keyStorePassword.isDefined) + assert(sslOptions.keyPassword.isDefined) + for { + keyStoreFile <- sslOptions.keyStore + keyStorePassword <- sslOptions.keyStorePassword + keyPassword <- sslOptions.keyPassword + } { + val keyStore = KeyStore.getInstance(KeyStore.getDefaultType) + Utils.tryWithResource(new FileInputStream(keyStoreFile)) { + keyStore.load(_, keyStorePassword.toCharArray) + } + val key = keyStore.getKey("key", keyPassword.toCharArray) + compareJcaPemObjectToFileString(key, keyAndCertPem.keyPem) + val certificate = keyStore.getCertificateChain("key")(0) + compareJcaPemObjectToFileString(certificate, keyAndCertPem.certPem) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala index 4ef12e8686bb0..4ffb0d4dfa887 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala @@ -57,17 +57,17 @@ class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { } test("Enable SSL on the server") { - val (keyStore, trustStore) = SSLUtils.generateKeyStoreTrustStorePair( + val keyStoreAndTrustStore = SSLUtils.generateKeyStoreTrustStorePair( ipAddress = "127.0.0.1", keyStorePassword = "keyStore", keyPassword = "key", trustStorePassword = "trustStore") val sslOptions = SSLOptions( enabled = true, - keyStore = Some(keyStore), + keyStore = Some(keyStoreAndTrustStore.keyStore), keyStorePassword = Some("keyStore"), keyPassword = Some("key"), - trustStore = Some(trustStore), + trustStore = Some(keyStoreAndTrustStore.trustStore), trustStorePassword = Some("trustStore")) sslOptionsProvider.setOptions(sslOptions) server.start() diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala index f09339a9c3e08..559cb281c7c62 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala +++ 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala @@ -34,7 +34,7 @@ import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube -import org.apache.spark.deploy.kubernetes.integrationtest.constants.{GCE_TEST_BACKEND, MINIKUBE_TEST_BACKEND} +import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 import org.apache.spark.deploy.kubernetes.submit.v1.{Client, ExternalSuppliedUrisDriverServiceManager} import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} @@ -190,16 +190,17 @@ private[spark] class KubernetesV1Suite(testBackend: IntegrationTestBackend) test("Enable SSL on the driver submit server") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) - val (keyStoreFile, trustStoreFile) = SSLUtils.generateKeyStoreTrustStorePair( + val keyStoreAndTrustStore = SSLUtils.generateKeyStoreTrustStorePair( Minikube.getMinikubeIp, "changeit", "changeit", "changeit") - sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, s"file://${keyStoreFile.getAbsolutePath}") + sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, + s"file://${keyStoreAndTrustStore.keyStore.getAbsolutePath}") sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyStorePassword", "changeit") sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyPassword", "changeit") sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE, - s"file://${trustStoreFile.getAbsolutePath}") + s"file://${keyStoreAndTrustStore.trustStore.getAbsolutePath}") sparkConf.set("spark.ssl.kubernetes.driversubmitserver.trustStorePassword", "changeit") sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) new Client( @@ -212,10 +213,12 @@ private[spark] class KubernetesV1Suite(testBackend: IntegrationTestBackend) test("Enable SSL on the driver submit server using PEM files") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) - val (keyPem, certPem) = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) - sparkConf.set(DRIVER_SUBMIT_SSL_KEY_PEM, s"file://${keyPem.getAbsolutePath}") - sparkConf.set(DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM, s"file://${certPem.getAbsolutePath}") - sparkConf.set(DRIVER_SUBMIT_SSL_SERVER_CERT_PEM, s"file://${certPem.getAbsolutePath}") + val keyAndCertPem = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) + sparkConf.set(DRIVER_SUBMIT_SSL_KEY_PEM, s"file://${keyAndCertPem.keyPem.getAbsolutePath}") + sparkConf.set( + DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM, s"file://${keyAndCertPem.certPem.getAbsolutePath}") + sparkConf.set( + DRIVER_SUBMIT_SSL_SERVER_CERT_PEM, s"file://${keyAndCertPem.certPem.getAbsolutePath}") sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) new Client( sparkConf = sparkConf, diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala index ba9d088bfcfcc..e9900b90cb588 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala +++ 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala @@ -29,7 +29,7 @@ import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND -import org.apache.spark.deploy.kubernetes.submit.v2.Client +import org.apache.spark.deploy.kubernetes.submit.v2.{Client, KeyAndCertPem} import org.apache.spark.launcher.SparkLauncher @DoNotDiscover @@ -65,31 +65,34 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) test("Use submission v2.") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) - launchStagingServer(SSLOptions()) + launchStagingServer(SSLOptions(), None) runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) } test("Enable SSL on the submission server") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) - val (keyStore, trustStore) = SSLUtils.generateKeyStoreTrustStorePair( + val keyStoreAndTrustStore = SSLUtils.generateKeyStoreTrustStorePair( ipAddress = Minikube.getMinikubeIp, keyStorePassword = "keyStore", keyPassword = "key", trustStorePassword = "trustStore") sparkConf.set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) - .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", keyStore.getAbsolutePath) - .set("spark.ssl.kubernetes.resourceStagingServer.trustStore", trustStore.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", + keyStoreAndTrustStore.keyStore.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.trustStore", + keyStoreAndTrustStore.trustStore.getAbsolutePath) .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", "keyStore") .set("spark.ssl.kubernetes.resourceStagingServer.keyPassword", "key") .set("spark.ssl.kubernetes.resourceStagingServer.trustStorePassword", "trustStore") launchStagingServer(SSLOptions( enabled = true, - keyStore = Some(keyStore), - trustStore = Some(trustStore), + keyStore = Some(keyStoreAndTrustStore.keyStore), + trustStore = Some(keyStoreAndTrustStore.trustStore), keyStorePassword = Some("keyStore"), keyPassword = Some("key"), - trustStorePassword = Some("trustStore"))) + trustStorePassword = Some("trustStore")), + None) runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) } @@ -104,7 +107,7 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) test("Dynamic executor scaling basic test") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) - launchStagingServer(SSLOptions()) + launchStagingServer(SSLOptions(), None) createShuffleServiceDaemonSet() sparkConf.setJars(Seq(KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) @@ -117,6 +120,7 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) } test("Use remote resources without the resource staging server.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() sparkConf.setJars(Seq( s"$assetServerUri/${KubernetesSuite.EXAMPLES_JAR_FILE.getName}", @@ -126,7 +130,8 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) } test("Mix remote resources with submitted ones.") { - launchStagingServer(SSLOptions()) + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + launchStagingServer(SSLOptions(), 
None) val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() sparkConf.setJars(Seq( KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, @@ -135,7 +140,20 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) } + test("Use key and certificate PEM files for TLS.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val keyAndCertificate = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) + launchStagingServer( + SSLOptions(enabled = true), + Some(keyAndCertificate)) + sparkConf.set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) + .set( + RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM.key, keyAndCertificate.certPem.getAbsolutePath) + runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + } + test("Use client key and client cert file when requesting executors") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) sparkConf.setJars(Seq( KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) @@ -148,11 +166,12 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) } - private def launchStagingServer(resourceStagingServerSslOptions: SSLOptions): Unit = { + private def launchStagingServer( + resourceStagingServerSslOptions: SSLOptions, keyAndCertPem: Option[KeyAndCertPem]): Unit = { assume(testBackend.name == MINIKUBE_TEST_BACKEND) val resourceStagingServerPort = resourceStagingServerLauncher.launchStagingServer( - resourceStagingServerSslOptions) + resourceStagingServerSslOptions, keyAndCertPem) val resourceStagingServerUriScheme = if (resourceStagingServerSslOptions.enabled) { "https" } else { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala index 3a99f907d15fd..1ba54c131c196 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala @@ -16,21 +16,17 @@ */ package org.apache.spark.deploy.kubernetes.integrationtest -import java.io.StringWriter +import java.io.{File, StringWriter} import java.util.Properties -import java.util.concurrent.TimeUnit import com.google.common.io.{BaseEncoding, Files} -import com.google.common.util.concurrent.SettableFuture -import io.fabric8.kubernetes.api.model.{ConfigMapBuilder, Endpoints, HasMetadata, HTTPGetActionBuilder, KeyToPathBuilder, Pod, PodBuilder, SecretBuilder, ServiceBuilder} -import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher} -import io.fabric8.kubernetes.client.Watcher.Action -import io.fabric8.kubernetes.client.internal.readiness.Readiness +import io.fabric8.kubernetes.api.model.{ConfigMapBuilder, Endpoints, HTTPGetActionBuilder, KeyToPathBuilder, Pod, PodBuilder, SecretBuilder, ServiceBuilder} +import io.fabric8.kubernetes.client.KubernetesClient import scala.collection.JavaConverters._ import org.apache.spark.SSLOptions import org.apache.spark.deploy.kubernetes.config._ -import 
org.apache.spark.deploy.kubernetes.submit.v2.ContainerNameEqualityPredicate +import org.apache.spark.deploy.kubernetes.submit.v2.{ContainerNameEqualityPredicate, KeyAndCertPem} import org.apache.spark.util.Utils /** @@ -38,23 +34,39 @@ import org.apache.spark.util.Utils */ private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesClient) { - private val KEYSTORE_DIR = "/mnt/secrets/spark-staging" - private val KEYSTORE_FILE = s"$KEYSTORE_DIR/keyStore" + private val SECRETS_ROOT_DIR = "/mnt/secrets/spark-staging" + private val KEYSTORE_SECRET_KEY = "keyStore" + private val KEYSTORE_FILE = s"$SECRETS_ROOT_DIR/$KEYSTORE_SECRET_KEY" + private val KEY_PEM_SECRET_KEY = "keyPem" + private val CERT_PEM_SECRET_KEY = "certPem" + private val KEY_PEM_FILE = s"$SECRETS_ROOT_DIR/$KEY_PEM_SECRET_KEY" + private val CERT_PEM_FILE = s"$SECRETS_ROOT_DIR/$CERT_PEM_SECRET_KEY" + private val SSL_SECRET_NAME = "resource-staging-server-ssl-secrets" private val PROPERTIES_FILE_NAME = "staging-server.properties" private val PROPERTIES_DIR = "/var/data/spark-staging-server" private val PROPERTIES_FILE_PATH = s"$PROPERTIES_DIR/$PROPERTIES_FILE_NAME" // Returns the NodePort the staging server is listening on - def launchStagingServer(sslOptions: SSLOptions): Int = { + def launchStagingServer( + sslOptions: SSLOptions, + keyAndCertPem: Option[KeyAndCertPem] = None): Int = { val stagingServerProperties = new Properties() val stagingServerSecret = sslOptions.keyStore.map { keyStore => val keyStoreBytes = Files.toByteArray(keyStore) val keyStoreBase64 = BaseEncoding.base64().encode(keyStoreBytes) + Map(KEYSTORE_SECRET_KEY -> keyStoreBase64) + }.orElse { + keyAndCertPem.map { keyAndCert => + val keyPemBytes = Files.toByteArray(keyAndCert.keyPem) + val keyPemBase64 = BaseEncoding.base64().encode(keyPemBytes) + val certPemBytes = Files.toByteArray(keyAndCert.certPem) + val certPemBase64 = BaseEncoding.base64().encode(certPemBytes) + Map(KEY_PEM_SECRET_KEY -> keyPemBase64, CERT_PEM_SECRET_KEY -> certPemBase64) + } + }.map { secretData => new SecretBuilder() - .withNewMetadata() - .withName("resource-staging-server-keystore") - .endMetadata() - .addToData("keyStore", keyStoreBase64) + .withNewMetadata().withName(SSL_SECRET_NAME).endMetadata() + .withData(secretData.asJava) .build() } stagingServerProperties.setProperty( @@ -67,10 +79,18 @@ private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesC stagingServerProperties.setProperty( "spark.ssl.kubernetes.resourceStagingServer.keyPassword", password) } - stagingServerSecret.foreach { _ => + sslOptions.keyStore.foreach { _ => stagingServerProperties.setProperty( "spark.ssl.kubernetes.resourceStagingServer.keyStore", KEYSTORE_FILE) } + keyAndCertPem.foreach { _ => + stagingServerProperties.setProperty( + RESOURCE_STAGING_SERVER_KEY_PEM.key, KEY_PEM_FILE) + } + keyAndCertPem.foreach { _ => + stagingServerProperties.setProperty( + RESOURCE_STAGING_SERVER_CERT_PEM.key, CERT_PEM_FILE) + } val propertiesWriter = new StringWriter() stagingServerProperties.store(propertiesWriter, "Resource staging server properties.") val stagingServerConfigMap = new ConfigMapBuilder() @@ -126,7 +146,7 @@ private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesC .editMatchingContainer(new ContainerNameEqualityPredicate("staging-server-container")) .addNewVolumeMount() .withName("keystore-volume") - .withMountPath(KEYSTORE_DIR) + .withMountPath(SECRETS_ROOT_DIR) .endVolumeMount() .endContainer() .endSpec() diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala index 6e0049b813719..461264877edc2 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala @@ -42,6 +42,4 @@ private[spark] class MinikubeTestBackend extends IntegrationTestBackend { } override def name(): String = MINIKUBE_TEST_BACKEND - - } From 8f6f0a041873d6d6fb1a80111d9baf70f05524d3 Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 19 May 2017 15:47:11 -0700 Subject: [PATCH 111/156] Differentiate between URI and SSL settings for in-cluster vs. submission (#281) --- .../kubernetes/OptionRequirements.scala | 40 ++++++ .../spark/deploy/kubernetes/config.scala | 69 ++++++++-- .../spark/deploy/kubernetes/constants.scala | 1 + ...riverInitContainerComponentsProvider.scala | 64 ++++++++- ...dDependencyInitContainerConfigPlugin.scala | 55 ++++++-- .../v2/SubmittedDependencySecretBuilder.scala | 44 +++++-- ...ourceStagingServerSslOptionsProvider.scala | 121 ++++++++---------- .../KubernetesClusterSchedulerBackend.scala | 3 +- ...ndencyInitContainerConfigPluginSuite.scala | 34 +++-- ...ubmittedDependencySecretBuilderSuite.scala | 40 +++++- 10 files changed, 341 insertions(+), 130 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/OptionRequirements.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/OptionRequirements.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/OptionRequirements.scala new file mode 100644 index 0000000000000..eda43de0a9a5b --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/OptionRequirements.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes + +private[spark] object OptionRequirements { + + def requireBothOrNeitherDefined( + opt1: Option[_], + opt2: Option[_], + errMessageWhenFirstIsMissing: String, + errMessageWhenSecondIsMissing: String): Unit = { + requireSecondIfFirstIsDefined(opt1, opt2, errMessageWhenSecondIsMissing) + requireSecondIfFirstIsDefined(opt2, opt1, errMessageWhenFirstIsMissing) + } + + def requireSecondIfFirstIsDefined( + opt1: Option[_], opt2: Option[_], errMessageWhenSecondIsMissing: String): Unit = { + opt1.foreach { _ => + require(opt2.isDefined, errMessageWhenSecondIsMissing) + } + } + + def requireNandDefined(opt1: Option[_], opt2: Option[_], errMessage: String): Unit = { + opt1.foreach { _ => require(opt2.isEmpty, errMessage) } + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index ab442131ad271..759a7df505829 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -362,6 +362,8 @@ package object config extends Logging { .createOptional private[spark] val RESOURCE_STAGING_SERVER_SSL_NAMESPACE = "kubernetes.resourceStagingServer" + private[spark] val RESOURCE_STAGING_SERVER_INTERNAL_SSL_NAMESPACE = + "kubernetes.resourceStagingServer.internal" private[spark] val RESOURCE_STAGING_SERVER_CERT_PEM = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.serverCertPem") .doc("Certificate PEM file to use when having the resource staging server" + @@ -370,35 +372,70 @@ package object config extends Logging { .createOptional private[spark] val RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.clientCertPem") - .doc("Certificate PEM file to use when the client contacts the resource staging server.") + .doc("Certificate PEM file to use when the client contacts the resource staging server." + + " This must strictly be a path to a file on the submitting machine's disk.") + .stringConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_INTERNAL_CLIENT_CERT_PEM = + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_INTERNAL_SSL_NAMESPACE.clientCertPem") + .doc("Certificate PEM file to use when the init-container contacts the resource staging" + + " server. If this is not provided, it defaults to the value of" + + " spark.ssl.kubernetes.resourceStagingServer.clientCertPem. This can be a URI with" + + " a scheme of local:// which denotes that the file is pre-mounted on the init-container's" + + " disk. 
A uri without a scheme or a scheme of file:// will result in this file being" + + " mounted from the submitting machine's disk as a secret into the pods.") .stringConf .createOptional - private[spark] val RESOURCE_STAGING_SERVER_KEYSTORE_PASSWORD_FILE = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.keyStorePasswordFile") - .doc("File containing the keystore password for the Kubernetes dependency server.") + .doc("File containing the keystore password for the Kubernetes resource staging server.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_KEYSTORE_KEY_PASSWORD_FILE = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.keyPasswordFile") - .doc("File containing the key password for the Kubernetes dependency server.") + .doc("File containing the key password for the Kubernetes resource staging server.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_SSL_ENABLED = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.enabled") - .doc("Whether or not to use SSL when communicating with the dependency server.") + .doc("Whether or not to use SSL when communicating with the resource staging server.") + .booleanConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_INTERNAL_SSL_ENABLED = + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_INTERNAL_SSL_NAMESPACE.enabled") + .doc("Whether or not to use SSL when communicating with the resource staging server from" + + " the init-container. If this is not provided, defaults to" + + " the value of spark.ssl.kubernetes.resourceStagingServer.enabled") .booleanConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.trustStore") - .doc("File containing the trustStore to communicate with the Kubernetes dependency server.") + .doc("File containing the trustStore to communicate with the Kubernetes dependency server." + + " This must strictly be a path on the submitting machine's disk.") + .stringConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_FILE = + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_INTERNAL_SSL_NAMESPACE.trustStore") + .doc("File containing the trustStore to communicate with the Kubernetes dependency server" + + " from the init-container. If this is not provided, defaults to the value of" + + " spark.ssl.kubernetes.resourceStagingServer.trustStore. This can be a URI with a scheme" + + " of local:// indicating that the trustStore is pre-mounted on the init-container's" + + " disk. If no scheme, or a scheme of file:// is provided, this file is mounted from the" + + " submitting machine's disk as a Kubernetes secret into the pods.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.trustStorePassword") - .doc("Password for the trustStore for talking to the dependency server.") + .doc("Password for the trustStore for communicating to the dependency server.") + .stringConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_PASSWORD = + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_INTERNAL_SSL_NAMESPACE.trustStorePassword") + .doc("Password for the trustStore for communicating to the dependency server from the" + + " init-container. 
If this is not provided, defaults to" + + " spark.ssl.kubernetes.resourceStagingServer.trustStorePassword.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE = @@ -406,11 +443,27 @@ package object config extends Logging { .doc("Type of trustStore for communicating with the dependency server.") .stringConf .createOptional + private[spark] val RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_TYPE = + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_INTERNAL_SSL_NAMESPACE.trustStoreType") + .doc("Type of trustStore for communicating with the dependency server from the" + + " init-container. If this is not provided, defaults to" + + " spark.ssl.kubernetes.resourceStagingServer.trustStoreType") + .stringConf + .createOptional // Driver and Init-Container parameters for submission v2 private[spark] val RESOURCE_STAGING_SERVER_URI = ConfigBuilder("spark.kubernetes.resourceStagingServer.uri") - .doc("Base URI for the Spark resource staging server") + .doc("Base URI for the Spark resource staging server.") + .stringConf + .createOptional + + private[spark] val RESOURCE_STAGING_SERVER_INTERNAL_URI = + ConfigBuilder("spark.kubernetes.resourceStagingServer.internal.uri") + .doc("Base URI for the Spark resource staging server when the init-containers access it for" + + " downloading resources. If this is not provided, it defaults to the value provided in" + + " spark.kubernetes.resourceStagingServer.uri, the URI that the submission client uses to" + + " upload the resources from outside the cluster.") .stringConf .createOptional diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 8d0965078aaa8..ea11ca2ec8f21 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -115,6 +115,7 @@ package object constants { private[spark] val INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY = "downloadSubmittedFilesSecret" private[spark] val INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY = "trustStore" + private[spark] val INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY = "ssl-certificate" private[spark] val INIT_CONTAINER_CONFIG_MAP_KEY = "download-submitted-files" private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME = "download-jars-volume" private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME = "download-files" diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala index 7f6ae2ec47675..0a5e6cd216011 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala @@ -17,10 +17,11 @@ package org.apache.spark.deploy.kubernetes.submit.v2 import org.apache.spark.{SparkConf, SSLOptions} -import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} +import 
org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, OptionRequirements, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.rest.kubernetes.v2.RetrofitClientFactoryImpl +import org.apache.spark.util.Utils /** * Interface that wraps the provision of everything the submission client needs to set up the @@ -47,10 +48,51 @@ private[spark] class DriverInitContainerComponentsProviderImpl( kubernetesAppId: String, sparkJars: Seq[String], sparkFiles: Seq[String], - resourceStagingServerSslOptions: SSLOptions) + resourceStagingServerExternalSslOptions: SSLOptions) extends DriverInitContainerComponentsProvider { private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) + private val maybeResourceStagingServerInternalUri = + sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_URI) + private val maybeResourceStagingServerInternalTrustStore = + sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_FILE) + .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE)) + private val maybeResourceStagingServerInternalTrustStorePassword = + sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_PASSWORD) + .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD)) + private val maybeResourceStagingServerInternalTrustStoreType = + sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_TYPE) + .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE)) + private val maybeResourceStagingServerInternalClientCert = + sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_CLIENT_CERT_PEM) + .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM)) + private val resourceStagingServerInternalSslEnabled = + sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_SSL_ENABLED) + .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_SSL_ENABLED)) + .getOrElse(false) + + OptionRequirements.requireNandDefined( + maybeResourceStagingServerInternalClientCert, + maybeResourceStagingServerInternalTrustStore, + "Cannot provide both a certificate file and a trustStore file for init-containers to" + + " use for contacting the resource staging server over TLS.") + + require(maybeResourceStagingServerInternalTrustStore.forall { trustStore => + Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") match { + case "file" | "local" => true + case _ => false + } + }, "TrustStore URI used for contacting the resource staging server from init containers must" + + " have no scheme, or scheme file://, or scheme local://.") + + require(maybeResourceStagingServerInternalClientCert.forall { trustStore => + Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") match { + case "file" | "local" => true + case _ => false + } + }, "Client cert file URI used for contacting the resource staging server from init containers" + + " must have no scheme, or scheme file://, or scheme local://.") + private val jarsDownloadPath = sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) private val filesDownloadPath = sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION) private val maybeSecretName = maybeResourceStagingServerUri.map { _ => @@ -71,14 +113,20 @@ private[spark] class DriverInitContainerComponentsProviderImpl( filesResourceId <- maybeSubmittedResourceIds.map(_.filesResourceId) } yield { new SubmittedDependencyInitContainerConfigPluginImpl( - stagingServerUri, + // Configure the init-container with the internal URI 
over the external URI. + maybeResourceStagingServerInternalUri.getOrElse(stagingServerUri), jarsResourceId, filesResourceId, INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, - INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH, - resourceStagingServerSslOptions) + INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, + resourceStagingServerInternalSslEnabled, + maybeResourceStagingServerInternalTrustStore, + maybeResourceStagingServerInternalClientCert, + maybeResourceStagingServerInternalTrustStorePassword, + maybeResourceStagingServerInternalTrustStoreType, + INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) } new SparkInitContainerConfigMapBuilderImpl( sparkJars, @@ -113,7 +161,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( stagingServerUri, sparkJars, sparkFiles, - resourceStagingServerSslOptions, + resourceStagingServerExternalSslOptions, RetrofitClientFactoryImpl) } } @@ -133,7 +181,9 @@ private[spark] class DriverInitContainerComponentsProviderImpl( INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, - resourceStagingServerSslOptions) + INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, + maybeResourceStagingServerInternalTrustStore, + maybeResourceStagingServerInternalClientCert) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala index bc9abc4eaba81..1b086e60d3d0d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala @@ -16,9 +16,10 @@ */ package org.apache.spark.deploy.kubernetes.submit.v2 -import org.apache.spark.SSLOptions +import org.apache.spark.SparkException import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.internal.config.OptionalConfigEntry +import org.apache.spark.util.Utils private[spark] trait SubmittedDependencyInitContainerConfigPlugin { /** @@ -34,36 +35,62 @@ private[spark] trait SubmittedDependencyInitContainerConfigPlugin { } private[spark] class SubmittedDependencyInitContainerConfigPluginImpl( - resourceStagingServerUri: String, + internalResourceStagingServerUri: String, jarsResourceId: String, filesResourceId: String, jarsSecretKey: String, filesSecretKey: String, trustStoreSecretKey: String, - secretsVolumeMountPath: String, - resourceStagingServiceSslOptions: SSLOptions) + clientCertSecretKey: String, + resourceStagingServerSslEnabled: Boolean, + maybeInternalTrustStoreUri: Option[String], + maybeInternalClientCertUri: Option[String], + maybeInternalTrustStorePassword: Option[String], + maybeInternalTrustStoreType: Option[String], + secretsVolumeMountPath: String) extends SubmittedDependencyInitContainerConfigPlugin { override def configurationsToFetchSubmittedDependencies(): Map[String, String] = { Map[String, String]( - RESOURCE_STAGING_SERVER_URI.key -> resourceStagingServerUri, + RESOURCE_STAGING_SERVER_URI.key -> internalResourceStagingServerUri, 
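+      // internalResourceStagingServerUri resolves to spark.kubernetes.resourceStagingServer.internal.uri
+      // when that is set, and otherwise falls back to the external URI, so the init-container
+      // downloads dependencies over the in-cluster address while the submission client keeps
+      // uploading through the external one.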
INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> jarsResourceId, INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> s"$secretsVolumeMountPath/$jarsSecretKey", INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> filesResourceId, INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> s"$secretsVolumeMountPath/$filesSecretKey", - RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> - resourceStagingServiceSslOptions.enabled.toString) ++ - resourceStagingServiceSslOptions.trustStore.map { _ => - (RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key, - s"$secretsVolumeMountPath/$trustStoreSecretKey") - }.toMap ++ - resourceStagingServiceSslOptions.trustStorePassword.map { password => + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> resourceStagingServerSslEnabled.toString) ++ + resolveSecretPath( + maybeInternalTrustStoreUri, + trustStoreSecretKey, + RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE, + "TrustStore URI") ++ + resolveSecretPath( + maybeInternalClientCertUri, + clientCertSecretKey, + RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM, + "Client certificate URI") ++ + maybeInternalTrustStorePassword.map { password => (RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key, password) }.toMap ++ - resourceStagingServiceSslOptions.trustStoreType.map { storeType => + maybeInternalTrustStoreType.map { storeType => (RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key, storeType) }.toMap } + + private def resolveSecretPath( + maybeUri: Option[String], + secretKey: String, + configEntry: OptionalConfigEntry[String], + uriType: String): Map[String, String] = { + maybeUri.map(Utils.resolveURI).map { uri => + val resolvedPath = Option(uri.getScheme).getOrElse("file") match { + case "file" => s"$secretsVolumeMountPath/$secretKey" + case "local" => uri.getPath + case invalid => throw new SparkException(s"$uriType has invalid scheme $invalid must be" + + s" local://, file://, or empty.") + } + (configEntry.key, resolvedPath) + }.toMap + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala index b8fa43d0573f7..1a33757e45aa0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala @@ -16,12 +16,14 @@ */ package org.apache.spark.deploy.kubernetes.submit.v2 +import java.io.File + import com.google.common.base.Charsets import com.google.common.io.{BaseEncoding, Files} import io.fabric8.kubernetes.api.model.{Secret, SecretBuilder} import scala.collection.JavaConverters._ -import org.apache.spark.SSLOptions +import org.apache.spark.util.Utils private[spark] trait SubmittedDependencySecretBuilder { /** @@ -32,28 +34,30 @@ private[spark] trait SubmittedDependencySecretBuilder { } private[spark] class SubmittedDependencySecretBuilderImpl( - secretName: String, - jarsResourceSecret: String, - filesResourceSecret: String, - jarsSecretKey: String, - filesSecretKey: String, - trustStoreSecretKey: String, - resourceStagingServerSslOptions: SSLOptions) + secretName: String, + jarsResourceSecret: String, + filesResourceSecret: String, + jarsSecretKey: String, + filesSecretKey: String, + trustStoreSecretKey: String, + clientCertSecretKey: String, + internalTrustStoreUri: Option[String], + 
internalClientCertUri: Option[String]) extends SubmittedDependencySecretBuilder { override def build(): Secret = { - val trustStoreBase64 = resourceStagingServerSslOptions.trustStore.map { trustStoreFile => - require(trustStoreFile.isFile, "Dependency server trustStore provided at" + - trustStoreFile.getAbsolutePath + " does not exist or is not a file.") - (trustStoreSecretKey, BaseEncoding.base64().encode(Files.toByteArray(trustStoreFile))) - }.toMap + val trustStoreBase64 = convertFileToBase64IfSubmitterLocal( + trustStoreSecretKey, internalTrustStoreUri) + val clientCertBase64 = convertFileToBase64IfSubmitterLocal( + clientCertSecretKey, internalClientCertUri) val jarsSecretBase64 = BaseEncoding.base64().encode(jarsResourceSecret.getBytes(Charsets.UTF_8)) val filesSecretBase64 = BaseEncoding.base64().encode( filesResourceSecret.getBytes(Charsets.UTF_8)) val secretData = Map( jarsSecretKey -> jarsSecretBase64, filesSecretKey -> filesSecretBase64) ++ - trustStoreBase64 + trustStoreBase64 ++ + clientCertBase64 val kubernetesSecret = new SecretBuilder() .withNewMetadata() .withName(secretName) @@ -62,4 +66,16 @@ private[spark] class SubmittedDependencySecretBuilderImpl( .build() kubernetesSecret } + + private def convertFileToBase64IfSubmitterLocal(secretKey: String, secretUri: Option[String]) + : Map[String, String] = { + secretUri.filter { trustStore => + Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") == "file" + }.map { uri => + val file = new File(Utils.resolveURI(uri).getPath) + require(file.isFile, "Dependency server trustStore provided at" + + file.getAbsolutePath + " does not exist or is not a file.") + (secretKey, BaseEncoding.base64().encode(Files.toByteArray(file))) + }.toMap + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala index 6b88426d00e72..0dd0b08433def 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala @@ -23,7 +23,8 @@ import com.google.common.base.Charsets import com.google.common.io.Files import org.apache.commons.lang3.RandomStringUtils -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} +import org.apache.spark.{SecurityManager, SparkConf, SparkException, SSLOptions} +import org.apache.spark.deploy.kubernetes.OptionRequirements import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.rest.kubernetes.v1.PemsToKeyStoreConverter import org.apache.spark.internal.Logging @@ -38,7 +39,7 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar private val SECURE_RANDOM = new SecureRandom() def getSslOptions: SSLOptions = { - val baseSslOptions = new SparkSecurityManager(sparkConf) + val baseSslOptions = new SecurityManager(sparkConf) .getSSLOptions(RESOURCE_STAGING_SERVER_SSL_NAMESPACE) val maybeKeyPem = sparkConf.get(RESOURCE_STAGING_SERVER_KEY_PEM) val maybeServerCertPem = sparkConf.get(RESOURCE_STAGING_SERVER_CERT_PEM) @@ -47,39 +48,47 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar val maybeClientCertPem = 
sparkConf.get(RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM) logSslConfigurations( - baseSslOptions, - maybeKeyPem, - maybeServerCertPem, - maybeKeyStorePasswordFile, - maybeKeyPasswordFile, - maybeClientCertPem) - - requireNandDefined(baseSslOptions.keyStore, maybeKeyPem, - "Shouldn't provide both key PEM and keyStore files for TLS.") - requireNandDefined(baseSslOptions.keyStore, maybeServerCertPem, - "Shouldn't provide both certificate PEM and keyStore files for TLS.") - requireNandDefined(baseSslOptions.keyStorePassword, maybeKeyStorePasswordFile, - "Shouldn't provide both the keyStore password value and the keyStore password file.") - requireNandDefined(baseSslOptions.keyPassword, maybeKeyPasswordFile, - "Shouldn't provide both the keyStore key password value and the keyStore key password file.") - requireBothOrNeitherDefined( - maybeKeyPem, - maybeServerCertPem, - "When providing a certificate PEM file, the key PEM file must also be provided.", - "When providing a key PEM file, the certificate PEM file must also be provided.") - requireNandDefined(baseSslOptions.trustStore, maybeClientCertPem, - "Shouldn't provide both the trustStore and a client certificate PEM file.") + baseSslOptions, + maybeKeyPem, + maybeServerCertPem, + maybeKeyStorePasswordFile, + maybeKeyPasswordFile, + maybeClientCertPem) + + OptionRequirements.requireNandDefined( + baseSslOptions.keyStore, + maybeKeyPem, + "Shouldn't provide both key PEM and keyStore files for TLS.") + OptionRequirements.requireNandDefined( + baseSslOptions.keyStore, + maybeServerCertPem, + "Shouldn't provide both certificate PEM and keyStore files for TLS.") + OptionRequirements.requireNandDefined( + baseSslOptions.keyStorePassword, + maybeKeyStorePasswordFile, + "Shouldn't provide both the keyStore password value and the keyStore password file.") + OptionRequirements.requireNandDefined( + baseSslOptions.keyPassword, + maybeKeyPasswordFile, + "Shouldn't provide both a keyStore key password value and a keyStore key password file.") + OptionRequirements.requireBothOrNeitherDefined( + maybeKeyPem, + maybeServerCertPem, + "When providing a certificate PEM file, the key PEM file must also be provided.", + "When providing a key PEM file, the certificate PEM file must also be provided.") + OptionRequirements.requireNandDefined(baseSslOptions.trustStore, maybeClientCertPem, + "Shouldn't provide both the trustStore and a client certificate PEM file.") val resolvedKeyStorePassword = baseSslOptions.keyStorePassword - .orElse(maybeKeyStorePasswordFile.map { keyStorePasswordFile => - safeFileToString(keyStorePasswordFile, "KeyStore password file") - }) - .orElse(maybeKeyPem.map { _ => randomPassword()}) + .orElse(maybeKeyStorePasswordFile.map { keyStorePasswordFile => + safeFileToString(keyStorePasswordFile, "KeyStore password file") + }) + .orElse(maybeKeyPem.map { _ => randomPassword()}) val resolvedKeyStoreKeyPassword = baseSslOptions.keyPassword - .orElse(maybeKeyPasswordFile.map { keyPasswordFile => - safeFileToString(keyPasswordFile, "KeyStore key password file") - }) - .orElse(maybeKeyPem.map { _ => randomPassword()}) + .orElse(maybeKeyPasswordFile.map { keyPasswordFile => + safeFileToString(keyPasswordFile, "KeyStore key password file") + }) + .orElse(maybeKeyPem.map { _ => randomPassword()}) val resolvedKeyStore = baseSslOptions.keyStore.orElse { for { keyPem <- maybeKeyPem @@ -90,16 +99,16 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar val keyPemFile = new File(keyPem) val certPemFile = new File(certPem) 
PemsToKeyStoreConverter.convertPemsToTempKeyStoreFile( - keyPemFile, - certPemFile, - "key", - keyStorePassword, - keyPassword, - baseSslOptions.keyStoreType) + keyPemFile, + certPemFile, + "key", + keyStorePassword, + keyPassword, + baseSslOptions.keyStoreType) } } val resolvedTrustStorePassword = baseSslOptions.trustStorePassword - .orElse(maybeClientCertPem.map( _ => "defaultTrustStorePassword")) + .orElse(maybeClientCertPem.map( _ => "defaultTrustStorePassword")) val resolvedTrustStore = baseSslOptions.trustStore.orElse { for { clientCertPem <- maybeClientCertPem @@ -107,16 +116,16 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar } yield { val certPemFile = new File(clientCertPem) PemsToKeyStoreConverter.convertCertPemToTempTrustStoreFile( - certPemFile, - trustStorePassword, - baseSslOptions.trustStoreType) + certPemFile, + trustStorePassword, + baseSslOptions.trustStoreType) } } baseSslOptions.copy( - keyStore = resolvedKeyStore, - keyStorePassword = resolvedKeyStorePassword, - keyPassword = resolvedKeyStoreKeyPassword, - trustStore = resolvedTrustStore) + keyStore = resolvedKeyStore, + keyStorePassword = resolvedKeyStorePassword, + keyPassword = resolvedKeyStoreKeyPassword, + trustStore = resolvedTrustStore) } private def logSslConfigurations( @@ -140,26 +149,6 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar logDebug(s"Client-side certificate PEM: ${maybeClientCertPem.getOrElse("N/A")}") } - private def requireBothOrNeitherDefined( - opt1: Option[_], - opt2: Option[_], - errMessageWhenFirstIsMissing: String, - errMessageWhenSecondIsMissing: String): Unit = { - requireSecondIfFirstIsDefined(opt1, opt2, errMessageWhenSecondIsMissing) - requireSecondIfFirstIsDefined(opt2, opt1, errMessageWhenFirstIsMissing) - } - - private def requireSecondIfFirstIsDefined( - opt1: Option[_], opt2: Option[_], errMessageWhenSecondIsMissing: String): Unit = { - opt1.foreach { _ => - require(opt2.isDefined, errMessageWhenSecondIsMissing) - } - } - - private def requireNandDefined(opt1: Option[_], opt2: Option[_], errMessage: String): Unit = { - opt1.foreach { _ => require(opt2.isEmpty, errMessage) } - } - private def safeFileToString(filePath: String, fileType: String): String = { val file = new File(filePath) if (!file.isFile) { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 0dd875b307a6d..5627f7c20de3d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -322,7 +322,8 @@ private[spark] class KubernetesClusterSchedulerBackend( .addToRequests("cpu", executorCpuQuantity) .addToLimits("cpu", executorCpuQuantity) .endResources() - .withEnv(requiredEnv.asJava) + .addAllToEnv(requiredEnv.asJava) + .addToEnv(executorExtraClasspathEnv.toSeq: _*) .withPorts(requiredPorts.asJava) .endContainer() .endSpec() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala 
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala index 11a671085c201..09b41dc1bcaaf 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala @@ -23,20 +23,18 @@ import org.apache.spark.deploy.kubernetes.config._ class SubmittedDependencyInitContainerConfigPluginSuite extends SparkFunSuite { private val STAGING_SERVER_URI = "http://localhost:9000" + private val STAGING_SERVER_INTERNAL_URI = "http://internalHost:9000" private val JARS_RESOURCE_ID = "jars-id" private val FILES_RESOURCE_ID = "files-id" private val JARS_SECRET_KEY = "jars" private val FILES_SECRET_KEY = "files" private val TRUSTSTORE_SECRET_KEY = "trustStore" - private val SECRETS_VOLUME_MOUNT_PATH = "/var/data/" + private val CLIENT_CERT_SECRET_KEY = "client-cert" + private val SECRETS_VOLUME_MOUNT_PATH = "/var/data" private val TRUSTSTORE_PASSWORD = "trustStore" private val TRUSTSTORE_FILE = "/mnt/secrets/trustStore.jks" + private val CLIENT_CERT_URI = "local:///mnt/secrets/client-cert.pem" private val TRUSTSTORE_TYPE = "jks" - private val RESOURCE_STAGING_SERVICE_SSL_OPTIONS = SSLOptions( - enabled = true, - trustStore = Some(new File(TRUSTSTORE_FILE)), - trustStorePassword = Some(TRUSTSTORE_PASSWORD), - trustStoreType = Some(TRUSTSTORE_TYPE)) test("Plugin should provide configuration for fetching uploaded dependencies") { val configPluginUnderTest = new SubmittedDependencyInitContainerConfigPluginImpl( @@ -46,8 +44,13 @@ class SubmittedDependencyInitContainerConfigPluginSuite extends SparkFunSuite { JARS_SECRET_KEY, FILES_SECRET_KEY, TRUSTSTORE_SECRET_KEY, - SECRETS_VOLUME_MOUNT_PATH, - SSLOptions()) + CLIENT_CERT_SECRET_KEY, + false, + None, + None, + None, + None, + SECRETS_VOLUME_MOUNT_PATH) val addedConfigurations = configPluginUnderTest.configurationsToFetchSubmittedDependencies() val expectedConfigurations = Map( RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, @@ -65,19 +68,24 @@ class SubmittedDependencyInitContainerConfigPluginSuite extends SparkFunSuite { val configPluginUnderTest = new SubmittedDependencyInitContainerConfigPluginImpl( STAGING_SERVER_URI, JARS_RESOURCE_ID, - FILES_RESOURCE_ID, - JARS_SECRET_KEY, + FILES_RESOURCE_ID, JARS_SECRET_KEY, FILES_SECRET_KEY, TRUSTSTORE_SECRET_KEY, - SECRETS_VOLUME_MOUNT_PATH, - RESOURCE_STAGING_SERVICE_SSL_OPTIONS) + CLIENT_CERT_SECRET_KEY, + true, + Some(TRUSTSTORE_FILE), + Some(CLIENT_CERT_URI), + Some(TRUSTSTORE_PASSWORD), + Some(TRUSTSTORE_TYPE), + SECRETS_VOLUME_MOUNT_PATH) val addedConfigurations = configPluginUnderTest.configurationsToFetchSubmittedDependencies() val expectedSslConfigurations = Map( RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> "true", RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key -> s"$SECRETS_VOLUME_MOUNT_PATH/$TRUSTSTORE_SECRET_KEY", RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key -> TRUSTSTORE_PASSWORD, - RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE) + RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE, + RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM.key -> "/mnt/secrets/client-cert.pem") assert(expectedSslConfigurations.toSet.subsetOf(addedConfigurations.toSet)) } } diff --git 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala index 189d87e27a28a..358edbecf8708 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala @@ -35,7 +35,9 @@ class SubmittedDependencySecretBuilderSuite extends SparkFunSuite { private val JARS_SECRET_KEY = "jars-secret-key" private val FILES_SECRET_KEY = "files-secret-key" private val TRUSTSTORE_SECRET_KEY = "truststore-secret-key" + private val CLIENT_CERT_SECRET_KEY = "client-cert" private val TRUSTSTORE_STRING_CONTENTS = "trustStore-contents" + private val CLIENT_CERT_STRING_CONTENTS = "client-certificate-contents" test("Building the secret without a trustStore") { val builder = new SubmittedDependencySecretBuilderImpl( @@ -45,7 +47,9 @@ class SubmittedDependencySecretBuilderSuite extends SparkFunSuite { JARS_SECRET_KEY, FILES_SECRET_KEY, TRUSTSTORE_SECRET_KEY, - SSLOptions()) + CLIENT_CERT_SECRET_KEY, + None, + None) val secret = builder.build() assert(secret.getMetadata.getName === SECRET_NAME) val secretDecodedData = decodeSecretData(secret) @@ -60,10 +64,12 @@ class SubmittedDependencySecretBuilderSuite extends SparkFunSuite { } test("Building the secret with a trustStore") { - val tempTrustStoreDir = Utils.createTempDir(namePrefix = "temp-truststores") + val tempSslDir = Utils.createTempDir(namePrefix = "temp-ssl-tests") try { - val trustStoreFile = new File(tempTrustStoreDir, "trustStore.jks") + val trustStoreFile = new File(tempSslDir, "trustStore.jks") Files.write(TRUSTSTORE_STRING_CONTENTS, trustStoreFile, Charsets.UTF_8) + val clientCertFile = new File(tempSslDir, "cert.pem") + Files.write(CLIENT_CERT_STRING_CONTENTS, clientCertFile, Charsets.UTF_8) val builder = new SubmittedDependencySecretBuilderImpl( SECRET_NAME, JARS_SECRET, @@ -71,13 +77,33 @@ class SubmittedDependencySecretBuilderSuite extends SparkFunSuite { JARS_SECRET_KEY, FILES_SECRET_KEY, TRUSTSTORE_SECRET_KEY, - SSLOptions(trustStore = Some(trustStoreFile))) + CLIENT_CERT_SECRET_KEY, + Some(trustStoreFile.getAbsolutePath), + Some(clientCertFile.getAbsolutePath)) val secret = builder.build() - val secretDecodedData = decodeSecretData(secret) - assert(secretDecodedData(TRUSTSTORE_SECRET_KEY) === TRUSTSTORE_STRING_CONTENTS) + val decodedSecretData = decodeSecretData(secret) + assert(decodedSecretData(TRUSTSTORE_SECRET_KEY) === TRUSTSTORE_STRING_CONTENTS) + assert(decodedSecretData(CLIENT_CERT_SECRET_KEY) === CLIENT_CERT_STRING_CONTENTS) } finally { - tempTrustStoreDir.delete() + tempSslDir.delete() } } + test("If trustStore and certificate are container-local, don't add secret entries") { + val builder = new SubmittedDependencySecretBuilderImpl( + SECRET_NAME, + JARS_SECRET, + FILES_SECRET, + JARS_SECRET_KEY, + FILES_SECRET_KEY, + TRUSTSTORE_SECRET_KEY, + CLIENT_CERT_SECRET_KEY, + Some("local:///mnt/secrets/trustStore.jks"), + Some("local:///mnt/secrets/cert.pem")) + val secret = builder.build() + val decodedSecretData = decodeSecretData(secret) + assert(!decodedSecretData.contains(TRUSTSTORE_SECRET_KEY)) + assert(!decodedSecretData.contains(CLIENT_CERT_SECRET_KEY)) + } + } From 
408c65f65c569a63508ada134139e4d5b186cd90 Mon Sep 17 00:00:00 2001 From: mccheah Date: Mon, 22 May 2017 14:31:42 -0700 Subject: [PATCH 112/156] Monitor pod status in submission v2. (#283) * Monitor pod status in submission v2. * Address comments --- .../{v1 => }/LoggingPodStatusWatcher.scala | 90 ++++++++++++++----- .../deploy/kubernetes/submit/v1/Client.scala | 14 +-- .../deploy/kubernetes/submit/v2/Client.scala | 65 +++++++++----- .../kubernetes/submit/v2/ClientV2Suite.scala | 42 +++++++-- 4 files changed, 154 insertions(+), 57 deletions(-) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v1 => }/LoggingPodStatusWatcher.scala (54%) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/LoggingPodStatusWatcher.scala similarity index 54% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/LoggingPodStatusWatcher.scala index 537bcccaa1458..1633a084e463c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/LoggingPodStatusWatcher.scala @@ -14,32 +14,36 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v1 +package org.apache.spark.deploy.kubernetes.submit -import java.util.concurrent.{CountDownLatch, Executors, TimeUnit} +import java.util.concurrent.{CountDownLatch, TimeUnit} -import io.fabric8.kubernetes.api.model.Pod +import io.fabric8.kubernetes.api.model.{ContainerStateRunning, ContainerStateTerminated, ContainerStateWaiting, ContainerStatus, Pod} import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import scala.collection.JavaConverters._ +import org.apache.spark.SparkException import org.apache.spark.internal.Logging import org.apache.spark.util.ThreadUtils +private[kubernetes] trait LoggingPodStatusWatcher extends Watcher[Pod] { + def awaitCompletion(): Unit +} + /** * A monitor for the running Kubernetes pod of a Spark application. Status logging occurs on * every state change and also at an interval for liveness. * - * @param podCompletedFuture a CountDownLatch that is set to true when the watched pod finishes * @param appId - * @param interval ms between each state request. If set to 0 or a negative number, the periodic - * logging will be disabled. + * @param maybeLoggingInterval ms between each state request. If provided, must be a positive + * number. 
*/
-private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownLatch,
-    appId: String,
-    interval: Long)
-  extends Watcher[Pod] with Logging {
+private[kubernetes] class LoggingPodStatusWatcherImpl(
+    appId: String, maybeLoggingInterval: Option[Long])
+  extends LoggingPodStatusWatcher with Logging {
+  private val podCompletedFuture = new CountDownLatch(1)
   // start timer for periodic logging
   private val scheduler =
     ThreadUtils.newDaemonSingleThreadScheduledExecutor("logging-pod-status-watcher")
@@ -47,13 +51,13 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL
     override def run() = logShortStatus()
   }
-  private var pod: Option[Pod] = Option.empty
-  private def phase: String = pod.map(_.getStatus().getPhase()).getOrElse("unknown")
-  private def status: String = pod.map(_.getStatus().getContainerStatuses().toString())
-    .getOrElse("unknown")
+  private var pod = Option.empty[Pod]
+
+  private def phase: String = pod.map(_.getStatus.getPhase).getOrElse("unknown")
   def start(): Unit = {
-    if (interval > 0) {
+    maybeLoggingInterval.foreach { interval =>
+      require(interval > 0, s"Logging interval must be a positive time value, got: $interval ms.")
       scheduler.scheduleAtFixedRate(logRunnable, 0, interval, TimeUnit.MILLISECONDS)
     }
   }
@@ -98,7 +102,7 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL
   }
   private def formatPodState(pod: Pod): String = {
-
+    // TODO include specific container state
     val details = Seq[(String, String)](
       // pod metadata
       ("pod name", pod.getMetadata.getName()),
@@ -116,17 +120,59 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL
       ("start time", pod.getStatus.getStartTime),
       ("container images", pod.getStatus.getContainerStatuses()
-        .asScala
-        .map(_.getImage)
-        .mkString(", ")),
+          .asScala
+          .map(_.getImage)
+          .mkString(", ")),
       ("phase", pod.getStatus.getPhase()),
       ("status", pod.getStatus.getContainerStatuses().toString)
     )
+    formatPairsBundle(details)
+  }
+  private def formatPairsBundle(pairs: Seq[(String, String)]) = {
     // Use more loggable format if value is null or empty
-    details.map { case (k, v) =>
-      val newValue = Option(v).filter(_.nonEmpty).getOrElse("N/A")
-      s"\n\t $k: $newValue"
+    pairs.map {
+      case (k, v) => s"\n\t $k: ${Option(v).filter(_.nonEmpty).getOrElse("N/A")}"
     }.mkString("")
   }
+
+  override def awaitCompletion(): Unit = {
+    podCompletedFuture.await()
+    logInfo(pod.map { p =>
+      s"Container final statuses:\n\n${containersDescription(p)}"
+    }.getOrElse("No containers were found in the driver pod."))
+  }
+
+  private def containersDescription(p: Pod): String = {
+    p.getStatus.getContainerStatuses.asScala.map { status =>
+      Seq(
+        ("Container name", status.getName),
+        ("Container image", status.getImage)) ++
+        containerStatusDescription(status)
+    }.map(formatPairsBundle).mkString("\n\n")
+  }
+
+  private def containerStatusDescription(
+      containerStatus: ContainerStatus): Seq[(String, String)] = {
+    val state = containerStatus.getState
+    Option(state.getRunning)
+      .orElse(Option(state.getTerminated))
+      .orElse(Option(state.getWaiting))
+      .map {
+        case running: ContainerStateRunning =>
+          Seq(
+            ("Container state", "Running"),
+            ("Container started at", running.getStartedAt))
+        case waiting: ContainerStateWaiting =>
+          Seq(
+            ("Container state", "Waiting"),
+            ("Pending reason", waiting.getReason))
+        case terminated: ContainerStateTerminated =>
+          Seq(
+            ("Container state", "Terminated"),
+            ("Exit code", terminated.getExitCode.toString))
+        case unknown => 
+ throw new SparkException(s"Unexpected container status type ${unknown.getClass}.") + }.getOrElse(Seq(("Container state", "N/A"))) + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index fa3c97c6957b5..32fc434cb693a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -33,7 +33,7 @@ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.{DriverPodKubernetesCredentialsProvider, KubernetesFileUtils} +import org.apache.spark.deploy.kubernetes.submit.{DriverPodKubernetesCredentialsProvider, KubernetesFileUtils, LoggingPodStatusWatcherImpl} import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} import org.apache.spark.internal.Logging import org.apache.spark.util.{ShutdownHookManager, Utils} @@ -83,7 +83,9 @@ private[spark] class Client( MEMORY_OVERHEAD_MIN)) private val driverContainerMemoryWithOverhead = driverContainerMemoryMb + memoryOverheadMb - private val waitForAppCompletion: Boolean = sparkConf.get(WAIT_FOR_APP_COMPLETION) + private val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) + private val loggingInterval = Some(sparkConf.get(REPORT_INTERVAL)) + .filter( _ => waitForAppCompletion) private val secretBase64String = { val secretBytes = new Array[Byte](128) @@ -147,10 +149,8 @@ private[spark] class Client( driverServiceManager.start(kubernetesClient, kubernetesAppId, sparkConf) // start outer watch for status logging of driver pod // only enable interval logging if in waitForAppCompletion mode - val loggingInterval = if (waitForAppCompletion) sparkConf.get(REPORT_INTERVAL) else 0 - val driverPodCompletedLatch = new CountDownLatch(1) - val loggingWatch = new LoggingPodStatusWatcher(driverPodCompletedLatch, kubernetesAppId, - loggingInterval) + val loggingWatch = new LoggingPodStatusWatcherImpl( + kubernetesAppId, loggingInterval) Utils.tryWithResource(kubernetesClient .pods() .withName(kubernetesDriverPodName) @@ -230,7 +230,7 @@ private[spark] class Client( // wait if configured to do so if (waitForAppCompletion) { logInfo(s"Waiting for application $kubernetesAppId to finish...") - driverPodCompletedLatch.await() + loggingWatch.awaitCompletion() logInfo(s"Application $kubernetesAppId finished.") } else { logInfo(s"Application $kubernetesAppId successfully launched.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala index 23e3e09834372..e4ca5c1458abe 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala @@ -25,6 +25,7 @@ import scala.collection.JavaConverters._ import 
org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.{LoggingPodStatusWatcher, LoggingPodStatusWatcherImpl} import org.apache.spark.deploy.rest.kubernetes.v2.ResourceStagingServerSslOptionsProviderImpl import org.apache.spark.internal.Logging import org.apache.spark.launcher.SparkLauncher @@ -48,9 +49,11 @@ private[spark] class Client( appArgs: Array[String], sparkJars: Seq[String], sparkFiles: Seq[String], + waitForAppCompletion: Boolean, kubernetesClientProvider: SubmissionKubernetesClientProvider, initContainerComponentsProvider: DriverInitContainerComponentsProvider, - kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider) + kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider, + loggingPodStatusWatcher: LoggingPodStatusWatcher) extends Logging { private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) @@ -186,27 +189,40 @@ private[spark] class Client( .endContainer() .endSpec() .build() - val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) - try { - val driverOwnedResources = Seq(initContainerConfigMap) ++ - maybeSubmittedDependenciesSecret.toSeq ++ - credentialsSecret.toSeq - val driverPodOwnerReference = new OwnerReferenceBuilder() - .withName(createdDriverPod.getMetadata.getName) - .withApiVersion(createdDriverPod.getApiVersion) - .withUid(createdDriverPod.getMetadata.getUid) - .withKind(createdDriverPod.getKind) - .withController(true) - .build() - driverOwnedResources.foreach { resource => - val originalMetadata = resource.getMetadata - originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) + Utils.tryWithResource( + kubernetesClient + .pods() + .withName(resolvedDriverPod.getMetadata.getName) + .watch(loggingPodStatusWatcher)) { _ => + val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) + try { + val driverOwnedResources = Seq(initContainerConfigMap) ++ + maybeSubmittedDependenciesSecret.toSeq ++ + credentialsSecret.toSeq + val driverPodOwnerReference = new OwnerReferenceBuilder() + .withName(createdDriverPod.getMetadata.getName) + .withApiVersion(createdDriverPod.getApiVersion) + .withUid(createdDriverPod.getMetadata.getUid) + .withKind(createdDriverPod.getKind) + .withController(true) + .build() + driverOwnedResources.foreach { resource => + val originalMetadata = resource.getMetadata + originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) + } + kubernetesClient.resourceList(driverOwnedResources: _*).createOrReplace() + } catch { + case e: Throwable => + kubernetesClient.pods().delete(createdDriverPod) + throw e + } + if (waitForAppCompletion) { + logInfo(s"Waiting for application $kubernetesAppId to finish...") + loggingPodStatusWatcher.awaitCompletion() + logInfo(s"Application $kubernetesAppId finished.") + } else { + logInfo(s"Deployed Spark application $kubernetesAppId into Kubernetes.") } - kubernetesClient.resourceList(driverOwnedResources: _*).createOrReplace() - } catch { - case e: Throwable => - kubernetesClient.pods().delete(createdDriverPod) - throw e } } } @@ -274,6 +290,9 @@ private[spark] object Client { val kubernetesClientProvider = new SubmissionKubernetesClientProviderImpl(sparkConf) val kubernetesCredentialsMounterProvider = new DriverPodKubernetesCredentialsMounterProviderImpl(sparkConf, kubernetesAppId) + val 
waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) + val loggingInterval = Option(sparkConf.get(REPORT_INTERVAL)).filter( _ => waitForAppCompletion) + val loggingPodStatusWatcher = new LoggingPodStatusWatcherImpl(kubernetesAppId, loggingInterval) new Client( appName, kubernetesAppId, @@ -282,8 +301,10 @@ private[spark] object Client { appArgs, sparkJars, sparkFiles, + waitForAppCompletion, kubernetesClientProvider, initContainerComponentsProvider, - kubernetesCredentialsMounterProvider).run() + kubernetesCredentialsMounterProvider, + loggingPodStatusWatcher).run() } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala index f0282dbb6d31a..9ad46e52747fd 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.kubernetes.submit.v2 import java.io.File import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, DoneablePod, HasMetadata, Pod, PodBuilder, PodList, Secret, SecretBuilder} -import io.fabric8.kubernetes.client.KubernetesClient +import io.fabric8.kubernetes.client.{KubernetesClient, Watch} import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource} import org.hamcrest.{BaseMatcher, Description} import org.mockito.{AdditionalAnswers, ArgumentCaptor, Mock, MockitoAnnotations} @@ -35,6 +35,7 @@ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.kubernetes.SparkPodInitContainerBootstrap import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.LoggingPodStatusWatcher class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val JARS_RESOURCE = SubmittedResourceIdAndSecret("jarsId", "jarsSecret") @@ -59,13 +60,13 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val SPARK_JARS = Seq( "hdfs://localhost:9000/app/jars/jar1.jar", "file:///app/jars/jar2.jar") private val RESOLVED_SPARK_JARS = Seq( - "hdfs://localhost:9000/app/jars/jar1.jar", "file:///var/data/spark-jars/jar2.jar") + "hdfs://localhost:9000/app/jars/jar1.jar", "file:///var/data/spark-jars/jar2.jar") private val RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS = Seq( - "/var/data/spark-jars/jar1.jar", "/var/data/spark-jars/jar2.jar") + "/var/data/spark-jars/jar1.jar", "/var/data/spark-jars/jar2.jar") private val SPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.txt", "file:///app/files/file2.txt") + "hdfs://localhost:9000/app/files/file1.txt", "file:///app/files/file2.txt") private val RESOLVED_SPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.txt", "file:///var/data/spark-files/file2.txt") + "hdfs://localhost:9000/app/files/file1.txt", "file:///var/data/spark-files/file2.txt") private val INIT_CONTAINER_SECRET = new SecretBuilder() .withNewMetadata() .withName(INIT_CONTAINER_SECRET_NAME) @@ -140,6 +141,12 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private var credentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider = _ @Mock private var credentialsMounter: DriverPodKubernetesCredentialsMounter = _ + 
@Mock + private var loggingPodStatusWatcher: LoggingPodStatusWatcher = _ + @Mock + private var namedPodResource: PodResource[Pod, DoneablePod] = _ + @Mock + private var watch: Watch = _ before { MockitoAnnotations.initMocks(this) @@ -177,6 +184,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .build() } }) + when(podOps.withName(APP_ID)).thenReturn(namedPodResource) + when(namedPodResource.watch(loggingPodStatusWatcher)).thenReturn(watch) when(containerLocalizedFilesResolver.resolveSubmittedAndRemoteSparkJars()) .thenReturn(RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS) when(containerLocalizedFilesResolver.resolveSubmittedSparkJars()) @@ -278,6 +287,25 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { }) } + test("Waiting for completion should await completion on the status watcher.") { + expectationsForNoMountedCredentials() + expectationsForNoDependencyUploader() + new Client( + APP_NAME, + APP_ID, + MAIN_CLASS, + SPARK_CONF, + APP_ARGS, + SPARK_JARS, + SPARK_FILES, + true, + kubernetesClientProvider, + initContainerComponentsProvider, + credentialsMounterProvider, + loggingPodStatusWatcher).run() + verify(loggingPodStatusWatcher).awaitCompletion() + } + private def expectationsForNoDependencyUploader(): Unit = { when(initContainerComponentsProvider .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) @@ -353,9 +381,11 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { APP_ARGS, SPARK_JARS, SPARK_FILES, + false, kubernetesClientProvider, initContainerComponentsProvider, - credentialsMounterProvider).run() + credentialsMounterProvider, + loggingPodStatusWatcher).run() val podMatcher = new BaseMatcher[Pod] { override def matches(o: scala.Any): Boolean = { o match { From 8f3d96532f966c5ba8cd5d9bebd2e0743067fb91 Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 23 May 2017 12:57:45 -0700 Subject: [PATCH 113/156] Replace submission v1 with submission v2. (#286) * Replace submission v1 with submission v2. * Address documentation changes. 
* Fix documentation --- conf/kubernetes-resource-staging-server.yaml | 63 ++ .../org/apache/spark/deploy/SparkSubmit.scala | 2 +- dev/.rat-excludes | 1 - docs/running-on-kubernetes.md | 416 ++++++---- ....kubernetes.submit.v1.DriverServiceManager | 2 - .../deploy/kubernetes/CompressionUtils.scala | 74 +- .../SparkPodInitContainerBootstrap.scala | 2 +- .../spark/deploy/kubernetes/config.scala | 84 +- .../kubernetes/submit/{v2 => }/Client.scala | 5 +- .../ContainerLocalizedFilesResolver.scala | 2 +- .../ContainerNameEqualityPredicate.scala | 2 +- ...riverInitContainerComponentsProvider.scala | 4 +- ...riverPodKubernetesCredentialsMounter.scala | 2 +- ...KubernetesCredentialsMounterProvider.scala | 3 +- .../ExecutorInitContainerConfiguration.scala | 2 +- .../submit/{v2 => }/InitContainerUtil.scala | 2 +- ...opertiesConfigMapFromScalaMapBuilder.scala | 2 +- .../SparkInitContainerConfigMapBuilder.scala | 3 +- .../SubmissionKubernetesClientProvider.scala | 2 +- ...dDependencyInitContainerConfigPlugin.scala | 2 +- .../SubmittedDependencySecretBuilder.scala | 2 +- .../SubmittedDependencyUploaderImpl.scala | 5 +- .../submit/{v2 => }/SubmittedResources.scala | 2 +- .../deploy/kubernetes/submit/v1/Client.scala | 743 ------------------ .../submit/v1/DriverServiceManager.scala | 99 --- ...DriverSubmitSslConfigurationProvider.scala | 354 --------- ...rnalSuppliedUrisDriverServiceManager.scala | 105 --- .../submit/v1/KubernetesResourceCleaner.scala | 53 -- .../v1/NodePortUrisDriverServiceManager.scala | 70 -- ...esSparkRestApi.scala => FileFetcher.scala} | 24 +- ...SparkDependencyDownloadInitContainer.scala | 50 +- .../{v1 => }/PemsToKeyStoreConverter.scala | 3 +- .../{v2 => }/ResourceStagingServer.scala | 2 +- ...ourceStagingServerSslOptionsProvider.scala | 3 +- .../{v2 => }/ResourceStagingService.scala | 4 +- .../{v2 => }/ResourceStagingServiceImpl.scala | 11 +- .../ResourceStagingServiceRetrofit.scala | 4 +- .../{v2 => }/RetrofitClientFactory.scala | 2 +- .../{v2 => }/SparkConfPropertiesParser.scala | 4 +- .../rest/kubernetes/v1/HttpClientUtil.scala | 131 --- .../v1/KubernetesRestProtocolMessages.scala | 75 -- .../v1/KubernetesSparkRestServer.scala | 483 ------------ .../v1/MultiServerFeignTarget.scala | 89 --- .../spark/deploy/kubernetes/SSLUtils.scala | 2 +- .../submit/{v2 => }/ClientV2Suite.scala | 3 +- ...ContainerLocalizedFilesResolverSuite.scala | 2 +- ...PodKubernetesCredentialsMounterSuite.scala | 4 +- ...cutorInitContainerConfigurationSuite.scala | 2 +- .../submit/{v2 => }/SSLFilePairs.scala | 2 +- ...rkInitContainerConfigMapBuilderSuite.scala | 2 +- ...ndencyInitContainerConfigPluginSuite.scala | 6 +- ...ubmittedDependencySecretBuilderSuite.scala | 4 +- .../SubmittedDependencyUploaderSuite.scala | 4 +- ...DependencyDownloadInitContainerSuite.scala | 4 +- ...StagingServerSslOptionsProviderSuite.scala | 2 +- .../{v2 => }/ResourceStagingServerSuite.scala | 2 +- .../ResourceStagingServiceImplSuite.scala | 2 +- .../src/main/docker/driver-v2/Dockerfile | 43 - .../src/main/docker/driver/Dockerfile | 18 +- .../Dockerfile | 2 +- .../docker/resource-staging-server/Dockerfile | 2 +- .../integrationtest/KubernetesSuite.scala | 248 +++++- .../KubernetesTestComponents.scala | 29 +- .../integrationtest/KubernetesV1Suite.scala | 339 -------- .../integrationtest/KubernetesV2Suite.scala | 265 ------- .../ResourceStagingServerLauncher.scala | 2 +- .../docker/SparkDockerImageBuilder.scala | 10 +- 67 files changed, 668 insertions(+), 3324 deletions(-) create mode 100644 
conf/kubernetes-resource-staging-server.yaml delete mode 100644 resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/Client.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/ContainerLocalizedFilesResolver.scala (97%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/ContainerNameEqualityPredicate.scala (95%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/DriverInitContainerComponentsProvider.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/DriverPodKubernetesCredentialsMounter.scala (99%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/DriverPodKubernetesCredentialsMounterProvider.scala (92%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/ExecutorInitContainerConfiguration.scala (97%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/InitContainerUtil.scala (97%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/PropertiesConfigMapFromScalaMapBuilder.scala (97%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SparkInitContainerConfigMapBuilder.scala (95%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmissionKubernetesClientProvider.scala (97%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmittedDependencyInitContainerConfigPlugin.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmittedDependencySecretBuilder.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmittedDependencyUploaderImpl.scala (95%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmittedResources.scala (96%) delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverServiceManager.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/ExternalSuppliedUrisDriverServiceManager.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/KubernetesResourceCleaner.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/NodePortUrisDriverServiceManager.scala rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v1/KubernetesSparkRestApi.scala => FileFetcher.scala} (56%) rename 
resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/KubernetesSparkDependencyDownloadInitContainer.scala (95%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v1 => }/PemsToKeyStoreConverter.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/ResourceStagingServer.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/ResourceStagingServerSslOptionsProvider.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/ResourceStagingService.scala (97%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/ResourceStagingServiceImpl.scala (91%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/ResourceStagingServiceRetrofit.scala (93%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/RetrofitClientFactory.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/SparkConfPropertiesParser.scala (94%) delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/HttpClientUtil.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/MultiServerFeignTarget.scala rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/ClientV2Suite.scala (99%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/ContainerLocalizedFilesResolverSuite.scala (98%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/DriverPodKubernetesCredentialsMounterSuite.scala (99%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/ExecutorInitContainerConfigurationSuite.scala (97%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SSLFilePairs.scala (94%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SparkInitContainerConfigMapBuilderSuite.scala (98%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmittedDependencyInitContainerConfigPluginSuite.scala (96%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmittedDependencySecretBuilderSuite.scala (97%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmittedDependencyUploaderSuite.scala (97%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/KubernetesSparkDependencyDownloadInitContainerSuite.scala (98%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => 
}/ResourceStagingServerSslOptionsProviderSuite.scala (99%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/ResourceStagingServerSuite.scala (99%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/ResourceStagingServiceImplSuite.scala (98%) delete mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile rename resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/{driver-init => init-container}/Dockerfile (95%) delete mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala delete mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala diff --git a/conf/kubernetes-resource-staging-server.yaml b/conf/kubernetes-resource-staging-server.yaml new file mode 100644 index 0000000000000..de0da3edcb901 --- /dev/null +++ b/conf/kubernetes-resource-staging-server.yaml @@ -0,0 +1,63 @@ +--- +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: spark-resource-staging-server +spec: + replicas: 1 + template: + metadata: + labels: + resource-staging-server-instance: default + spec: + volumes: + - name: resource-staging-server-properties + configMap: + name: spark-resource-staging-server-config + containers: + - name: spark-resource-staging-server + image: kubespark/spark-resource-staging-server:v2.1.0-kubernetes-0.1.0-alpha.3 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 100m + memory: 256Mi + volumeMounts: + - name: resource-staging-server-properties + mountPath: '/etc/spark-resource-staging-server' + args: + - '/etc/spark-resource-staging-server/resource-staging-server.properties' +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: spark-resource-staging-server-config +data: + resource-staging-server.properties: | + spark.kubernetes.resourceStagingServer.port=10000 + spark.ssl.kubernetes.resourceStagingServer.enabled=false +# Other possible properties are listed below, primarily for setting up TLS. The paths given by KeyStore, password, and PEM files here should correspond to +# files that are securely mounted into the resource staging server container, via e.g. secret volumes. 
+# spark.ssl.kubernetes.resourceStagingServer.keyStore=/mnt/secrets/resource-staging-server/keyStore.jks +# spark.ssl.kubernetes.resourceStagingServer.keyStorePassword=changeit +# spark.ssl.kubernetes.resourceStagingServer.keyPassword=changeit +# spark.ssl.kubernetes.resourceStagingServer.keyStorePasswordFile=/mnt/secrets/resource-staging-server/keystore-password.txt +# spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile=/mnt/secrets/resource-staging-server/keystore-key-password.txt +# spark.ssl.kubernetes.resourceStagingServer.keyPem=/mnt/secrets/resource-staging-server/key.pem +# spark.ssl.kubernetes.resourceStagingServer.serverCertPem=/mnt/secrets/resource-staging-server/cert.pem +--- +apiVersion: v1 +kind: Service +metadata: + name: spark-resource-staging-service +spec: + type: NodePort + selector: + resource-staging-server-instance: default + ports: + - protocol: TCP + port: 10000 + targetPort: 10000 + nodePort: 31000 diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index aeccd0088d76c..59ccf3af24ce7 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -619,7 +619,7 @@ object SparkSubmit { } if (isKubernetesCluster) { - childMainClass = "org.apache.spark.deploy.kubernetes.submit.v1.Client" + childMainClass = "org.apache.spark.deploy.kubernetes.submit.Client" childArgs += args.primaryResource childArgs += args.mainClass childArgs ++= args.childArgs diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 6a805b3293a6f..6be1c72bc6cfb 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -103,4 +103,3 @@ org.apache.spark.scheduler.ExternalClusterManager org.apache.spark.deploy.yarn.security.ServiceCredentialProvider spark-warehouse structured-streaming/* -org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 5b7bb6cc612c5..98393cbbbba2d 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -3,15 +3,25 @@ layout: global title: Running Spark on Kubernetes --- -Support for running on [Kubernetes](https://kubernetes.io/docs/whatisk8s/) is available in experimental status. The feature set is -currently limited and not well-tested. This should not be used in production environments. +Support for running on [Kubernetes](https://kubernetes.io/docs/whatisk8s/) is available in experimental status. The +feature set is currently limited and not well-tested. This should not be used in production environments. ## Prerequisites -* You must have a running Kubernetes cluster with access configured to it using [kubectl](https://kubernetes.io/docs/user-guide/prereqs/). If you do not already have a working Kubernetes cluster, you may setup a test cluster on your local machine using [minikube](https://kubernetes.io/docs/getting-started-guides/minikube/). - * We recommend that minikube be updated to the most recent version (0.18.0 at the time of this documentation), as some earlier versions may not start up the kubernetes cluster with all the necessary components. -* You must have appropriate permissions to create and list [pods](https://kubernetes.io/docs/user-guide/pods/), [nodes](https://kubernetes.io/docs/admin/node/) and [services](https://kubernetes.io/docs/user-guide/services/) in your cluster. 
You can verify that you can list these resources by running `kubectl get nodes`, `kubectl get pods` and `kubectl get svc` which should give you a list of nodes, pods and services (if any) respectively.
-* You must have a spark distribution with Kubernetes support. This may be obtained from the [release tarball](https://github.com/apache-spark-on-k8s/spark/releases) or by [building Spark with Kubernetes support](../resource-managers/kubernetes/README.md#building-spark-with-kubernetes-support).
+* You must have a running Kubernetes cluster with access configured to it
+using [kubectl](https://kubernetes.io/docs/user-guide/prereqs/). If you do not already have a working Kubernetes
+cluster, you may set up a test cluster on your local machine using
+[minikube](https://kubernetes.io/docs/getting-started-guides/minikube/).
+  * We recommend that minikube be updated to the most recent version (0.19.0 at the time of this documentation), as some
+  earlier versions may not start up the Kubernetes cluster with all the necessary components.
+* You must have appropriate permissions to create and list [pods](https://kubernetes.io/docs/user-guide/pods/),
+[ConfigMaps](https://kubernetes.io/docs/tasks/configure-pod-container/configmap/) and
+[secrets](https://kubernetes.io/docs/concepts/configuration/secret/) in your cluster. You can verify that
+you can list these resources by running `kubectl get pods`, `kubectl get configmap`, and `kubectl get secrets`, which
+should give you a list of pods, ConfigMaps, and secrets (if any), respectively.
+* You must have a Spark distribution with Kubernetes support. This may be obtained from the
+[release tarball](https://github.com/apache-spark-on-k8s/spark/releases) or by
+[building Spark with Kubernetes support](../resource-managers/kubernetes/README.md#building-spark-with-kubernetes-support).
## Driver & Executor Images
@@ -19,7 +29,8 @@ Kubernetes requires users to supply images that can be deployed into containers
be run in a container runtime environment that Kubernetes supports. Docker is a container runtime environment that is frequently used with Kubernetes, so Spark provides some support for working with Docker to get started quickly.
-If you wish to use pre-built docker images, you may use the images published in [kubespark](https://hub.docker.com/u/kubespark/). The images are as follows:
+If you wish to use pre-built docker images, you may use the images published in
+[kubespark](https://hub.docker.com/u/kubespark/). The images are as follows:
@@ -31,20 +42,27 @@ If you wish to use pre-built docker images, you may use the images published in
 <tr><th>Component</th><th>Image</th></tr>
 <tr>
   <td>Spark Executor Image</td>
   <td><code>kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2</code></td>
 </tr>
+<tr>
+  <td>Spark Initialization Image</td>
+  <td><code>kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2</code></td>
+</tr>
 </table>
    -You may also build these docker images from sources, or customize them as required. Spark distributions include the Docker files for the driver and the executor at -`dockerfiles/driver/Dockerfile` and `dockerfiles/executor/Dockerfile`, respectively. Use these Docker files to build the -Docker images, and then tag them with the registry that the images should be sent to. Finally, push the images to the -registry. +You may also build these docker images from sources, or customize them as required. Spark distributions include the +Docker files for the driver, executor, and init-container at `dockerfiles/driver/Dockerfile`, +`dockerfiles/executor/Dockerfile`, and `dockerfiles/init-container/Dockerfile` respectively. Use these Docker files to +build the Docker images, and then tag them with the registry that the images should be sent to. Finally, push the images +to the registry. For example, if the registry host is `registry-host` and the registry is listening on port 5000: cd $SPARK_HOME docker build -t registry-host:5000/spark-driver:latest -f dockerfiles/driver/Dockerfile . docker build -t registry-host:5000/spark-executor:latest -f dockerfiles/executor/Dockerfile . + docker build -t registry-host:5000/spark-init:latest -f dockerfiles/init-container/Dockerfile . docker push registry-host:5000/spark-driver:latest docker push registry-host:5000/spark-executor:latest + docker push registry-host:5000/spark-init:latest ## Submitting Applications to Kubernetes @@ -60,7 +78,8 @@ are set up as described above: --conf spark.app.name=spark-pi \ --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ - examples/jars/spark_examples_2.11-2.2.0.jar + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + local:///opt/spark/examples/jars/spark_examples_2.11-2.2.0.jar The Spark master, specified either via passing the `--master` command line argument to `spark-submit` or by setting `spark.master` in the application's configuration, must be a URL with the format `k8s://`. Prefixing the @@ -80,13 +99,53 @@ In the above example, the specific Kubernetes cluster can be used with spark sub Note that applications can currently only be executed in cluster mode, where the driver and its executors are running on the cluster. -### Specifying input files +Finally, notice that in the above example we specify a jar with a specific URI with a scheme of `local://`. This URI is +the location of the example jar that is already in the Docker image. Using dependencies that are on your machine's local +disk is discussed below. + +## Dependency Management + +Application dependencies that are being submitted from your machine need to be sent to a **resource staging server** +that the driver and executor can then communicate with to retrieve those dependencies. A YAML file denoting a minimal +set of Kubernetes resources that runs this service is located in the file `conf/kubernetes-resource-staging-server.yaml`. +This YAML file configures a Deployment with one pod running the resource staging server configured with a ConfigMap, +and exposes the server through a Service with a fixed NodePort. Deploying a resource staging server with the included +YAML file requires you to have permissions to create Deployments, Services, and ConfigMaps. 
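+
+As a quick check before applying the YAML, you can verify that your current credentials are allowed to create these
+resources. This is only an illustrative sketch that assumes you are deploying into the `default` namespace; substitute
+the namespace you actually intend to use:
+
+    kubectl auth can-i create deployments --namespace default
+    kubectl auth can-i create services --namespace default
+    kubectl auth can-i create configmaps --namespace default
+
+Each command prints `yes` or `no` depending on whether your account has the corresponding permission.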
+ +To run the resource staging server with default configurations, the Kubernetes resources can be created: + + kubectl create -f conf/kubernetes-resource-staging-server.yaml + +and then you can compute the value of Pi as follows: + + bin/spark-submit \ + --deploy-mode cluster \ + --class org.apache.spark.examples.SparkPi \ + --master k8s://: \ + --kubernetes-namespace default \ + --conf spark.executor.instances=5 \ + --conf spark.app.name=spark-pi \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.resourceStagingServer.uri=http://:31000 \ + examples/jars/spark_examples_2.11-2.2.0.jar + +The Docker image for the resource staging server may also be built from source, in a similar manner to the driver +and executor images. The Dockerfile is provided in `dockerfiles/resource-staging-server/Dockerfile`. + +The provided YAML file specifically sets the NodePort to 31000 on the service's specification. If port 31000 is not +available on any of the nodes of your cluster, you should remove the NodePort field from the service's specification +and allow the Kubernetes cluster to determine the NodePort itself. Be sure to provide the correct port in the resource +staging server URI when submitting your application, in accordance to the NodePort chosen by the Kubernetes cluster. + +### Dependency Management Without The Resource Staging Server -Spark supports specifying JAR paths that are either on the submitting host's disk, or are located on the disk of the -driver and executors. Refer to the [application submission](submitting-applications.html#advanced-dependency-management) -section for details. Note that files specified with the `local://` scheme should be added to the container image of both -the driver and the executors. Files without a scheme or with the scheme `file://` are treated as being on the disk of -the submitting machine, and are uploaded to the driver running in Kubernetes before launching the application. +Note that this resource staging server is only required for submitting local dependencies. If your application's +dependencies are all hosted in remote locations like HDFS or http servers, they may be referred to by their appropriate +remote URIs. Also, application dependencies can be pre-mounted into custom-built Docker images. Those dependencies +can be added to the classpath by referencing them with `local://` URIs and/or setting the `SPARK_EXTRA_CLASSPATH` +environment variable in your Dockerfiles. ### Accessing Kubernetes Clusters @@ -111,70 +170,127 @@ If our local proxy were listening on port 8001, we would have our submission loo --conf spark.app.name=spark-pi \ --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ - examples/jars/spark_examples_2.11-2.2.0.jar + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + local:///opt/spark/examples/jars/spark_examples_2.11-2.2.0.jar Communication between Spark and Kubernetes clusters is performed using the fabric8 kubernetes-client library. 
The above mechanism using `kubectl proxy` can be used when we have authentication providers that the fabric8 kubernetes-client library does not support. Authentication using X509 Client Certs and OAuth tokens is currently supported. +## Dynamic Executor Scaling + +Spark on Kubernetes supports Dynamic Allocation with cluster mode. This mode requires running +an external shuffle service. This is typically a [daemonset](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) +with a provisioned [hostpath](https://kubernetes.io/docs/concepts/storage/volumes/#hostpath) volume. +This shuffle service may be shared by executors belonging to different SparkJobs. Using Spark with dynamic allocation +on Kubernetes assumes that a cluster administrator has set up one or more shuffle-service daemonsets in the cluster. + +A sample configuration file is provided in `conf/kubernetes-shuffle-service.yaml` which can be customized as needed +for a particular cluster. It is important to note that `spec.template.metadata.labels` are setup appropriately for the shuffle +service because there may be multiple shuffle service instances running in a cluster. The labels give Spark applications +a way to target a particular shuffle service. + +For example, if the shuffle service we want to use is in the default namespace, and +has pods with labels `app=spark-shuffle-service` and `spark-version=2.1.0`, we can +use those tags to target that particular shuffle service at job launch time. In order to run a job with dynamic allocation enabled, +the command may then look like the following: + + bin/spark-submit \ + --deploy-mode cluster \ + --class org.apache.spark.examples.GroupByTest \ + --master k8s://: \ + --kubernetes-namespace default \ + --conf spark.app.name=group-by-test \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:latest \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:latest \ + --conf spark.dynamicAllocation.enabled=true \ + --conf spark.shuffle.service.enabled=true \ + --conf spark.kubernetes.shuffle.namespace=default \ + --conf spark.kubernetes.shuffle.labels="app=spark-shuffle-service,spark-version=2.1.0" \ + local:///opt/spark/examples/jars/spark_examples_2.11-2.2.0.jar 10 400000 2 + ## Advanced - -### Setting Up TLS For Submitting the Driver - -When submitting to Kubernetes, a pod is started for the driver, and the pod starts an HTTP server. This HTTP server -receives the driver's configuration, including uploaded driver jars, from the client before starting the application. -Spark supports using TLS to encrypt the traffic in this bootstrapping process. It is recommended to configure this -whenever possible. - -See the [security page](security.html) and [configuration](configuration.html) sections for more information on -configuring TLS; use the prefix `spark.ssl.kubernetes.driversubmitserver` in configuring the TLS-related fields in the context -of submitting to Kubernetes. For example, to set the trustStore used when the local machine communicates with the driver -pod in starting the application, set `spark.ssl.kubernetes.driversubmitserver.trustStore`. - -One note about the keyStore is that it can be specified as either a file on the client machine or a file in the -container image's disk. Thus `spark.ssl.kubernetes.driversubmitserver.keyStore` can be a URI with a scheme of either `file:` -or `local:`. 
A scheme of `file:` corresponds to the keyStore being located on the client machine; it is mounted onto -the driver container as a [secret volume](https://kubernetes.io/docs/user-guide/secrets/). When the URI has the scheme -`local:`, the file is assumed to already be on the container's disk at the appropriate path. - -Finally, the submission server and client can be configured to use PEM files instead of Java keyStores. When using -this mode, set `spark.ssl.kubernetes.driversubmitserver.keyPem` and -`spark.ssl.kubernetes.driversubmitserver.serverCertPem` to configure the key and certificate files on the driver -submission server. These files can be uploaded from the submitter's machine if they have no scheme or a scheme of -`file:`, or they can be located on the container's disk if they have the scheme `local:`. The client's certificate -file should be provided via setting `spark.ssl.kubernetes.driversubmitserver.clientCertPem`, and this file must be -located on the submitting machine's local disk. - -### Submission of Local Files through Ingress/External controller - -Kubernetes pods run with their own IP address space. If Spark is run in cluster mode, the driver pod may not be -accessible to the submitter. However, the submitter needs to send local dependencies from its local disk to the driver -pod. - -By default, Spark will place a [Service](https://kubernetes.io/docs/user-guide/services/#type-nodeport) with a NodePort -that is opened on every node. The submission client will then contact the driver at one of the node's -addresses with the appropriate service port. - -There may be cases where the nodes cannot be reached by the submission client. For example, the cluster may -only be reachable through an external load balancer. The user may provide their own external URI for Spark driver -services. To use a your own external URI instead of a node's IP and node port, first set -`spark.kubernetes.driver.serviceManagerType` to `ExternalAnnotation`. A service will be created with the annotation -`spark-job.alpha.apache.org/provideExternalUri`, and this service routes to the driver pod. You will need to run a -separate process that watches the API server for services that are created with this annotation in the application's -namespace (set by `spark.kubernetes.namespace`). The process should determine a URI that routes to this service -(potentially configuring infrastructure to handle the URI behind the scenes), and patch the service to include an -annotation `spark-job.alpha.apache.org/resolvedExternalUri`, which has its value as the external URI that your process -has provided (e.g. `https://example.com:8080/my-job`). - -Note that the URI provided in the annotation needs to route traffic to the appropriate destination on the pod, which has -a empty path portion of the URI. This means the external URI provider will likely need to rewrite the path from the -external URI to the destination on the pod, e.g. https://example.com:8080/spark-app-1/submit will need to route traffic -to https://:/. Note that the paths of these two URLs are different. - -If the above is confusing, keep in mind that this functionality is only necessary if the submitter cannot reach any of -the nodes at the driver's node port. It is recommended to use the default configuration with the node port service -whenever possible. + +### Securing the Resource Staging Server with TLS + +The default configuration of the resource staging server is not secured with TLS. 
It is highly recommended to configure
+this to protect the secrets and jars/files being submitted through the staging server.
+
+The YAML file in `conf/kubernetes-resource-staging-server.yaml` includes a ConfigMap resource that holds the resource
+staging server's configuration. The properties can be adjusted here to make the resource staging server listen over TLS.
+Refer to the [security](security.html) page for the available settings related to TLS. The SSL configuration namespace
+for the resource staging server is `kubernetes.resourceStagingServer`, so, for example, the path to the server's
+keyStore would be set by `spark.ssl.kubernetes.resourceStagingServer.keyStore`.
+
+In addition to the settings specified on the previously linked security page, the resource staging server supports the
+following additional configurations:
+
+<table class="table">
+<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
+<tr>
+  <td><code>spark.ssl.kubernetes.resourceStagingServer.keyPem</code></td>
+  <td>(none)</td>
+  <td>
+    Private key file encoded in PEM format that the resource staging server uses to secure connections over TLS. If this
+    is specified, the associated public key file must be specified in
+    <code>spark.ssl.kubernetes.resourceStagingServer.serverCertPem</code>. PEM files and a keyStore file (set by
+    <code>spark.ssl.kubernetes.resourceStagingServer.keyStore</code>) cannot both be specified at the same time.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.ssl.kubernetes.resourceStagingServer.serverCertPem</code></td>
+  <td>(none)</td>
+  <td>
+    Certificate file encoded in PEM format that the resource staging server uses to secure connections over TLS. If this
+    is specified, the associated private key file must be specified in
+    <code>spark.ssl.kubernetes.resourceStagingServer.keyPem</code>. PEM files and a keyStore file (set by
+    <code>spark.ssl.kubernetes.resourceStagingServer.keyStore</code>) cannot both be specified at the same time.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.ssl.kubernetes.resourceStagingServer.keyStorePasswordFile</code></td>
+  <td>(none)</td>
+  <td>
+    Provides the keyStore password through a file in the container instead of a static value. This is useful if the
+    keyStore's password is to be mounted into the container with a secret.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile</code></td>
+  <td>(none)</td>
+  <td>
+    Provides the keyStore's key password using a file in the container instead of a static value. This is useful if the
+    keyStore's key password is to be mounted into the container with a secret.
+  </td>
+</tr>
+</table>
    + +Note that while the properties can be set in the ConfigMap, you will still need to consider the means of mounting the +appropriate secret files into the resource staging server's container. A common mechanism that is used for this is +to use [Kubernetes secrets](https://kubernetes.io/docs/concepts/configuration/secret/) that are mounted as secret +volumes. Refer to the appropriate Kubernetes documentation for guidance and adjust the resource staging server's +specification in the provided YAML file accordingly. + +Finally, when you submit your application, you must specify either a trustStore or a PEM-encoded certificate file to +communicate with the resource staging server over TLS. The trustStore can be set with +`spark.ssl.kubernetes.resourceStagingServer.trustStore`, or a certificate file can be set with +`spark.ssl.kubernetes.resourceStagingServer.clientCertPem`. For example, our SparkPi example now looks like this: + + bin/spark-submit \ + --deploy-mode cluster \ + --class org.apache.spark.examples.SparkPi \ + --master k8s://https://: \ + --kubernetes-namespace default \ + --conf spark.executor.instances=5 \ + --conf spark.app.name=spark-pi \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.resourceStagingServer.uri=https://:31000 \ + --conf spark.ssl.kubernetes.resourceStagingServer.enabled=true \ + --conf spark.ssl.kubernetes.resourceStagingServer.clientCertPem=/home/myuser/cert.pem \ + examples/jars/spark_examples_2.11-2.2.0.jar ### Spark Properties @@ -208,6 +324,16 @@ from the other deployment modes. See the [configuration page](configuration.html Docker tag format. + + spark.kubernetes.initcontainer.docker.image + spark-init:2.2.0 + + Docker image to use for the init-container that is run before the driver and executor containers. Specify this using + the standard Docker tag format. The + init-container is responsible for fetching application dependencies from both remote locations like HDFS or S3, + and from the resource staging server, if applicable. + + spark.kubernetes.shuffle.namespace default @@ -218,7 +344,7 @@ from the other deployment modes. See the [configuration page](configuration.html spark.kubernetes.shuffle.labels - (none) + (none) Labels that will be used to look up shuffle service pods. This should be a comma-separated list of label key-value pairs, where each label is in the format key=value. The labels chosen must be such that @@ -334,123 +460,113 @@ from the other deployment modes. See the [configuration page](configuration.html - spark.kubernetes.driver.submissionServerMemory - 256m + spark.kubernetes.driver.labels + (none) - The amount of memory to allocate for the driver submission server. + Custom labels that will be added to the driver pod. This should be a comma-separated list of label key-value pairs, + where each label is in the format key=value. Note that Spark also adds its own labels to the driver pod + for bookkeeping purposes. - spark.kubernetes.driver.memoryOverhead - (driverMemory + driverSubmissionServerMemory) * 0.10, with minimum of 384 + spark.kubernetes.driver.annotations + (none) - The amount of off-heap memory (in megabytes) to be allocated for the driver and the driver submission server. 
This - is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to - grow with the driver size (typically 6-10%). + Custom annotations that will be added to the driver pod. This should be a comma-separated list of label key-value + pairs, where each annotation is in the format key=value. - spark.kubernetes.driver.labels + spark.kubernetes.driver.pod.name (none) - Custom labels that will be added to the driver pod. This should be a comma-separated list of label key-value pairs, - where each label is in the format key=value. Note that Spark also adds its own labels to the driver pod - for bookkeeping purposes. + Name of the driver pod. If not set, the driver pod name is set to "spark.app.name" suffixed by the current timestamp + to avoid name conflicts. - spark.kubernetes.driver.annotations + spark.kubernetes.submission.waitAppCompletion + true + + In cluster mode, whether to wait for the application to finish before exiting the launcher process. When changed to + false, the launcher has a "fire-and-forget" behavior when launching the Spark job. + + + + spark.kubernetes.resourceStagingServer.port + 10000 + + Port for the resource staging server to listen on when it is deployed. + + + + spark.kubernetes.resourceStagingServer.uri (none) - Custom annotations that will be added to the driver pod. This should be a comma-separated list of label key-value - pairs, where each annotation is in the format key=value. + URI of the resource staging server that Spark should use to distribute the application's local dependencies. Note + that by default, this URI must be reachable by both the submitting machine and the pods running in the cluster. If + one URI is not simultaneously reachable both by the submitter and the driver/executor pods, configure the pods to + access the staging server at a different URI by setting + spark.kubernetes.resourceStagingServer.internal.uri as discussed below. - spark.kubernetes.driverSubmissionTimeout - 60s + spark.kubernetes.resourceStagingServer.internal.uri + Value of spark.kubernetes.resourceStagingServer.uri - Time to wait for the driver pod to start running before aborting its execution. + URI of the resource staging server to communicate with when init-containers bootstrap the driver and executor pods + with submitted local dependencies. Note that this URI must by the pods running in the cluster. This is useful to + set if the resource staging server has a separate "internal" URI that must be accessed by components running in the + cluster. - spark.kubernetes.driver.service.exposeUiPort - false + spark.ssl.kubernetes.resourceStagingServer.internal.trustStore + Value of spark.ssl.kubernetes.resourceStagingServer.trustStore - Whether to expose the driver Web UI port as a service NodePort. Turned off by default because NodePort is a limited - resource. + Location of the trustStore file to use when communicating with the resource staging server over TLS, as + init-containers bootstrap the driver and executor pods with submitted local dependencies. This can be a URI with a + scheme of local://, which denotes that the file is pre-mounted on the pod's disk. A uri without a + scheme or a scheme of file:// will result in this file being mounted from the submitting machine's + disk as a secret into the init-containers. 
- spark.kubernetes.driver.pod.name - (none) + spark.ssl.kubernetes.resourceStagingServer.internal.trustStorePassword + Value of spark.ssl.kubernetes.resourceStagingServer.trustStorePassword - Name of the driver pod. If not set, the driver pod name is set to "spark.app.name" suffixed by the current timestamp to avoid name conflicts. + Password of the trustStore file that is used when communicating with the resource staging server over TLS, as + init-containers bootstrap the driver and executor pods with submitted local dependencies. - spark.kubernetes.submission.waitAppCompletion - true + spark.ssl.kubernetes.resourceStagingServer.internal.trustStoreType + Value of spark.ssl.kubernetes.resourceStagingServer.trustStoreType - In cluster mode, whether to wait for the application to finish before exiting the launcher process. When changed to - false, the launcher has a "fire-and-forget" behavior when launching the Spark job. + Type of the trustStore file that is used when communicating with the resource staging server over TLS, when + init-containers bootstrap the driver and executor pods with submitted local dependencies. - spark.kubernetes.report.interval - 1s + spark.ssl.kubernetes.resourceStagingServer.internal.clientCertPem + Value of spark.ssl.kubernetes.resourceStagingServer.clientCertPem - Interval between reports of the current Spark job status in cluster mode. + Location of the certificate file to use when communicating with the resource staging server over TLS, as + init-containers bootstrap the driver and executor pods with submitted local dependencies. This can be a URI with a + scheme of local://, which denotes that the file is pre-mounted on the pod's disk. A uri without a + scheme or a scheme of file:// will result in this file being mounted from the submitting machine's + disk as a secret into the init-containers. - spark.kubernetes.driver.serviceManagerType - NodePort + spark.kubernetes.report.interval + 1s - A tag indicating which class to use for creating the Kubernetes service and determining its URI for the submission - client. Valid values are currently NodePort and ExternalAnnotation. By default, a service - is created with the NodePort type, and the driver will be contacted at one of the nodes at the port - that the nodes expose for the service. If the nodes cannot be contacted from the submitter's machine, consider - setting this to ExternalAnnotation as described in "Determining the Driver Base URI" above. One may - also include a custom implementation of org.apache.spark.deploy.rest.kubernetes.DriverServiceManager on - the submitter's classpath - spark-submit service loads an instance of that class. To use the custom - implementation, set this value to the custom implementation's return value of - DriverServiceManager#getServiceManagerType(). This method should only be done as a last resort. + Interval between reports of the current Spark job status in cluster mode. -## Dynamic Executor Scaling - -Spark on Kubernetes supports Dynamic Allocation with cluster mode. This mode requires running -an external shuffle service. This is typically a [daemonset](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) -with a provisioned [hostpath](https://kubernetes.io/docs/concepts/storage/volumes/#hostpath) volume. -This shuffle service may be shared by executors belonging to different SparkJobs. Using Spark with dynamic allocation -on Kubernetes assumes that a cluster administrator has set up one or more shuffle-service daemonsets in the cluster. 
- -A sample configuration file is provided in `conf/kubernetes-shuffle-service.yaml` which can be customized as needed -for a particular cluster. It is important to note that `spec.template.metadata.labels` are setup appropriately for the shuffle -service because there may be multiple shuffle service instances running in a cluster. The labels give us a way to target a particular -shuffle service. - -For example, if the shuffle service we want to use is in the default namespace, and -has pods with labels `app=spark-shuffle-service` and `spark-version=2.1.0`, we can -use those tags to target that particular shuffle service at job launch time. In order to run a job with dynamic allocation enabled, -the command may then look like the following: - - bin/spark-submit \ - --deploy-mode cluster \ - --class org.apache.spark.examples.GroupByTest \ - --master k8s://: \ - --kubernetes-namespace default \ - --conf spark.app.name=group-by-test \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:latest \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:latest \ - --conf spark.dynamicAllocation.enabled=true \ - --conf spark.shuffle.service.enabled=true \ - --conf spark.kubernetes.shuffle.namespace=default \ - --conf spark.kubernetes.shuffle.labels="app=spark-shuffle-service,spark-version=2.1.0" \ - examples/jars/spark_examples_2.11-2.2.0.jar 10 400000 2 ## Current Limitations diff --git a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager deleted file mode 100644 index 2ed0387c51bc6..0000000000000 --- a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager +++ /dev/null @@ -1,2 +0,0 @@ -org.apache.spark.deploy.kubernetes.submit.v1.ExternalSuppliedUrisDriverServiceManager -org.apache.spark.deploy.kubernetes.submit.v1.NodePortUrisDriverServiceManager diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/CompressionUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/CompressionUtils.scala index 03991ba26a6f7..a6f0ca502f6f0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/CompressionUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/CompressionUtils.scala @@ -16,19 +16,17 @@ */ package org.apache.spark.deploy.kubernetes -import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream, InputStream, OutputStream} +import java.io.{File, FileInputStream, FileOutputStream, InputStream, OutputStream} import java.util.zip.{GZIPInputStream, GZIPOutputStream} import com.google.common.io.Files -import org.apache.commons.codec.binary.Base64 import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream, TarArchiveOutputStream} import org.apache.commons.compress.utils.CharsetNames import org.apache.commons.io.IOUtils import scala.collection.mutable -import org.apache.spark.deploy.rest.kubernetes.v1.TarGzippedData import org.apache.spark.internal.Logging -import org.apache.spark.util.{ByteBufferOutputStream, Utils} +import org.apache.spark.util.Utils private[spark] object CompressionUtils extends Logging { // Defaults from TarArchiveOutputStream @@ 
-36,30 +34,6 @@ private[spark] object CompressionUtils extends Logging { private val RECORD_SIZE = 512 private val ENCODING = CharsetNames.UTF_8 - /** - * Compresses all of the given paths into a gzipped-tar archive, returning the compressed data in - * memory as an instance of {@link TarGzippedData}. The files are taken without consideration to - * their original folder structure, and are added to the tar archive in a flat hierarchy. - * Directories are not allowed, and duplicate file names are de-duplicated by appending a numeric - * suffix to the file name, before the file extension. For example, if paths a/b.txt and b/b.txt - * were provided, then the files added to the tar archive would be b.txt and b-1.txt. - * @param paths A list of file paths to be archived - * @return An in-memory representation of the compressed data. - */ - def createTarGzip(paths: Iterable[String]): TarGzippedData = { - val compressedBytesStream = Utils.tryWithResource(new ByteBufferOutputStream()) { raw => - writeTarGzipToStream(raw, paths) - raw - } - val compressedAsBase64 = Base64.encodeBase64String(compressedBytesStream.toByteBuffer.array) - TarGzippedData( - dataBase64 = compressedAsBase64, - blockSize = BLOCK_SIZE, - recordSize = RECORD_SIZE, - encoding = ENCODING - ) - } - def writeTarGzipToStream(outputStream: OutputStream, paths: Iterable[String]): Unit = { Utils.tryWithResource(new GZIPOutputStream(outputStream)) { gzipping => Utils.tryWithResource(new TarArchiveOutputStream( @@ -98,50 +72,14 @@ private[spark] object CompressionUtils extends Logging { } } - /** - * Decompresses the provided tar archive to a directory. - * @param compressedData In-memory representation of the compressed data, ideally created via - * {@link createTarGzip}. - * @param rootOutputDir Directory to write the output files to. All files from the tarball - * are written here in a flat hierarchy. - * @return List of file paths for each file that was unpacked from the archive. 
- */ - def unpackAndWriteCompressedFiles( - compressedData: TarGzippedData, - rootOutputDir: File): Seq[String] = { - val compressedBytes = Base64.decodeBase64(compressedData.dataBase64) - if (!rootOutputDir.exists) { - if (!rootOutputDir.mkdirs) { - throw new IllegalStateException(s"Failed to create output directory for unpacking" + - s" files at ${rootOutputDir.getAbsolutePath}") - } - } else if (rootOutputDir.isFile) { - throw new IllegalArgumentException(s"Root dir for writing decompressed files: " + - s"${rootOutputDir.getAbsolutePath} exists and is not a directory.") - } - Utils.tryWithResource(new ByteArrayInputStream(compressedBytes)) { compressedBytesStream => - unpackTarStreamToDirectory( - compressedBytesStream, - rootOutputDir, - compressedData.blockSize, - compressedData.recordSize, - compressedData.encoding) - } - } - - def unpackTarStreamToDirectory( - inputStream: InputStream, - outputDir: File, - blockSize: Int = BLOCK_SIZE, - recordSize: Int = RECORD_SIZE, - encoding: String = ENCODING): Seq[String] = { + def unpackTarStreamToDirectory(inputStream: InputStream, outputDir: File): Seq[String] = { val paths = mutable.Buffer.empty[String] Utils.tryWithResource(new GZIPInputStream(inputStream)) { gzipped => Utils.tryWithResource(new TarArchiveInputStream( gzipped, - blockSize, - recordSize, - encoding)) { tarInputStream => + BLOCK_SIZE, + RECORD_SIZE, + ENCODING)) { tarInputStream => var nextTarEntry = tarInputStream.getNextTarEntry while (nextTarEntry != null) { val outputFile = new File(outputDir, nextTarEntry.getName) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala index 227420db4636d..0d4e82566643d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.kubernetes import io.fabric8.kubernetes.api.model.{ContainerBuilder, EmptyDirVolumeSource, PodBuilder, VolumeMount, VolumeMountBuilder} import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.v2.{ContainerNameEqualityPredicate, InitContainerUtil} +import org.apache.spark.deploy.kubernetes.submit.{ContainerNameEqualityPredicate, InitContainerUtil} private[spark] trait SparkPodInitContainerBootstrap { /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 759a7df505829..bcb9a96cae960 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -20,7 +20,6 @@ import java.util.concurrent.TimeUnit import org.apache.spark.{SPARK_VERSION => sparkVersion} import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.v1.NodePortUrisDriverServiceManager import org.apache.spark.internal.Logging import org.apache.spark.internal.config.ConfigBuilder import org.apache.spark.network.util.ByteUnit @@ -212,77 +211,6 @@ package object config extends Logging { .stringConf 
.createOptional - private[spark] val KUBERNETES_DRIVER_SUBMIT_TIMEOUT = - ConfigBuilder("spark.kubernetes.driverSubmissionTimeout") - .doc("Time to wait for the driver process to start running before aborting its execution.") - .timeConf(TimeUnit.SECONDS) - .createWithDefault(60L) - - private[spark] val KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE = - ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.keyStore") - .doc("KeyStore file for the driver submission server listening on SSL. Can be pre-mounted" + - " on the driver container or uploaded from the submitting client.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE = - ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.trustStore") - .doc("TrustStore containing certificates for communicating to the driver submission server" + - " over SSL.") - .stringConf - .createOptional - - private[spark] val DRIVER_SUBMIT_SSL_ENABLED = - ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.enabled") - .doc("Whether or not to use SSL when sending the application dependencies to the driver pod.") - .booleanConf - .createWithDefault(false) - - private[spark] val DRIVER_SUBMIT_SSL_KEY_PEM = - ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.keyPem") - .doc("Key PEM file that the driver submission server will use when setting up TLS" + - " connections. Can be pre-mounted on the driver pod's disk or uploaded from the" + - " submitting client's machine.") - .stringConf - .createOptional - - private[spark] val DRIVER_SUBMIT_SSL_SERVER_CERT_PEM = - ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.serverCertPem") - .doc("Certificate PEM file that is associated with the key PEM file" + - " the submission server uses to set up TLS connections. Can be pre-mounted" + - " on the driver pod's disk or uploaded from the submitting client's machine.") - .stringConf - .createOptional - - private[spark] val DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM = - ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.clientCertPem") - .doc("Certificate pem file that the submission client uses to connect to the submission" + - " server over TLS. This should often be the same as the server certificate, but can be" + - " different if the submission client will contact the driver through a proxy instead of" + - " the driver service directly.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_SERVICE_NAME = - ConfigBuilder("spark.kubernetes.driver.service.name") - .doc("Kubernetes service that exposes the driver pod for external access.") - .internal() - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_SUBMIT_SERVER_MEMORY = - ConfigBuilder("spark.kubernetes.driver.submissionServerMemory") - .doc("The amount of memory to allocate for the driver submission server.") - .bytesConf(ByteUnit.MiB) - .createWithDefaultString("256m") - - private[spark] val EXPOSE_KUBERNETES_DRIVER_SERVICE_UI_PORT = - ConfigBuilder("spark.kubernetes.driver.service.exposeUiPort") - .doc("Whether to expose the driver Web UI port as a service NodePort. Turned off by default" + - " because NodePort is a limited resource. 
Use alternatives if possible.") - .booleanConf - .createWithDefault(false) - private[spark] val KUBERNETES_DRIVER_POD_NAME = ConfigBuilder("spark.kubernetes.driver.pod.name") .doc("Name of the driver pod.") @@ -327,13 +255,6 @@ package object config extends Logging { .longConf .createWithDefault(1) - private[spark] val DRIVER_SERVICE_MANAGER_TYPE = - ConfigBuilder("spark.kubernetes.driver.serviceManagerType") - .doc("A tag indicating which class to use for creating the Kubernetes service and" + - " determining its URI for the submission client.") - .stringConf - .createWithDefault(NodePortUrisDriverServiceManager.TYPE) - private[spark] val WAIT_FOR_APP_COMPLETION = ConfigBuilder("spark.kubernetes.submission.waitAppCompletion") .doc("In cluster mode, whether to wait for the application to finish before exiting the" + @@ -347,8 +268,7 @@ package object config extends Logging { .timeConf(TimeUnit.MILLISECONDS) .createWithDefaultString("1s") - // Spark dependency server for submission v2 - + // Spark resource staging server. private[spark] val RESOURCE_STAGING_SERVER_PORT = ConfigBuilder("spark.kubernetes.resourceStagingServer.port") .doc("Port for the Kubernetes resource staging server to listen on.") @@ -451,7 +371,7 @@ package object config extends Logging { .stringConf .createOptional - // Driver and Init-Container parameters for submission v2 + // Driver and Init-Container parameters private[spark] val RESOURCE_STAGING_SERVER_URI = ConfigBuilder("spark.kubernetes.resourceStagingServer.uri") .doc("Base URI for the Spark resource staging server.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index e4ca5c1458abe..bfb0bc3ffb0f3 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.File import java.util.Collections @@ -25,8 +25,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.{LoggingPodStatusWatcher, LoggingPodStatusWatcherImpl} -import org.apache.spark.deploy.rest.kubernetes.v2.ResourceStagingServerSslOptionsProviderImpl +import org.apache.spark.deploy.rest.kubernetes.ResourceStagingServerSslOptionsProviderImpl import org.apache.spark.internal.Logging import org.apache.spark.launcher.SparkLauncher import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolver.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala similarity index 97% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolver.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala index 5505d87fa8072..c635484c4c124 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolver.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.File diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerNameEqualityPredicate.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerNameEqualityPredicate.scala similarity index 95% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerNameEqualityPredicate.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerNameEqualityPredicate.scala index 5101e1506e4d5..434919208ba2e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerNameEqualityPredicate.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerNameEqualityPredicate.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.lang.Boolean diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala index 0a5e6cd216011..7fbb0c9274bf5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import org.apache.spark.{SparkConf, SSLOptions} import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, OptionRequirements, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.v2.RetrofitClientFactoryImpl +import org.apache.spark.deploy.rest.kubernetes.RetrofitClientFactoryImpl import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala similarity index 99% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounter.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala index 9759669335774..ded0237732ce0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounter.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import io.fabric8.kubernetes.api.model.{PodBuilder, Secret, SecretBuilder} import scala.collection.JavaConverters._ diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala similarity index 92% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterProvider.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala index e981c54d23a9d..3f0e7d97275a5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala @@ -14,11 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.submit.DriverPodKubernetesCredentialsProvider private[spark] trait DriverPodKubernetesCredentialsMounterProvider { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfiguration.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfiguration.scala similarity index 97% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfiguration.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfiguration.scala index adfdc060f0d0f..2292365995d1f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfiguration.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfiguration.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/InitContainerUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerUtil.scala similarity index 97% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/InitContainerUtil.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerUtil.scala index 0526ca53baaab..9b7faaa78a9aa 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/InitContainerUtil.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerUtil.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/PropertiesConfigMapFromScalaMapBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PropertiesConfigMapFromScalaMapBuilder.scala similarity index 97% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/PropertiesConfigMapFromScalaMapBuilder.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PropertiesConfigMapFromScalaMapBuilder.scala index cb9194552d2b6..8103272c27518 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/PropertiesConfigMapFromScalaMapBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PropertiesConfigMapFromScalaMapBuilder.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.StringWriter import java.util.Properties diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilder.scala similarity index 95% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilder.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilder.scala index 362fbbdf517dc..4062a3113eddf 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilder.scala @@ -14,12 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import io.fabric8.kubernetes.api.model.ConfigMap import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils private[spark] trait SparkInitContainerConfigMapBuilder { /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmissionKubernetesClientProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmissionKubernetesClientProvider.scala similarity index 97% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmissionKubernetesClientProvider.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmissionKubernetesClientProvider.scala index af3de6ce85026..17b61d4a6ace0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmissionKubernetesClientProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmissionKubernetesClientProvider.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPlugin.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPlugin.scala index 1b086e60d3d0d..06d3648efb89f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPlugin.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import org.apache.spark.SparkException import org.apache.spark.deploy.kubernetes.config._ diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilder.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilder.scala index 1a33757e45aa0..7850853df97e6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilder.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.File diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala similarity index 95% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala index 5f98facfb691f..9d0d863d174bc 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.{File, FileOutputStream} import javax.ws.rs.core.MediaType @@ -26,8 +26,7 @@ import retrofit2.Call import org.apache.spark.{SparkException, SSLOptions} import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} -import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils -import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory} +import org.apache.spark.deploy.rest.kubernetes.{ResourceStagingServiceRetrofit, RetrofitClientFactory} import org.apache.spark.util.Utils private[spark] trait SubmittedDependencyUploader { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedResources.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedResources.scala similarity index 96% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedResources.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedResources.scala index f4e5e991180ce..225972c1057f2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedResources.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedResources.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit case class SubmittedResourceIdAndSecret(resourceId: String, resourceSecret: String) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala deleted file mode 100644 index 32fc434cb693a..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ /dev/null @@ -1,743 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit.v1 - -import java.io.File -import java.security.SecureRandom -import java.util.ServiceLoader -import java.util.concurrent.{CountDownLatch, TimeUnit} - -import com.google.common.io.Files -import com.google.common.util.concurrent.SettableFuture -import io.fabric8.kubernetes.api.model._ -import io.fabric8.kubernetes.client.{ConfigBuilder => K8SConfigBuilder, DefaultKubernetesClient, KubernetesClient, KubernetesClientException, Watcher} -import io.fabric8.kubernetes.client.Watcher.Action -import org.apache.commons.codec.binary.Base64 -import scala.collection.JavaConverters._ - -import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.{DriverPodKubernetesCredentialsProvider, KubernetesFileUtils, LoggingPodStatusWatcherImpl} -import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} -import org.apache.spark.internal.Logging -import org.apache.spark.util.{ShutdownHookManager, Utils} - -private[spark] class Client( - sparkConf: SparkConf, - mainClass: String, - mainAppResource: String, - appArgs: Array[String]) extends Logging { - import Client._ - - private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) - private val master = resolveK8sMaster(sparkConf.get("spark.master")) - - private val launchTime = System.currentTimeMillis - private val appName = sparkConf.getOption("spark.app.name") - .getOrElse("spark") - private val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") - private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) - .getOrElse(kubernetesAppId) - private val secretName = s"$SUBMISSION_APP_SECRET_PREFIX-$kubernetesAppId" - private val secretDirectory = s"$DRIVER_CONTAINER_SUBMISSION_SECRETS_BASE_DIR/$kubernetesAppId" - private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) - private val uiPort = sparkConf.getInt("spark.ui.port", DEFAULT_UI_PORT) - private val driverSubmitTimeoutSecs = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TIMEOUT) - private val driverServiceManagerType = sparkConf.get(DRIVER_SERVICE_MANAGER_TYPE) - private val sparkFiles = sparkConf.getOption("spark.files") - .map(_.split(",")) - .getOrElse(Array.empty[String]) - private val sparkJars = sparkConf.getOption("spark.jars") - .map(_.split(",")) - .getOrElse(Array.empty[String]) - - // CPU settings - private val driverCpuCores = sparkConf.getOption("spark.driver.cores").getOrElse("1") - - // Memory settings - private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) - private val driverSubmitServerMemoryMb = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_SERVER_MEMORY) - private val driverSubmitServerMemoryString = sparkConf.get( - KUBERNETES_DRIVER_SUBMIT_SERVER_MEMORY.key, - KUBERNETES_DRIVER_SUBMIT_SERVER_MEMORY.defaultValueString) - private val driverContainerMemoryMb = driverMemoryMb + driverSubmitServerMemoryMb - private val memoryOverheadMb = sparkConf - .get(KUBERNETES_DRIVER_MEMORY_OVERHEAD) - .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * driverContainerMemoryMb).toInt, - MEMORY_OVERHEAD_MIN)) - private val driverContainerMemoryWithOverhead = driverContainerMemoryMb + memoryOverheadMb - - 
private val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) - private val loggingInterval = Some(sparkConf.get(REPORT_INTERVAL)) - .filter( _ => waitForAppCompletion) - - private val secretBase64String = { - val secretBytes = new Array[Byte](128) - SECURE_RANDOM.nextBytes(secretBytes) - Base64.encodeBase64String(secretBytes) - } - - private val serviceAccount = sparkConf.get(KUBERNETES_SERVICE_ACCOUNT_NAME) - private val customLabels = sparkConf.get(KUBERNETES_DRIVER_LABELS) - private val customAnnotations = sparkConf.get(KUBERNETES_DRIVER_ANNOTATIONS) - - private val kubernetesResourceCleaner = new KubernetesResourceCleaner - - def run(): Unit = { - logInfo(s"Starting application $kubernetesAppId in Kubernetes...") - val submitterLocalFiles = KubernetesFileUtils.getOnlySubmitterLocalFiles(sparkFiles) - val submitterLocalJars = KubernetesFileUtils.getOnlySubmitterLocalFiles(sparkJars) - (submitterLocalFiles ++ submitterLocalJars).foreach { file => - if (!new File(Utils.resolveURI(file).getPath).isFile) { - throw new SparkException(s"File $file does not exist or is a directory.") - } - } - if (KubernetesFileUtils.isUriLocalFile(mainAppResource) && - !new File(Utils.resolveURI(mainAppResource).getPath).isFile) { - throw new SparkException(s"Main app resource file $mainAppResource is not a file or" + - s" is a directory.") - } - val driverServiceManager = getDriverServiceManager - val parsedCustomLabels = parseKeyValuePairs(customLabels, KUBERNETES_DRIVER_LABELS.key, - "labels") - parsedCustomLabels.keys.foreach { key => - require(key != SPARK_APP_ID_LABEL, "Label with key" + - s" $SPARK_APP_ID_LABEL cannot be used in" + - " spark.kubernetes.driver.labels, as it is reserved for Spark's" + - " internal configuration.") - } - val parsedCustomAnnotations = parseKeyValuePairs( - customAnnotations, - KUBERNETES_DRIVER_ANNOTATIONS.key, - "annotations") - val driverPodKubernetesCredentials = new DriverPodKubernetesCredentialsProvider(sparkConf).get() - var k8ConfBuilder = new K8SConfigBuilder() - .withApiVersion("v1") - .withMasterUrl(master) - .withNamespace(namespace) - sparkConf.get(KUBERNETES_SUBMIT_CA_CERT_FILE).foreach { - f => k8ConfBuilder = k8ConfBuilder.withCaCertFile(f) - } - sparkConf.get(KUBERNETES_SUBMIT_CLIENT_KEY_FILE).foreach { - f => k8ConfBuilder = k8ConfBuilder.withClientKeyFile(f) - } - sparkConf.get(KUBERNETES_SUBMIT_CLIENT_CERT_FILE).foreach { - f => k8ConfBuilder = k8ConfBuilder.withClientCertFile(f) - } - sparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { token => - k8ConfBuilder = k8ConfBuilder.withOauthToken(token) - } - - val k8ClientConfig = k8ConfBuilder.build - Utils.tryWithResource(new DefaultKubernetesClient(k8ClientConfig)) { kubernetesClient => - driverServiceManager.start(kubernetesClient, kubernetesAppId, sparkConf) - // start outer watch for status logging of driver pod - // only enable interval logging if in waitForAppCompletion mode - val loggingWatch = new LoggingPodStatusWatcherImpl( - kubernetesAppId, loggingInterval) - Utils.tryWithResource(kubernetesClient - .pods() - .withName(kubernetesDriverPodName) - .watch(loggingWatch)) { _ => - loggingWatch.start() - val resourceCleanShutdownHook = ShutdownHookManager.addShutdownHook(() => - kubernetesResourceCleaner.deleteAllRegisteredResourcesFromKubernetes(kubernetesClient)) - val cleanupServiceManagerHook = ShutdownHookManager.addShutdownHook( - ShutdownHookManager.DEFAULT_SHUTDOWN_PRIORITY)( - () => driverServiceManager.stop()) - // Place the error hook at a higher priority in order for the 
error hook to run before - // the stop hook. - val serviceManagerErrorHook = ShutdownHookManager.addShutdownHook( - ShutdownHookManager.DEFAULT_SHUTDOWN_PRIORITY + 1)(() => - driverServiceManager.handleSubmissionError( - new SparkException("Submission shutting down early..."))) - try { - val sslConfigurationProvider = new DriverSubmitSslConfigurationProvider( - sparkConf, kubernetesAppId, kubernetesClient, kubernetesResourceCleaner) - val submitServerSecret = kubernetesClient.secrets().createNew() - .withNewMetadata() - .withName(secretName) - .endMetadata() - .withData(Map((SUBMISSION_APP_SECRET_NAME, secretBase64String)).asJava) - .withType("Opaque") - .done() - kubernetesResourceCleaner.registerOrUpdateResource(submitServerSecret) - val sslConfiguration = sslConfigurationProvider.getSslConfiguration() - val (driverPod, driverService) = launchDriverKubernetesComponents( - kubernetesClient, - driverServiceManager, - parsedCustomLabels, - parsedCustomAnnotations, - submitServerSecret, - sslConfiguration) - configureOwnerReferences( - kubernetesClient, - submitServerSecret, - sslConfiguration.sslSecret, - driverPod, - driverService) - submitApplicationToDriverServer( - kubernetesClient, - driverServiceManager, - sslConfiguration, - driverService, - submitterLocalFiles, - submitterLocalJars, - driverPodKubernetesCredentials) - // Now that the application has started, persist the components that were created beyond - // the shutdown hook. We still want to purge the one-time secrets, so do not unregister - // those. - kubernetesResourceCleaner.unregisterResource(driverPod) - kubernetesResourceCleaner.unregisterResource(driverService) - } catch { - case e: Throwable => - driverServiceManager.handleSubmissionError(e) - throw e - } finally { - Utils.tryLogNonFatalError { - kubernetesResourceCleaner.deleteAllRegisteredResourcesFromKubernetes(kubernetesClient) - } - Utils.tryLogNonFatalError { - driverServiceManager.stop() - } - // Remove the shutdown hooks that would be redundant - Utils.tryLogNonFatalError { - ShutdownHookManager.removeShutdownHook(resourceCleanShutdownHook) - } - Utils.tryLogNonFatalError { - ShutdownHookManager.removeShutdownHook(cleanupServiceManagerHook) - } - Utils.tryLogNonFatalError { - ShutdownHookManager.removeShutdownHook(serviceManagerErrorHook) - } - } - // wait if configured to do so - if (waitForAppCompletion) { - logInfo(s"Waiting for application $kubernetesAppId to finish...") - loggingWatch.awaitCompletion() - logInfo(s"Application $kubernetesAppId finished.") - } else { - logInfo(s"Application $kubernetesAppId successfully launched.") - } - } - } - } - - private def submitApplicationToDriverServer( - kubernetesClient: KubernetesClient, - driverServiceManager: DriverServiceManager, - sslConfiguration: DriverSubmitSslConfiguration, - driverService: Service, - submitterLocalFiles: Iterable[String], - submitterLocalJars: Iterable[String], - driverPodKubernetesCredentials: KubernetesCredentials): Unit = { - sparkConf.getOption("spark.app.id").foreach { id => - logWarning(s"Warning: Provided app id in spark.app.id as $id will be" + - s" overridden as $kubernetesAppId") - } - sparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) - sparkConf.set(KUBERNETES_DRIVER_SERVICE_NAME, driverService.getMetadata.getName) - sparkConf.set("spark.app.id", kubernetesAppId) - sparkConf.setIfMissing("spark.app.name", appName) - sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) - sparkConf.setIfMissing("spark.driver.blockManager.port", 
DEFAULT_BLOCKMANAGER_PORT.toString) - sparkConf.setIfMissing("spark.blockManager.port", DEFAULT_BLOCKMANAGER_PORT.toString) - sparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { _ => - sparkConf.set(KUBERNETES_SUBMIT_OAUTH_TOKEN, "") - } - sparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).foreach { _ => - sparkConf.set(KUBERNETES_DRIVER_OAUTH_TOKEN, "") - } - val driverSubmitter = buildDriverSubmissionClient( - kubernetesClient, - driverServiceManager, - driverService, - sslConfiguration) - // Sanity check to see if the driver submitter is even reachable. - driverSubmitter.ping() - logInfo(s"Submitting local resources to driver pod for application " + - s"$kubernetesAppId ...") - val submitRequest = buildSubmissionRequest( - submitterLocalFiles, - submitterLocalJars, - driverPodKubernetesCredentials) - driverSubmitter.submitApplication(submitRequest) - logInfo("Successfully submitted local resources and driver configuration to" + - " driver pod.") - // After submitting, adjust the service to only expose the Spark UI - val uiServiceType = if (sparkConf.get(EXPOSE_KUBERNETES_DRIVER_SERVICE_UI_PORT)) "NodePort" - else "ClusterIP" - val uiServicePort = new ServicePortBuilder() - .withName(UI_PORT_NAME) - .withPort(uiPort) - .withNewTargetPort(uiPort) - .build() - val resolvedService = kubernetesClient.services().withName(kubernetesAppId).edit() - .editSpec() - .withType(uiServiceType) - .withPorts(uiServicePort) - .endSpec() - .done() - kubernetesResourceCleaner.registerOrUpdateResource(resolvedService) - logInfo("Finished submitting application to Kubernetes.") - } - - private def launchDriverKubernetesComponents( - kubernetesClient: KubernetesClient, - driverServiceManager: DriverServiceManager, - customLabels: Map[String, String], - customAnnotations: Map[String, String], - submitServerSecret: Secret, - sslConfiguration: DriverSubmitSslConfiguration): (Pod, Service) = { - val driverKubernetesSelectors = (Map( - SPARK_DRIVER_LABEL -> kubernetesAppId, - SPARK_APP_ID_LABEL -> kubernetesAppId, - SPARK_APP_NAME_LABEL -> appName) - ++ customLabels) - val endpointsReadyFuture = SettableFuture.create[Endpoints] - val endpointsReadyWatcher = new DriverEndpointsReadyWatcher(endpointsReadyFuture) - val serviceReadyFuture = SettableFuture.create[Service] - val serviceReadyWatcher = new DriverServiceReadyWatcher(serviceReadyFuture) - val podReadyFuture = SettableFuture.create[Pod] - val podWatcher = new DriverPodReadyWatcher(podReadyFuture) - Utils.tryWithResource(kubernetesClient - .pods() - .withName(kubernetesDriverPodName) - .watch(podWatcher)) { _ => - Utils.tryWithResource(kubernetesClient - .services() - .withName(kubernetesAppId) - .watch(serviceReadyWatcher)) { _ => - Utils.tryWithResource(kubernetesClient - .endpoints() - .withName(kubernetesAppId) - .watch(endpointsReadyWatcher)) { _ => - val serviceTemplate = createDriverServiceTemplate(driverKubernetesSelectors) - val driverService = kubernetesClient.services().create( - driverServiceManager.customizeDriverService(serviceTemplate).build()) - kubernetesResourceCleaner.registerOrUpdateResource(driverService) - val driverPod = createDriverPod( - kubernetesClient, - driverKubernetesSelectors, - customAnnotations, - submitServerSecret, - sslConfiguration) - waitForReadyKubernetesComponents(kubernetesClient, endpointsReadyFuture, - serviceReadyFuture, podReadyFuture) - (driverPod, driverService) - } - } - } - } - - /** - * Sets the owner reference for all the kubernetes components to link to the driver pod. 
- * - * @return The driver service after it has been adjusted to reflect the new owner - * reference. - */ - private def configureOwnerReferences( - kubernetesClient: KubernetesClient, - submitServerSecret: Secret, - sslSecret: Option[Secret], - driverPod: Pod, - driverService: Service): Service = { - val driverPodOwnerRef = new OwnerReferenceBuilder() - .withName(driverPod.getMetadata.getName) - .withUid(driverPod.getMetadata.getUid) - .withApiVersion(driverPod.getApiVersion) - .withKind(driverPod.getKind) - .withController(true) - .build() - sslSecret.foreach(secret => { - val updatedSecret = kubernetesClient.secrets().withName(secret.getMetadata.getName).edit() - .editMetadata() - .addToOwnerReferences(driverPodOwnerRef) - .endMetadata() - .done() - kubernetesResourceCleaner.registerOrUpdateResource(updatedSecret) - }) - val updatedSubmitServerSecret = kubernetesClient - .secrets() - .withName(submitServerSecret.getMetadata.getName) - .edit() - .editMetadata() - .addToOwnerReferences(driverPodOwnerRef) - .endMetadata() - .done() - kubernetesResourceCleaner.registerOrUpdateResource(updatedSubmitServerSecret) - val updatedService = kubernetesClient - .services() - .withName(driverService.getMetadata.getName) - .edit() - .editMetadata() - .addToOwnerReferences(driverPodOwnerRef) - .endMetadata() - .done() - kubernetesResourceCleaner.registerOrUpdateResource(updatedService) - updatedService - } - - private def waitForReadyKubernetesComponents( - kubernetesClient: KubernetesClient, - endpointsReadyFuture: SettableFuture[Endpoints], - serviceReadyFuture: SettableFuture[Service], - podReadyFuture: SettableFuture[Pod]) = { - try { - podReadyFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) - logInfo("Driver pod successfully created in Kubernetes cluster.") - } catch { - case e: Throwable => - val finalErrorMessage: String = buildSubmitFailedErrorMessage(kubernetesClient, e) - logError(finalErrorMessage, e) - throw new SparkException(finalErrorMessage, e) - } - try { - serviceReadyFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) - logInfo("Driver service created successfully in Kubernetes.") - } catch { - case e: Throwable => - throw new SparkException(s"The driver service was not ready" + - s" in $driverSubmitTimeoutSecs seconds.", e) - } - try { - endpointsReadyFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) - logInfo("Driver endpoints ready to receive application submission") - } catch { - case e: Throwable => - throw new SparkException(s"The driver service endpoint was not ready" + - s" in $driverSubmitTimeoutSecs seconds.", e) - } - } - - private def createDriverPod( - kubernetesClient: KubernetesClient, - driverKubernetesSelectors: Map[String, String], - customAnnotations: Map[String, String], - submitServerSecret: Secret, - sslConfiguration: DriverSubmitSslConfiguration): Pod = { - val containerPorts = buildContainerPorts() - val probePingHttpGet = new HTTPGetActionBuilder() - .withScheme(if (sslConfiguration.enabled) "HTTPS" else "HTTP") - .withPath("/v1/submissions/ping") - .withNewPort(SUBMISSION_SERVER_PORT_NAME) - .build() - val driverCpuQuantity = new QuantityBuilder(false) - .withAmount(driverCpuCores) - .build() - val driverMemoryQuantity = new QuantityBuilder(false) - .withAmount(s"${driverContainerMemoryMb}M") - .build() - val driverMemoryLimitQuantity = new QuantityBuilder(false) - .withAmount(s"${driverContainerMemoryWithOverhead}M") - .build() - val driverPod = kubernetesClient.pods().createNew() - .withNewMetadata() - .withName(kubernetesDriverPodName) - 
.withLabels(driverKubernetesSelectors.asJava) - .withAnnotations(customAnnotations.asJava) - .endMetadata() - .withNewSpec() - .withRestartPolicy("Never") - .addNewVolume() - .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) - .withNewSecret() - .withSecretName(submitServerSecret.getMetadata.getName) - .endSecret() - .endVolume() - .addToVolumes(sslConfiguration.sslPodVolume.toSeq: _*) - .withServiceAccount(serviceAccount.getOrElse("default")) - .addNewContainer() - .withName(DRIVER_CONTAINER_NAME) - .withImage(driverDockerImage) - .withImagePullPolicy("IfNotPresent") - .addNewVolumeMount() - .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) - .withMountPath(secretDirectory) - .withReadOnly(true) - .endVolumeMount() - .addToVolumeMounts(sslConfiguration.sslPodVolumeMount.toSeq: _*) - .addNewEnv() - .withName(ENV_SUBMISSION_SECRET_LOCATION) - .withValue(s"$secretDirectory/$SUBMISSION_APP_SECRET_NAME") - .endEnv() - .addNewEnv() - .withName(ENV_SUBMISSION_SERVER_PORT) - .withValue(SUBMISSION_SERVER_PORT.toString) - .endEnv() - // Note that SPARK_DRIVER_MEMORY only affects the REST server via spark-class. - .addNewEnv() - .withName(ENV_DRIVER_MEMORY) - .withValue(driverSubmitServerMemoryString) - .endEnv() - .addToEnv(sslConfiguration.sslPodEnvVars: _*) - .withNewResources() - .addToRequests("cpu", driverCpuQuantity) - .addToLimits("cpu", driverCpuQuantity) - .addToRequests("memory", driverMemoryQuantity) - .addToLimits("memory", driverMemoryLimitQuantity) - .endResources() - .withPorts(containerPorts.asJava) - .withNewReadinessProbe().withHttpGet(probePingHttpGet).endReadinessProbe() - .endContainer() - .endSpec() - .done() - kubernetesResourceCleaner.registerOrUpdateResource(driverPod) - driverPod - } - - private def createDriverServiceTemplate(driverKubernetesSelectors: Map[String, String]) - : ServiceBuilder = { - val driverSubmissionServicePort = new ServicePortBuilder() - .withName(SUBMISSION_SERVER_PORT_NAME) - .withPort(SUBMISSION_SERVER_PORT) - .withNewTargetPort(SUBMISSION_SERVER_PORT) - .build() - new ServiceBuilder() - .withNewMetadata() - .withName(kubernetesAppId) - .withLabels(driverKubernetesSelectors.asJava) - .endMetadata() - .withNewSpec() - .withSelector(driverKubernetesSelectors.asJava) - .withPorts(driverSubmissionServicePort) - .endSpec() - } - - private class DriverPodReadyWatcher(resolvedDriverPod: SettableFuture[Pod]) extends Watcher[Pod] { - override def eventReceived(action: Action, pod: Pod): Unit = { - if ((action == Action.ADDED || action == Action.MODIFIED) - && pod.getStatus.getPhase == "Running" - && !resolvedDriverPod.isDone) { - pod.getStatus - .getContainerStatuses - .asScala - .find(status => - status.getName == DRIVER_CONTAINER_NAME && status.getReady) - .foreach { _ => resolvedDriverPod.set(pod) } - } - } - - override def onClose(cause: KubernetesClientException): Unit = { - logDebug("Driver pod readiness watch closed.", cause) - } - } - - private class DriverEndpointsReadyWatcher(resolvedDriverEndpoints: SettableFuture[Endpoints]) - extends Watcher[Endpoints] { - override def eventReceived(action: Action, endpoints: Endpoints): Unit = { - if ((action == Action.ADDED || action == Action.MODIFIED) - && (endpoints != null) - && (endpoints.getSubsets != null) - && endpoints.getSubsets.asScala.nonEmpty - && endpoints.getSubsets.asScala.exists(_.getAddresses.asScala.nonEmpty) - && !resolvedDriverEndpoints.isDone) { - resolvedDriverEndpoints.set(endpoints) - } - } - - override def onClose(cause: KubernetesClientException): Unit = { - logDebug("Driver endpoints 
readiness watch closed.", cause) - } - } - - private class DriverServiceReadyWatcher(resolvedDriverService: SettableFuture[Service]) - extends Watcher[Service] { - override def eventReceived(action: Action, service: Service): Unit = { - if ((action == Action.ADDED || action == Action.MODIFIED) - && !resolvedDriverService.isDone) { - resolvedDriverService.set(service) - } - } - - override def onClose(cause: KubernetesClientException): Unit = { - logDebug("Driver service readiness watch closed.", cause) - } - } - - private def buildSubmitFailedErrorMessage( - kubernetesClient: KubernetesClient, - e: Throwable): String = { - val driverPod = try { - kubernetesClient.pods().withName(kubernetesDriverPodName).get() - } catch { - case throwable: Throwable => - logError(s"Timed out while waiting $driverSubmitTimeoutSecs seconds for the" + - " driver pod to start, but an error occurred while fetching the driver" + - " pod's details.", throwable) - throw new SparkException(s"Timed out while waiting $driverSubmitTimeoutSecs" + - " seconds for the driver pod to start. Unfortunately, in attempting to fetch" + - " the latest state of the pod, another error was thrown. Check the logs for" + - " the error that was thrown in looking up the driver pod.", e) - } - val topLevelMessage = s"The driver pod with name ${driverPod.getMetadata.getName}" + - s" in namespace ${driverPod.getMetadata.getNamespace} was not ready in" + - s" $driverSubmitTimeoutSecs seconds." - val podStatusPhase = if (driverPod.getStatus.getPhase != null) { - s"Latest phase from the pod is: ${driverPod.getStatus.getPhase}" - } else { - "The pod had no final phase." - } - val podStatusMessage = if (driverPod.getStatus.getMessage != null) { - s"Latest message from the pod is: ${driverPod.getStatus.getMessage}" - } else { - "The pod had no final message." 
- } - val failedDriverContainerStatusString = driverPod.getStatus - .getContainerStatuses - .asScala - .find(_.getName == DRIVER_CONTAINER_NAME) - .map(status => { - val lastState = status.getState - if (lastState.getRunning != null) { - "Driver container last state: Running\n" + - s"Driver container started at: ${lastState.getRunning.getStartedAt}" - } else if (lastState.getWaiting != null) { - "Driver container last state: Waiting\n" + - s"Driver container wait reason: ${lastState.getWaiting.getReason}\n" + - s"Driver container message: ${lastState.getWaiting.getMessage}\n" - } else if (lastState.getTerminated != null) { - "Driver container last state: Terminated\n" + - s"Driver container started at: ${lastState.getTerminated.getStartedAt}\n" + - s"Driver container finished at: ${lastState.getTerminated.getFinishedAt}\n" + - s"Driver container exit reason: ${lastState.getTerminated.getReason}\n" + - s"Driver container exit code: ${lastState.getTerminated.getExitCode}\n" + - s"Driver container message: ${lastState.getTerminated.getMessage}" - } else { - "Driver container last state: Unknown" - } - }).getOrElse("The driver container wasn't found in the pod; expected to find" + - s" container with name $DRIVER_CONTAINER_NAME") - s"$topLevelMessage\n" + - s"$podStatusPhase\n" + - s"$podStatusMessage\n\n$failedDriverContainerStatusString" - } - - private def buildContainerPorts(): Seq[ContainerPort] = { - Seq((DRIVER_PORT_NAME, sparkConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT)), - (BLOCK_MANAGER_PORT_NAME, - sparkConf.getInt("spark.blockManager.port", DEFAULT_BLOCKMANAGER_PORT)), - (SUBMISSION_SERVER_PORT_NAME, SUBMISSION_SERVER_PORT), - (UI_PORT_NAME, uiPort)).map(port => new ContainerPortBuilder() - .withName(port._1) - .withContainerPort(port._2) - .build()) - } - - private def buildSubmissionRequest( - submitterLocalFiles: Iterable[String], - submitterLocalJars: Iterable[String], - driverPodKubernetesCredentials: KubernetesCredentials): KubernetesCreateSubmissionRequest = { - val mainResourceUri = Utils.resolveURI(mainAppResource) - val resolvedAppResource: AppResource = Option(mainResourceUri.getScheme) - .getOrElse("file") match { - case "file" => - val appFile = new File(mainResourceUri.getPath) - val fileBytes = Files.toByteArray(appFile) - val fileBase64 = Base64.encodeBase64String(fileBytes) - UploadedAppResource(resourceBase64Contents = fileBase64, name = appFile.getName) - case "local" => ContainerAppResource(mainAppResource) - case other => RemoteAppResource(other) - } - val uploadFilesBase64Contents = CompressionUtils.createTarGzip(submitterLocalFiles.map( - Utils.resolveURI(_).getPath)) - val uploadJarsBase64Contents = CompressionUtils.createTarGzip(submitterLocalJars.map( - Utils.resolveURI(_).getPath)) - KubernetesCreateSubmissionRequest( - appResource = resolvedAppResource, - mainClass = mainClass, - appArgs = appArgs, - secret = secretBase64String, - sparkProperties = sparkConf.getAll.toMap, - uploadedJarsBase64Contents = uploadJarsBase64Contents, - uploadedFilesBase64Contents = uploadFilesBase64Contents, - driverPodKubernetesCredentials = driverPodKubernetesCredentials) - } - - private def buildDriverSubmissionClient( - kubernetesClient: KubernetesClient, - driverServiceManager: DriverServiceManager, - service: Service, - sslConfiguration: DriverSubmitSslConfiguration): KubernetesSparkRestApi = { - val serviceUris = driverServiceManager.getDriverServiceSubmissionServerUris(service) - require(serviceUris.nonEmpty, "No uris found to contact the driver!") - 
HttpClientUtil.createClient[KubernetesSparkRestApi]( - uris = serviceUris, - maxRetriesPerServer = 10, - sslSocketFactory = sslConfiguration - .driverSubmitClientSslContext - .getSocketFactory, - trustContext = sslConfiguration - .driverSubmitClientTrustManager - .orNull, - connectTimeoutMillis = 5000) - } - - private def parseKeyValuePairs( - maybeKeyValues: Option[String], - configKey: String, - keyValueType: String): Map[String, String] = { - maybeKeyValues.map(keyValues => { - keyValues.split(",").map(_.trim).filterNot(_.isEmpty).map(keyValue => { - keyValue.split("=", 2).toSeq match { - case Seq(k, v) => - (k, v) - case _ => - throw new SparkException(s"Custom $keyValueType set by $configKey must be a" + - s" comma-separated list of key-value pairs, with format <key>=<value>." + - s" Got value: $keyValue. All values: $keyValues") - } - }).toMap - }).getOrElse(Map.empty[String, String]) - } - - private def getDriverServiceManager: DriverServiceManager = { - val driverServiceManagerLoader = ServiceLoader.load(classOf[DriverServiceManager]) - val matchingServiceManagers = driverServiceManagerLoader - .iterator() - .asScala - .filter(_.getServiceManagerType == driverServiceManagerType) - .toList - require(matchingServiceManagers.nonEmpty, - s"No driver service manager found matching type $driverServiceManagerType") - require(matchingServiceManagers.size == 1, "Multiple service managers found" + - s" matching type $driverServiceManagerType, got: " + - matchingServiceManagers.map(_.getClass).toList.mkString(",")) - matchingServiceManagers.head - } -} - -private[spark] object Client extends Logging { - - private[spark] val SECURE_RANDOM = new SecureRandom() - - def main(args: Array[String]): Unit = { - require(args.length >= 2, s"Too few arguments. Usage: ${getClass.getName} <mainAppResource>" + - s" <mainClass> [<appArgs>]") - val mainAppResource = args(0) - val mainClass = args(1) - val appArgs = args.drop(2) - val sparkConf = new SparkConf(true) - new Client( - mainAppResource = mainAppResource, - mainClass = mainClass, - sparkConf = sparkConf, - appArgs = appArgs).run() - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverServiceManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverServiceManager.scala deleted file mode 100644 index c7d394fcf00ad..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverServiceManager.scala +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit.v1 - -import io.fabric8.kubernetes.api.model.{Service, ServiceBuilder} -import io.fabric8.kubernetes.client.KubernetesClient - -import org.apache.spark.SparkConf - -/** - * Implementations of this interface are responsible for exposing the driver pod by: - * - Creating a Kubernetes Service that is backed by the driver pod, and - * - Providing one or more URIs that the service can be reached at from the submission client. - * - * In general, one should not need to implement custom variants of this interface. Consider - * if the built-in service managers, NodePort and ExternalAnnotation, suit your needs first. - * - * This API is in an alpha state and may break without notice. - */ -trait DriverServiceManager { - - protected var kubernetesClient: KubernetesClient = _ - protected var serviceName: String = _ - protected var sparkConf: SparkConf = _ - - /** - * The tag that identifies this service manager type. This service manager will be loaded - * only if the Spark configuration spark.kubernetes.driver.serviceManagerType matches this - * value. - */ - def getServiceManagerType: String - - final def start( - kubernetesClient: KubernetesClient, - serviceName: String, - sparkConf: SparkConf): Unit = { - this.kubernetesClient = kubernetesClient - this.serviceName = serviceName - this.sparkConf = sparkConf - onStart(kubernetesClient, serviceName, sparkConf) - } - - /** - * Guaranteed to be called before {@link createDriverService} or - * {@link getDriverServiceSubmissionServerUris} is called. - */ - protected def onStart( - kubernetesClient: KubernetesClient, - serviceName: String, - sparkConf: SparkConf): Unit = {} - - /** - * Customize the driver service that overlays on the driver pod. - * - * Implementations are expected to take the service template and adjust it - * according to the particular needs of how the Service will be accessed by - * URIs provided in {@link getDriverServiceSubmissionServerUris}. - * - * @param driverServiceTemplate Base settings for the driver service. - * @return The same ServiceBuilder object with any required customizations. - */ - def customizeDriverService(driverServiceTemplate: ServiceBuilder): ServiceBuilder - - /** - * Return the set of URIs that can be used to reach the submission server that - * is running on the driver pod. - */ - def getDriverServiceSubmissionServerUris(driverService: Service): Set[String] - - /** - * Called when the Spark application failed to start. Allows the service - * manager to clean up any state it may have created that should not be persisted - * in the case of an unsuccessful launch. Note that stop() is still called - * regardless if this method is called. - */ - def handleSubmissionError(cause: Throwable): Unit = {} - - final def stop(): Unit = onStop() - - /** - * Perform any cleanup of this service manager. - * the super implementation. 
- */ - protected def onStop(): Unit = {} -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala deleted file mode 100644 index 174e9c57a65ca..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala +++ /dev/null @@ -1,354 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit.v1 - -import java.io.{File, FileInputStream} -import java.security.{KeyStore, SecureRandom} -import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} - -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, Files} -import io.fabric8.kubernetes.api.model.{EnvVar, EnvVarBuilder, Secret, Volume, VolumeBuilder, VolumeMount, VolumeMountBuilder} -import io.fabric8.kubernetes.client.KubernetesClient -import scala.collection.JavaConverters._ - -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils -import org.apache.spark.deploy.rest.kubernetes.v1.PemsToKeyStoreConverter -import org.apache.spark.util.Utils - -/** - * Raw SSL configuration as the user specified in SparkConf for setting up the driver - * submission server. - */ -private case class DriverSubmitSslConfigurationParameters( - storeBasedSslOptions: SSLOptions, - isKeyStoreLocalFile: Boolean, - driverSubmitServerKeyPem: Option[File], - isDriverSubmitKeyPemLocalFile: Boolean, - driverSubmitServerCertPem: Option[File], - isDriverSubmitServerCertPemLocalFile: Boolean, - submissionClientCertPem: Option[File]) - -/** - * Resolved from translating options provided in - * {@link DriverSubmitSslConfigurationParameters} into Kubernetes volumes, environment variables - * for the driver pod, Kubernetes secrets, client-side trust managers, and the client-side SSL - * context. This is used for setting up the SSL connection for the submission server where the - * application local dependencies and configuration is provided from. 
- */ -private[spark] case class DriverSubmitSslConfiguration( - enabled: Boolean, - sslPodEnvVars: Array[EnvVar], - sslPodVolume: Option[Volume], - sslPodVolumeMount: Option[VolumeMount], - sslSecret: Option[Secret], - driverSubmitClientTrustManager: Option[X509TrustManager], - driverSubmitClientSslContext: SSLContext) - -/** - * Provides the SSL configuration for bootstrapping the driver pod to listen for the driver - * submission over SSL, and then supply the client-side configuration for establishing the - * SSL connection. This is done in two phases: first, interpreting the raw configuration - * values from the SparkConf object; then second, converting the configuration parameters - * into the appropriate Kubernetes constructs, namely the volume and volume mount to add to the - * driver pod, and the secret to create at the API server; and finally, constructing the - * client-side trust manager and SSL context for sending the local dependencies. - */ -private[spark] class DriverSubmitSslConfigurationProvider( - sparkConf: SparkConf, - kubernetesAppId: String, - kubernetesClient: KubernetesClient, - kubernetesResourceCleaner: KubernetesResourceCleaner) { - private val SECURE_RANDOM = new SecureRandom() - private val sslSecretsName = s"$SUBMISSION_SSL_SECRETS_PREFIX-$kubernetesAppId" - private val sslSecretsDirectory = DRIVER_CONTAINER_SUBMISSION_SECRETS_BASE_DIR + - s"/$kubernetesAppId-ssl" - - def getSslConfiguration(): DriverSubmitSslConfiguration = { - val sslConfigurationParameters = parseSslConfigurationParameters() - if (sslConfigurationParameters.storeBasedSslOptions.enabled) { - val storeBasedSslOptions = sslConfigurationParameters.storeBasedSslOptions - val keyStoreSecret = resolveFileToSecretMapping( - sslConfigurationParameters.isKeyStoreLocalFile, - SUBMISSION_SSL_KEYSTORE_SECRET_NAME, - storeBasedSslOptions.keyStore, - "KeyStore") - val keyStorePathEnv = resolveFilePathEnv( - sslConfigurationParameters.isKeyStoreLocalFile, - ENV_SUBMISSION_KEYSTORE_FILE, - SUBMISSION_SSL_KEYSTORE_SECRET_NAME, - storeBasedSslOptions.keyStore) - val storePasswordSecret = storeBasedSslOptions.keyStorePassword.map(password => { - val passwordBase64 = BaseEncoding.base64().encode(password.getBytes(Charsets.UTF_8)) - (SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME, passwordBase64) - }).toMap - val storePasswordLocationEnv = storeBasedSslOptions.keyStorePassword.map(_ => { - new EnvVarBuilder() - .withName(ENV_SUBMISSION_KEYSTORE_PASSWORD_FILE) - .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME") - .build() - }) - val storeKeyPasswordSecret = storeBasedSslOptions.keyPassword.map(password => { - val passwordBase64 = BaseEncoding.base64().encode(password.getBytes(Charsets.UTF_8)) - (SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME, passwordBase64) - }).toMap - val storeKeyPasswordEnv = storeBasedSslOptions.keyPassword.map(_ => { - new EnvVarBuilder() - .withName(ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE) - .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME") - .build() - }) - val storeTypeEnv = storeBasedSslOptions.keyStoreType.map(storeType => { - new EnvVarBuilder() - .withName(ENV_SUBMISSION_KEYSTORE_TYPE) - .withValue(storeType) - .build() - }) - val keyPemSecret = resolveFileToSecretMapping( - sslConfigurationParameters.isDriverSubmitKeyPemLocalFile, - secretName = SUBMISSION_SSL_KEY_PEM_SECRET_NAME, - secretType = "Key pem", - secretFile = sslConfigurationParameters.driverSubmitServerKeyPem) - val keyPemLocationEnv = resolveFilePathEnv( - 
sslConfigurationParameters.isDriverSubmitKeyPemLocalFile, - envName = ENV_SUBMISSION_KEY_PEM_FILE, - secretName = SUBMISSION_SSL_KEY_PEM_SECRET_NAME, - maybeFile = sslConfigurationParameters.driverSubmitServerKeyPem) - val certPemSecret = resolveFileToSecretMapping( - sslConfigurationParameters.isDriverSubmitServerCertPemLocalFile, - secretName = SUBMISSION_SSL_CERT_PEM_SECRET_NAME, - secretType = "Cert pem", - secretFile = sslConfigurationParameters.driverSubmitServerCertPem) - val certPemLocationEnv = resolveFilePathEnv( - sslConfigurationParameters.isDriverSubmitServerCertPemLocalFile, - envName = ENV_SUBMISSION_CERT_PEM_FILE, - secretName = SUBMISSION_SSL_CERT_PEM_SECRET_NAME, - maybeFile = sslConfigurationParameters.driverSubmitServerCertPem) - val useSslEnv = new EnvVarBuilder() - .withName(ENV_SUBMISSION_USE_SSL) - .withValue("true") - .build() - val sslVolume = new VolumeBuilder() - .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) - .withNewSecret() - .withSecretName(sslSecretsName) - .endSecret() - .build() - val sslVolumeMount = new VolumeMountBuilder() - .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) - .withReadOnly(true) - .withMountPath(sslSecretsDirectory) - .build() - val allSecrets = keyStoreSecret ++ - storePasswordSecret ++ - storeKeyPasswordSecret ++ - keyPemSecret ++ - certPemSecret - val sslSecret = kubernetesClient.secrets().createNew() - .withNewMetadata() - .withName(sslSecretsName) - .endMetadata() - .withData(allSecrets.asJava) - .withType("Opaque") - .done() - kubernetesResourceCleaner.registerOrUpdateResource(sslSecret) - val allSslEnvs = keyStorePathEnv ++ - storePasswordLocationEnv ++ - storeKeyPasswordEnv ++ - storeTypeEnv ++ - keyPemLocationEnv ++ - Array(useSslEnv) ++ - certPemLocationEnv - val (driverSubmitClientTrustManager, driverSubmitClientSslContext) = - buildSslConnectionConfiguration(sslConfigurationParameters) - DriverSubmitSslConfiguration( - true, - allSslEnvs.toArray, - Some(sslVolume), - Some(sslVolumeMount), - Some(sslSecret), - driverSubmitClientTrustManager, - driverSubmitClientSslContext) - } else { - DriverSubmitSslConfiguration( - false, - Array[EnvVar](), - None, - None, - None, - None, - SSLContext.getDefault) - } - } - - private def resolveFilePathEnv( - isLocal: Boolean, - envName: String, - secretName: String, - maybeFile: Option[File]): Option[EnvVar] = { - maybeFile.map(file => { - val pemPath = if (isLocal) { - s"$sslSecretsDirectory/$secretName" - } else { - file.getAbsolutePath - } - new EnvVarBuilder() - .withName(envName) - .withValue(pemPath) - .build() - }) - } - - private def resolveFileToSecretMapping( - isLocal: Boolean, - secretName: String, - secretFile: Option[File], - secretType: String): Map[String, String] = { - secretFile.filter(_ => isLocal).map(file => { - if (!file.isFile) { - throw new SparkException(s"$secretType specified at ${file.getAbsolutePath} is not" + - s" a file or does not exist.") - } - val keyStoreBytes = Files.toByteArray(file) - (secretName, BaseEncoding.base64().encode(keyStoreBytes)) - }).toMap - } - - private def parseSslConfigurationParameters(): DriverSubmitSslConfigurationParameters = { - val maybeKeyStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE) - val maybeTrustStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE) - val maybeKeyPem = sparkConf.get(DRIVER_SUBMIT_SSL_KEY_PEM) - val maybeDriverSubmitServerCertPem = sparkConf.get(DRIVER_SUBMIT_SSL_SERVER_CERT_PEM) - val maybeDriverSubmitClientCertPem = sparkConf.get(DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM) - 
validatePemsDoNotConflictWithStores( - maybeKeyStore, - maybeTrustStore, - maybeKeyPem, - maybeDriverSubmitServerCertPem, - maybeDriverSubmitClientCertPem) - val resolvedSparkConf = sparkConf.clone() - val (isLocalKeyStore, resolvedKeyStore) = resolveLocalFile(maybeKeyStore, "keyStore") - resolvedKeyStore.foreach { - resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, _) - } - val (isLocalDriverSubmitServerCertPem, resolvedDriverSubmitServerCertPem) = - resolveLocalFile(maybeDriverSubmitServerCertPem, "server cert PEM") - val (isLocalKeyPem, resolvedKeyPem) = resolveLocalFile(maybeKeyPem, "key PEM") - maybeTrustStore.foreach { trustStore => - require(KubernetesFileUtils.isUriLocalFile(trustStore), s"Invalid trustStore URI" + - s" $trustStore; trustStore URI for submit server must have no scheme, or scheme file://") - resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE, - Utils.resolveURI(trustStore).getPath) - } - val driverSubmitClientCertPem = maybeDriverSubmitClientCertPem.map { driverSubmitClientCert => - require(KubernetesFileUtils.isUriLocalFile(driverSubmitClientCert), - "Invalid client certificate PEM URI $driverSubmitClientCert: client certificate URI must" + - " have no scheme, or scheme file://") - Utils.resolveURI(driverSubmitClientCert).getPath - } - val securityManager = new SparkSecurityManager(resolvedSparkConf) - val storeBasedSslOptions = securityManager.getSSLOptions(DRIVER_SUBMIT_SSL_NAMESPACE) - DriverSubmitSslConfigurationParameters( - storeBasedSslOptions, - isLocalKeyStore, - resolvedKeyPem.map(new File(_)), - isLocalKeyPem, - resolvedDriverSubmitServerCertPem.map(new File(_)), - isLocalDriverSubmitServerCertPem, - driverSubmitClientCertPem.map(new File(_))) - } - - private def resolveLocalFile(file: Option[String], - fileType: String): (Boolean, Option[String]) = { - file.map { f => - require(isValidSslFileScheme(f), s"Invalid $fileType URI $f, $fileType URI" + - s" for submit server must have scheme file:// or local:// (no scheme defaults to file://") - val isLocal = KubernetesFileUtils.isUriLocalFile(f) - (isLocal, Option.apply(Utils.resolveURI(f).getPath)) - }.getOrElse(false, None) - } - - private def validatePemsDoNotConflictWithStores( - maybeKeyStore: Option[String], - maybeTrustStore: Option[String], - maybeKeyPem: Option[String], - maybeDriverSubmitServerCertPem: Option[String], - maybeSubmitClientCertPem: Option[String]) = { - maybeKeyPem.orElse(maybeDriverSubmitServerCertPem).foreach { _ => - require(maybeKeyStore.isEmpty, - "Cannot specify server PEM files and key store files; must specify only one or the other.") - } - maybeKeyPem.foreach { _ => - require(maybeDriverSubmitServerCertPem.isDefined, - "When specifying the key PEM file, the server certificate PEM file must also be provided.") - } - maybeDriverSubmitServerCertPem.foreach { _ => - require(maybeKeyPem.isDefined, - "When specifying the server certificate PEM file, the key PEM file must also be provided.") - } - maybeTrustStore.foreach { _ => - require(maybeSubmitClientCertPem.isEmpty, - "Cannot specify client cert file and truststore file; must specify only one or the other.") - } - } - - private def isValidSslFileScheme(rawUri: String): Boolean = { - val resolvedScheme = Option.apply(Utils.resolveURI(rawUri).getScheme).getOrElse("file") - resolvedScheme == "file" || resolvedScheme == "local" - } - - private def buildSslConnectionConfiguration( - sslConfigurationParameters: DriverSubmitSslConfigurationParameters) - : (Option[X509TrustManager], SSLContext) = { - val 
maybeTrustStore = sslConfigurationParameters.submissionClientCertPem.map { certPem => - PemsToKeyStoreConverter.convertCertPemToTrustStore( - certPem, - sslConfigurationParameters.storeBasedSslOptions.trustStoreType) - }.orElse(sslConfigurationParameters.storeBasedSslOptions.trustStore.map { trustStoreFile => - if (!trustStoreFile.isFile) { - throw new SparkException(s"TrustStore file at ${trustStoreFile.getAbsolutePath}" + - s" does not exist or is not a file.") - } - val trustStore = KeyStore.getInstance( - sslConfigurationParameters - .storeBasedSslOptions - .trustStoreType - .getOrElse(KeyStore.getDefaultType)) - Utils.tryWithResource(new FileInputStream(trustStoreFile)) { trustStoreStream => - val trustStorePassword = sslConfigurationParameters - .storeBasedSslOptions - .trustStorePassword - .map(_.toCharArray) - .orNull - trustStore.load(trustStoreStream, trustStorePassword) - } - trustStore - }) - maybeTrustStore.map { trustStore => - val trustManagerFactory = TrustManagerFactory.getInstance( - TrustManagerFactory.getDefaultAlgorithm) - trustManagerFactory.init(trustStore) - val trustManagers = trustManagerFactory.getTrustManagers - val sslContext = SSLContext.getInstance("TLSv1.2") - sslContext.init(null, trustManagers, SECURE_RANDOM) - (Option.apply(trustManagers(0).asInstanceOf[X509TrustManager]), sslContext) - }.getOrElse((Option.empty[X509TrustManager], SSLContext.getDefault)) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/ExternalSuppliedUrisDriverServiceManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/ExternalSuppliedUrisDriverServiceManager.scala deleted file mode 100644 index 4c784aeb5692f..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/ExternalSuppliedUrisDriverServiceManager.scala +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit.v1 - -import java.util.concurrent.TimeUnit - -import com.google.common.util.concurrent.SettableFuture -import io.fabric8.kubernetes.api.model.{Service, ServiceBuilder} -import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watch, Watcher} -import io.fabric8.kubernetes.client.Watcher.Action -import scala.collection.JavaConverters._ - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils - -/** - * Creates the service with an annotation that is expected to be detected by another process - * which the user provides and is not built in this project. When the external process detects - * the creation of the service with the appropriate annotation, it is expected to populate the - * value of a second annotation that is the URI of the driver submission server. - */ -private[spark] class ExternalSuppliedUrisDriverServiceManager - extends DriverServiceManager with Logging { - - private val externalUriFuture = SettableFuture.create[String] - private var externalUriSetWatch: Option[Watch] = None - - override def onStart( - kubernetesClient: KubernetesClient, - serviceName: String, - sparkConf: SparkConf): Unit = { - externalUriSetWatch = Some(kubernetesClient - .services() - .withName(serviceName) - .watch(new ExternalUriSetWatcher(externalUriFuture))) - } - - override def getServiceManagerType: String = ExternalSuppliedUrisDriverServiceManager.TYPE - - override def customizeDriverService(driverServiceTemplate: ServiceBuilder): ServiceBuilder = { - require(serviceName != null, "Service name was null; was start() called?") - driverServiceTemplate - .editMetadata() - .addToAnnotations(ANNOTATION_PROVIDE_EXTERNAL_URI, "true") - .endMetadata() - .editSpec() - .withType("ClusterIP") - .endSpec() - } - - override def getDriverServiceSubmissionServerUris(driverService: Service): Set[String] = { - val timeoutSeconds = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TIMEOUT) - require(externalUriSetWatch.isDefined, "The watch that listens for the provision of" + - " the external URI was not started; was start() called?") - Set(externalUriFuture.get(timeoutSeconds, TimeUnit.SECONDS)) - } - - override def onStop(): Unit = { - Utils.tryLogNonFatalError { - externalUriSetWatch.foreach(_.close()) - externalUriSetWatch = None - } - } -} - -private[spark] object ExternalSuppliedUrisDriverServiceManager { - val TYPE = "ExternalAnnotation" -} - -private[spark] class ExternalUriSetWatcher(externalUriFuture: SettableFuture[String]) - extends Watcher[Service] with Logging { - - override def eventReceived(action: Action, service: Service): Unit = { - if (action == Action.MODIFIED && !externalUriFuture.isDone) { - service - .getMetadata - .getAnnotations - .asScala - .get(ANNOTATION_RESOLVED_EXTERNAL_URI) - .foreach(externalUriFuture.set) - } - } - - override def onClose(cause: KubernetesClientException): Unit = { - logDebug("External URI set watcher closed.", cause) - } -} - diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/KubernetesResourceCleaner.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/KubernetesResourceCleaner.scala deleted file mode 100644 index 266ec652ed8ae..0000000000000 --- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/KubernetesResourceCleaner.scala +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit.v1 - -import io.fabric8.kubernetes.api.model.HasMetadata -import io.fabric8.kubernetes.client.KubernetesClient -import scala.collection.mutable - -import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils - -private[spark] class KubernetesResourceCleaner extends Logging { - - private val resources = mutable.HashMap.empty[(String, String), HasMetadata] - - // Synchronized because deleteAllRegisteredResourcesFromKubernetes may be called from a - // shutdown hook - def registerOrUpdateResource(resource: HasMetadata): Unit = synchronized { - resources.put((resource.getMetadata.getName, resource.getKind), resource) - } - - def unregisterResource(resource: HasMetadata): Unit = synchronized { - resources.remove((resource.getMetadata.getName, resource.getKind)) - } - - def deleteAllRegisteredResourcesFromKubernetes(kubernetesClient: KubernetesClient): Unit = { - synchronized { - val resourceCount = resources.size - logInfo(s"Deleting ${resourceCount} registered Kubernetes resources...") - resources.values.foreach { resource => - Utils.tryLogNonFatalError { - kubernetesClient.resource(resource).delete() - } - } - resources.clear() - logInfo(s"Deleted ${resourceCount} registered Kubernetes resources.") - } - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/NodePortUrisDriverServiceManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/NodePortUrisDriverServiceManager.scala deleted file mode 100644 index 965d71917403e..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/NodePortUrisDriverServiceManager.scala +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit.v1 - -import io.fabric8.kubernetes.api.model.{Service, ServiceBuilder} -import scala.collection.JavaConverters._ - -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.internal.Logging - -/** - * Creates the service with an open NodePort. The URI to reach the submission server is thus - * at the address of any of the nodes through the service's node port. - */ -private[spark] class NodePortUrisDriverServiceManager extends DriverServiceManager with Logging { - - override def getServiceManagerType: String = NodePortUrisDriverServiceManager.TYPE - - override def customizeDriverService(driverServiceTemplate: ServiceBuilder): ServiceBuilder = { - driverServiceTemplate.editSpec().withType("NodePort").endSpec() - } - - override def getDriverServiceSubmissionServerUris(driverService: Service): Set[String] = { - val urlScheme = if (sparkConf.get(DRIVER_SUBMIT_SSL_ENABLED)) { - "https" - } else { - logWarning("Submitting application details, application secret, Kubernetes credentials," + - " and local jars to the cluster over an insecure connection. You should configure SSL" + - " to secure this step.") - "http" - } - val servicePort = driverService.getSpec.getPorts.asScala - .filter(_.getName == SUBMISSION_SERVER_PORT_NAME) - .head.getNodePort - val nodeUrls = kubernetesClient.nodes.list.getItems.asScala - .filterNot(node => node.getSpec.getUnschedulable != null && - node.getSpec.getUnschedulable) - .flatMap(_.getStatus.getAddresses.asScala) - // The list contains hostnames, internal and external IP addresses. - // (https://kubernetes.io/docs/admin/node/#addresses) - // we want only external IP addresses and legacyHostIP addresses in our list - // legacyHostIPs are deprecated and will be removed in the future. - // (https://github.com/kubernetes/kubernetes/issues/9267) - .filter(address => address.getType == "ExternalIP" || address.getType == "LegacyHostIP") - .map(address => { - s"$urlScheme://${address.getAddress}:$servicePort" - }).toSet - require(nodeUrls.nonEmpty, "No nodes found to contact the driver!") - nodeUrls - } -} - -private[spark] object NodePortUrisDriverServiceManager { - val TYPE = "NodePort" -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestApi.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/FileFetcher.scala similarity index 56% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestApi.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/FileFetcher.scala index 270e7ea0e77bf..d050e0a41a15a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestApi.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/FileFetcher.scala @@ -14,25 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes.v1 +package org.apache.spark.deploy.rest.kubernetes -import javax.ws.rs.{Consumes, GET, Path, POST, Produces} -import javax.ws.rs.core.MediaType +import java.io.File -import org.apache.spark.deploy.rest.CreateSubmissionResponse - -@Path("/v1/submissions/") -trait KubernetesSparkRestApi { - - @POST - @Consumes(Array(MediaType.APPLICATION_JSON)) - @Produces(Array(MediaType.APPLICATION_JSON)) - @Path("/create") - def submitApplication(request: KubernetesCreateSubmissionRequest): CreateSubmissionResponse - - @GET - @Consumes(Array(MediaType.APPLICATION_JSON)) - @Produces(Array(MediaType.APPLICATION_JSON)) - @Path("/ping") - def ping(): PingResponse +// Extracted for testing so that unit tests don't have to depend on Utils.fetchFile +private[spark] trait FileFetcher { + def fetchFile(uri: String, targetDir: File): Unit } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala similarity index 95% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala index 7f21087159145..9bdc224f10c90 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala @@ -14,8 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.File import java.util.concurrent.TimeUnit @@ -30,8 +29,8 @@ import scala.concurrent.duration.Duration import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.internal.Logging import org.apache.spark.util.{ThreadUtils, Utils} @@ -63,26 +62,6 @@ private class DownloadTarGzCallback(downloadDir: File) extends WaitableCallback[ } } } - -// Extracted for testing so that unit tests don't have to depend on Utils.fetchFile -private[v2] trait FileFetcher { - def fetchFile(uri: String, targetDir: File): Unit -} - -private class FileFetcherImpl(sparkConf: SparkConf, securityManager: SparkSecurityManager) - extends FileFetcher { - def fetchFile(uri: String, targetDir: File): Unit = { - Utils.fetchFile( - url = uri, - targetDir = targetDir, - conf = sparkConf, - securityMgr = securityManager, - hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf), - timestamp = System.currentTimeMillis(), - useCache = false) - } -} - /** * Process that fetches files from a resource staging server and/or arbitrary remote locations. 
* @@ -97,6 +76,7 @@ private[spark] class KubernetesSparkDependencyDownloadInitContainer( fileFetcher: FileFetcher, resourceStagingServerSslOptions: SSLOptions) extends Logging { + private implicit val downloadExecutor = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("download-executor")) private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) @@ -184,8 +164,7 @@ private[spark] class KubernetesSparkDependencyDownloadInitContainer( val resourceSecret = Files.toString(resourceSecretLocation, Charsets.UTF_8) val downloadResourceCallback = new DownloadTarGzCallback(resourceDownloadDir) logInfo(downloadStartMessage) - service.downloadResources(resourceId, resourceSecret) - .enqueue(downloadResourceCallback) + service.downloadResources(resourceId, resourceSecret).enqueue(downloadResourceCallback) downloadResourceCallback.waitForCompletion(downloadTimeoutMinutes, TimeUnit.MINUTES) logInfo(downloadFinishedMessage) } @@ -211,6 +190,27 @@ private[spark] class KubernetesSparkDependencyDownloadInitContainer( } } +private class FileFetcherImpl(sparkConf: SparkConf, securityManager: SparkSecurityManager) + extends FileFetcher { + def fetchFile(uri: String, targetDir: File): Unit = { + Utils.fetchFile( + url = uri, + targetDir = targetDir, + conf = sparkConf, + securityMgr = securityManager, + hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf), + timestamp = System.currentTimeMillis(), + useCache = false) + } +} + +private case class StagedResources( + resourceSecret: String, + podLabels: Map[String, String], + podNamespace: String, + resourcesFile: File, + kubernetesCredentials: KubernetesCredentials) + object KubernetesSparkDependencyDownloadInitContainer extends Logging { def main(args: Array[String]): Unit = { logInfo("Starting init-container to download Spark application dependencies.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala index 178956a136d1c..17f90118e150d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala @@ -14,10 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes.v1 +package org.apache.spark.deploy.rest.kubernetes import java.io.{File, FileInputStream, FileOutputStream, InputStreamReader} -import java.nio.file.Paths import java.security.{KeyStore, PrivateKey} import java.security.cert.Certificate import java.util.UUID diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServer.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServer.scala index 4ecb6369ff3b0..34594ba518b62 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServer.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.File diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSslOptionsProvider.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSslOptionsProvider.scala index 0dd0b08433def..cb1e65421c013 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSslOptionsProvider.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.File import java.security.SecureRandom @@ -26,7 +26,6 @@ import org.apache.commons.lang3.RandomStringUtils import org.apache.spark.{SecurityManager, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.kubernetes.OptionRequirements import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.rest.kubernetes.v1.PemsToKeyStoreConverter import org.apache.spark.internal.Logging private[spark] trait ResourceStagingServerSslOptionsProvider { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingService.scala similarity index 97% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingService.scala index 5dbe55b72bd8b..525711e78c01c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingService.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.InputStream import javax.ws.rs.{Consumes, GET, HeaderParam, Path, PathParam, POST, Produces} @@ -23,7 +23,7 @@ import javax.ws.rs.core.{MediaType, StreamingOutput} import org.glassfish.jersey.media.multipart.FormDataParam import org.apache.spark.deploy.kubernetes.KubernetesCredentials -import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret +import org.apache.spark.deploy.kubernetes.submit.SubmittedResourceIdAndSecret /** * Service that receives application data that can be retrieved later on. This is primarily used diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImpl.scala similarity index 91% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImpl.scala index 34c3192ae6780..abe956da9914d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImpl.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.{File, FileOutputStream, InputStream, OutputStream} import java.security.SecureRandom @@ -27,7 +27,7 @@ import scala.collection.concurrent.TrieMap import org.apache.spark.SparkException import org.apache.spark.deploy.kubernetes.KubernetesCredentials -import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret +import org.apache.spark.deploy.kubernetes.submit.SubmittedResourceIdAndSecret import org.apache.spark.internal.Logging import org.apache.spark.util.Utils @@ -92,10 +92,3 @@ private[spark] class ResourceStagingServiceImpl(dependenciesRootDir: File) override def ping(): String = "pong" } - -private case class StagedResources( - resourceSecret: String, - podLabels: Map[String, String], - podNamespace: String, - resourcesFile: File, - kubernetesCredentials: KubernetesCredentials) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala similarity index 93% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala index e0079a372f0d9..3c2fe8ebbc3c8 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import okhttp3.{RequestBody, ResponseBody} import retrofit2.Call import retrofit2.http.{Multipart, Path, Streaming} -import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret +import org.apache.spark.deploy.kubernetes.submit.SubmittedResourceIdAndSecret /** * Retrofit-compatible variant of {@link ResourceStagingService}. For documentation on diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitClientFactory.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala index f906423524944..a374982444f79 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitClientFactory.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.FileInputStream import java.security.{KeyStore, SecureRandom} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/SparkConfPropertiesParser.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/SparkConfPropertiesParser.scala similarity index 94% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/SparkConfPropertiesParser.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/SparkConfPropertiesParser.scala index cf9decab127c5..9e2b8a780df29 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/SparkConfPropertiesParser.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/SparkConfPropertiesParser.scala @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.{File, FileInputStream} import java.util.Properties import com.google.common.collect.Maps -import scala.collection.JavaConverters.mapAsScalaMapConverter +import scala.collection.JavaConverters._ import org.apache.spark.SparkConf import org.apache.spark.internal.config.{ConfigReader, SparkConfigProvider} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/HttpClientUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/HttpClientUtil.scala deleted file mode 100644 index ea1abed72c07f..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/HttpClientUtil.scala +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.rest.kubernetes.v1 - -import java.io.IOException -import java.net.{InetSocketAddress, ProxySelector, SocketAddress, URI} -import java.util.Collections -import javax.net.ssl.{SSLContext, SSLSocketFactory, X509TrustManager} - -import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} -import com.fasterxml.jackson.module.scala.DefaultScalaModule -import feign.{Client, Feign, Request, Response} -import feign.Request.Options -import feign.jackson.{JacksonDecoder, JacksonEncoder} -import feign.jaxrs.JAXRSContract -import io.fabric8.kubernetes.client.Config -import okhttp3.OkHttpClient -import scala.reflect.ClassTag - -import org.apache.spark.SparkException -import org.apache.spark.internal.Logging -import org.apache.spark.status.api.v1.JacksonMessageWriter - -private[spark] object HttpClientUtil extends Logging { - - def createClient[T: ClassTag]( - uris: Set[String], - maxRetriesPerServer: Int = 1, - sslSocketFactory: SSLSocketFactory = SSLContext.getDefault.getSocketFactory, - trustContext: X509TrustManager = null, - readTimeoutMillis: Int = 20000, - connectTimeoutMillis: Int = 20000): T = { - var httpClientBuilder = new OkHttpClient.Builder() - Option.apply(trustContext).foreach(context => { - httpClientBuilder = httpClientBuilder.sslSocketFactory(sslSocketFactory, context) - }) - val uriObjects = uris.map(URI.create) - val httpUris = uriObjects.filter(uri => uri.getScheme == "http") - val httpsUris = uriObjects.filter(uri => uri.getScheme == "https") - val maybeAllProxy = Option.apply(System.getProperty(Config.KUBERNETES_ALL_PROXY)) - val maybeHttpProxy = Option.apply(System.getProperty(Config.KUBERNETES_HTTP_PROXY)) - .orElse(maybeAllProxy) - .map(uriStringToProxy) - val maybeHttpsProxy = Option.apply(System.getProperty(Config.KUBERNETES_HTTPS_PROXY)) - .orElse(maybeAllProxy) - .map(uriStringToProxy) - val maybeNoProxy = Option.apply(System.getProperty(Config.KUBERNETES_NO_PROXY)) - .map(_.split(",")) - .toSeq - .flatten - val proxySelector = new ProxySelector { - override def select(uri: URI): java.util.List[java.net.Proxy] = { - val directProxy = java.net.Proxy.NO_PROXY - val resolvedProxy = maybeNoProxy.find( _ == uri.getHost) - .map( _ => directProxy) - .orElse(uri.getScheme match { - case "http" => - logDebug(s"Looking up http proxies to route $uri") - maybeHttpProxy.filter { _ => - matchingUriExists(uri, httpUris) - } - case "https" => - logDebug(s"Looking up https proxies to route $uri") - maybeHttpsProxy.filter { _ => - matchingUriExists(uri, httpsUris) - } - case _ => None - }).getOrElse(directProxy) - logDebug(s"Routing $uri through ${resolvedProxy.address()} with proxy" + - s" type ${resolvedProxy.`type`()}") - Collections.singletonList(resolvedProxy) - } - - override def connectFailed(uri: URI, sa: SocketAddress, ioe: IOException) = { - throw new SparkException(s"Failed to connect to proxy through uri $uri," + - s" socket address: $sa", ioe) - } - } - httpClientBuilder = httpClientBuilder.proxySelector(proxySelector) - val objectMapper = new ObjectMapper() - .registerModule(new DefaultScalaModule) - .setDateFormat(JacksonMessageWriter.makeISODateFormat) - objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) - val target = new MultiServerFeignTarget[T](uris.toSeq, maxRetriesPerServer) - val baseHttpClient = new feign.okhttp.OkHttpClient(httpClientBuilder.build()) - val resetTargetHttpClient = new Client { - override def execute(request: Request, options: Options): Response = { - val response = 
baseHttpClient.execute(request, options) - if (response.status() / 100 == 2) { - target.reset() - } - response - } - } - Feign.builder() - .client(resetTargetHttpClient) - .contract(new JAXRSContract) - .encoder(new JacksonEncoder(objectMapper)) - .decoder(new JacksonDecoder(objectMapper)) - .options(new Options(connectTimeoutMillis, readTimeoutMillis)) - .retryer(target) - .target(target) - } - - private def matchingUriExists(uri: URI, httpUris: Set[URI]): Boolean = { - httpUris.exists(httpUri => { - httpUri.getScheme == uri.getScheme && httpUri.getHost == uri.getHost && - httpUri.getPort == uri.getPort - }) - } - - private def uriStringToProxy(uriString: String): java.net.Proxy = { - val uriObject = URI.create(uriString) - new java.net.Proxy(java.net.Proxy.Type.HTTP, - new InetSocketAddress(uriObject.getHost, uriObject.getPort)) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala deleted file mode 100644 index bdd4a85da8f85..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.rest.kubernetes.v1 - -import com.fasterxml.jackson.annotation.{JsonIgnore, JsonSubTypes, JsonTypeInfo} - -import org.apache.spark.SPARK_VERSION -import org.apache.spark.deploy.kubernetes.KubernetesCredentials -import org.apache.spark.deploy.rest.{SubmitRestProtocolRequest, SubmitRestProtocolResponse} -import org.apache.spark.util.Utils - -case class KubernetesCreateSubmissionRequest( - appResource: AppResource, - mainClass: String, - appArgs: Array[String], - sparkProperties: Map[String, String], - secret: String, - driverPodKubernetesCredentials: KubernetesCredentials, - uploadedJarsBase64Contents: TarGzippedData, - uploadedFilesBase64Contents: TarGzippedData) extends SubmitRestProtocolRequest { - @JsonIgnore - override val messageType: String = s"kubernetes.v1.${Utils.getFormattedClassName(this)}" - override val action = messageType - message = "create" - clientSparkVersion = SPARK_VERSION -} - -case class TarGzippedData( - dataBase64: String, - blockSize: Int = 10240, - recordSize: Int = 512, - encoding: String -) - -@JsonTypeInfo( - use = JsonTypeInfo.Id.NAME, - include = JsonTypeInfo.As.PROPERTY, - property = "type") -@JsonSubTypes(value = Array( - new JsonSubTypes.Type(value = classOf[UploadedAppResource], name = "UploadedAppResource"), - new JsonSubTypes.Type(value = classOf[ContainerAppResource], name = "ContainerLocalAppResource"), - new JsonSubTypes.Type(value = classOf[RemoteAppResource], name = "RemoteAppResource"))) -abstract class AppResource - -case class UploadedAppResource( - resourceBase64Contents: String, - name: String = "spark-app-resource") extends AppResource - -case class ContainerAppResource(resourcePath: String) extends AppResource - -case class RemoteAppResource(resource: String) extends AppResource - -class PingResponse extends SubmitRestProtocolResponse { - val text = "pong" - message = "pong" - serverSparkVersion = SPARK_VERSION - @JsonIgnore - override val messageType: String = s"kubernetes.v1.${Utils.getFormattedClassName(this)}" - override val action: String = messageType -} - diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala deleted file mode 100644 index 5cd24a8f9b75e..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala +++ /dev/null @@ -1,483 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.rest.kubernetes.v1 - -import java.io.{File, FileOutputStream, StringReader} -import java.net.URI -import java.nio.file.Paths -import java.security.SecureRandom -import java.util.concurrent.CountDownLatch -import java.util.concurrent.atomic.AtomicInteger -import javax.servlet.http.{HttpServletRequest, HttpServletResponse} - -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, ByteStreams, Files} -import org.apache.commons.codec.binary.Base64 -import org.apache.commons.lang3.RandomStringUtils -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer - -import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} -import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils -import org.apache.spark.deploy.rest._ -import org.apache.spark.internal.config.OptionalConfigEntry -import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} - -private case class KubernetesSparkRestServerArguments( - host: Option[String] = None, - port: Option[Int] = None, - useSsl: Boolean = false, - secretFile: Option[String] = None, - keyStoreFile: Option[String] = None, - keyStorePasswordFile: Option[String] = None, - keyStoreType: Option[String] = None, - keyPasswordFile: Option[String] = None, - keyPemFile: Option[String] = None, - certPemFile: Option[String] = None) { - def validate(): KubernetesSparkRestServerArguments = { - require(host.isDefined, "Hostname not set via --hostname.") - require(port.isDefined, "Port not set via --port") - require(secretFile.isDefined, "Secret file not set via --secret-file") - this - } -} - -private object KubernetesSparkRestServerArguments { - def fromArgsArray(inputArgs: Array[String]): KubernetesSparkRestServerArguments = { - var args = inputArgs.toList - var resolvedArguments = KubernetesSparkRestServerArguments() - while (args.nonEmpty) { - resolvedArguments = args match { - case "--hostname" :: value :: tail => - args = tail - resolvedArguments.copy(host = Some(value)) - case "--port" :: value :: tail => - args = tail - resolvedArguments.copy(port = Some(value.toInt)) - case "--secret-file" :: value :: tail => - args = tail - resolvedArguments.copy(secretFile = Some(value)) - case "--use-ssl" :: value :: tail => - args = tail - resolvedArguments.copy(useSsl = value.toBoolean) - case "--keystore-file" :: value :: tail => - args = tail - resolvedArguments.copy(keyStoreFile = Some(value)) - case "--keystore-password-file" :: value :: tail => - args = tail - resolvedArguments.copy(keyStorePasswordFile = Some(value)) - case "--keystore-type" :: value :: tail => - args = tail - resolvedArguments.copy(keyStoreType = Some(value)) - case "--keystore-key-password-file" :: value :: tail => - args = tail - resolvedArguments.copy(keyPasswordFile = Some(value)) - case "--key-pem-file" :: value :: tail => - args = tail - resolvedArguments.copy(keyPemFile = Some(value)) - case "--cert-pem-file" :: value :: tail => - args = tail - resolvedArguments.copy(certPemFile = Some(value)) - // TODO polish usage message - case Nil => resolvedArguments - case unknown => throw new IllegalStateException(s"Unknown argument(s) found: $unknown") - } - } - resolvedArguments.validate() - } -} - -/** - * Runs in the driver pod and receives a request to run an 
application. Note that - * unlike the submission rest server in standalone mode, this server is expected - * to be used to run one application only, and then shut down once that application - * is complete. - */ -private[spark] class KubernetesSparkRestServer( - host: String, - port: Int, - conf: SparkConf, - expectedApplicationSecret: Array[Byte], - shutdownLock: CountDownLatch, - exitCode: AtomicInteger, - sslOptions: SSLOptions = new SSLOptions) - extends RestSubmissionServer(host, port, conf, sslOptions) { - - private val SERVLET_LOCK = new Object - private val javaExecutable = s"${System.getenv("JAVA_HOME")}/bin/java" - private val sparkHome = System.getenv("SPARK_HOME") - private val securityManager = new SecurityManager(conf) - override protected lazy val contextToServlet = Map[String, RestServlet]( - s"$baseContext/create/*" -> submitRequestServlet, - s"$baseContext/ping/*" -> pingServlet) - - private val pingServlet = new PingServlet - override protected val submitRequestServlet: SubmitRequestServlet - = new KubernetesSubmitRequestServlet - // TODO - override protected val statusRequestServlet: StatusRequestServlet = null - override protected val killRequestServlet: KillRequestServlet = null - - private class PingServlet extends RestServlet { - protected override def doGet( - request: HttpServletRequest, - response: HttpServletResponse): Unit = { - sendResponse(new PingResponse, response) - } - } - - private class KubernetesSubmitRequestServlet extends SubmitRequestServlet { - - private val waitForProcessCompleteExecutor = ThreadUtils - .newDaemonSingleThreadExecutor("wait-for-spark-app-complete") - private var startedApplication = false - - // TODO validating the secret should be done as part of a header of the request. - // Instead here we have to specify the secret in the body. - override protected def handleSubmit( - requestMessageJson: String, - requestMessage: SubmitRestProtocolMessage, - responseServlet: HttpServletResponse): SubmitRestProtocolResponse = { - SERVLET_LOCK.synchronized { - if (startedApplication) { - throw new IllegalStateException("Application has already been submitted.") - } else { - requestMessage match { - case KubernetesCreateSubmissionRequest( - appResource, - mainClass, - appArgs, - sparkProperties, - secret, - driverPodKubernetesCredentials, - uploadedJars, - uploadedFiles) => - val decodedSecret = Base64.decodeBase64(secret) - if (!expectedApplicationSecret.sameElements(decodedSecret)) { - responseServlet.setStatus(HttpServletResponse.SC_UNAUTHORIZED) - handleError("Unauthorized to submit application.") - } else { - val tempDir = Utils.createTempDir() - val resolvedAppResource = resolveAppResource(appResource, tempDir) - val writtenJars = writeUploadedJars(uploadedJars, tempDir) - val writtenFiles = writeUploadedFiles(uploadedFiles) - val resolvedSparkProperties = new mutable.HashMap[String, String] - resolvedSparkProperties ++= sparkProperties - val originalJars = sparkProperties.get("spark.jars") - .map(_.split(",")) - .getOrElse(Array.empty) - - // The driver at this point has handed us the value of spark.jars verbatim as - // specified in spark-submit. At this point, remove all jars that were local - // to the submitting user's disk, and replace them with the paths that were - // written to disk above. 
- val onlyContainerLocalOrRemoteJars = KubernetesFileUtils - .getNonSubmitterLocalFiles(originalJars) - val resolvedJars = (writtenJars ++ - onlyContainerLocalOrRemoteJars ++ - Array(resolvedAppResource.sparkJarPath)).toSet - if (resolvedJars.nonEmpty) { - resolvedSparkProperties("spark.jars") = resolvedJars.mkString(",") - } else { - resolvedSparkProperties.remove("spark.jars") - } - - // Determining the driver classpath is similar. It's the combination of: - // - Jars written from uploads - // - Jars in (spark.jars + mainAppResource) that has a "local" prefix - // - spark.driver.extraClasspath - // - Spark core jars from the installation - val sparkCoreJars = new File(sparkHome, "jars").listFiles().map(_.getAbsolutePath) - val driverExtraClasspath = sparkProperties - .get("spark.driver.extraClassPath") - .map(_.split(",")) - .getOrElse(Array.empty[String]) - val onlyContainerLocalJars = KubernetesFileUtils - .getOnlyContainerLocalFiles(originalJars) - val driverClasspath = driverExtraClasspath ++ - Seq(resolvedAppResource.localPath) ++ - writtenJars ++ - onlyContainerLocalJars ++ - sparkCoreJars - - // Resolve spark.files similarly to spark.jars. - val originalFiles = sparkProperties.get("spark.files") - .map(_.split(",")) - .getOrElse(Array.empty[String]) - val onlyContainerLocalOrRemoteFiles = KubernetesFileUtils - .getNonSubmitterLocalFiles(originalFiles) - val resolvedFiles = writtenFiles ++ onlyContainerLocalOrRemoteFiles - if (resolvedFiles.nonEmpty) { - resolvedSparkProperties("spark.files") = resolvedFiles.mkString(",") - } else { - resolvedSparkProperties.remove("spark.files") - } - resolvedSparkProperties ++= writeKubernetesCredentials( - driverPodKubernetesCredentials, tempDir) - - val command = new ArrayBuffer[String] - command += javaExecutable - command += "-cp" - command += s"${driverClasspath.mkString(":")}" - for (prop <- resolvedSparkProperties) { - command += s"-D${prop._1}=${prop._2}" - } - val driverMemory = resolvedSparkProperties.getOrElse("spark.driver.memory", "1g") - command += s"-Xms$driverMemory" - command += s"-Xmx$driverMemory" - val extraJavaOpts = resolvedSparkProperties.get("spark.driver.extraJavaOptions") - .map(Utils.splitCommandString) - .getOrElse(Seq.empty) - command ++= extraJavaOpts - command += mainClass - command ++= appArgs - val pb = new ProcessBuilder(command: _*).inheritIO() - val process = pb.start() - ShutdownHookManager.addShutdownHook(() => { - logInfo("Received stop command, shutting down the running Spark application...") - process.destroy() - shutdownLock.countDown() - }) - waitForProcessCompleteExecutor.submit(new Runnable { - override def run(): Unit = { - // set the REST service's exit code to the exit code of the driver subprocess - exitCode.set(process.waitFor) - SERVLET_LOCK.synchronized { - logInfo("Spark application complete. 
Shutting down submission server...") - KubernetesSparkRestServer.this.stop - shutdownLock.countDown() - } - } - }) - startedApplication = true - val response = new CreateSubmissionResponse - response.success = true - response.submissionId = null - response.message = "success" - response.serverSparkVersion = sparkVersion - response - } - case unexpected => - responseServlet.setStatus(HttpServletResponse.SC_BAD_REQUEST) - handleError(s"Received message of unexpected type ${unexpected.messageType}.") - } - } - } - } - - private def writeUploadedJars(jars: TarGzippedData, rootTempDir: File): - Seq[String] = { - val resolvedDirectory = new File(rootTempDir, "jars") - if (!resolvedDirectory.mkdir()) { - throw new IllegalStateException(s"Failed to create jars dir at " + - resolvedDirectory.getAbsolutePath) - } - CompressionUtils.unpackAndWriteCompressedFiles(jars, resolvedDirectory) - } - - private def writeUploadedFiles(files: TarGzippedData): Seq[String] = { - val workingDir = Paths.get("").toFile.getAbsoluteFile - CompressionUtils.unpackAndWriteCompressedFiles(files, workingDir) - } - - private def writeKubernetesCredentials( - kubernetesCredentials: KubernetesCredentials, - rootTempDir: File): Map[String, String] = { - val resolvedDirectory = new File(rootTempDir, "kubernetes-credentials") - if (!resolvedDirectory.mkdir()) { - throw new IllegalStateException(s"Failed to create credentials dir at " - + resolvedDirectory.getAbsolutePath) - } - val oauthTokenFile = writeRawStringCredentialAndGetConf("oauth-token.txt", resolvedDirectory, - KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN, - kubernetesCredentials.oauthTokenBase64.map { base64 => - new String(BaseEncoding.base64().decode(base64), Charsets.UTF_8) - }) - val caCertFile = writeBase64CredentialAndGetConf("ca.crt", resolvedDirectory, - KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE, kubernetesCredentials.caCertDataBase64) - val clientKeyFile = writeBase64CredentialAndGetConf("key.key", resolvedDirectory, - KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE, kubernetesCredentials.clientKeyDataBase64) - val clientCertFile = writeBase64CredentialAndGetConf("cert.crt", resolvedDirectory, - KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE, kubernetesCredentials.clientCertDataBase64) - (oauthTokenFile ++ caCertFile ++ clientKeyFile ++ clientCertFile).toMap - } - - private def writeRawStringCredentialAndGetConf( - fileName: String, - dir: File, - conf: OptionalConfigEntry[String], - credential: Option[String]): Option[(String, String)] = { - credential.map { cred => - val credentialFile = new File(dir, fileName) - Files.write(cred, credentialFile, Charsets.UTF_8) - (conf.key, credentialFile.getAbsolutePath) - } - } - - private def writeBase64CredentialAndGetConf( - fileName: String, - dir: File, - conf: OptionalConfigEntry[String], - credential: Option[String]): Option[(String, String)] = { - credential.map { cred => - val credentialFile = new File(dir, fileName) - Files.write(BaseEncoding.base64().decode(cred), credentialFile) - (conf.key, credentialFile.getAbsolutePath) - } - } - - /** - * Retrieve the path on the driver container where the main app resource is, and what value it - * ought to have in the spark.jars property. The two may be different because for non-local - * dependencies, we have to fetch the resource (if it is not "local") but still want to use - * the full URI in spark.jars. 
- */ - private def resolveAppResource(appResource: AppResource, tempDir: File): - ResolvedAppResource = { - appResource match { - case UploadedAppResource(resourceContentsBase64, resourceName) => - val resourceFile = new File(tempDir, resourceName) - val resourceFilePath = resourceFile.getAbsolutePath - if (resourceFile.createNewFile()) { - Utils.tryWithResource(new StringReader(resourceContentsBase64)) { reader => - Utils.tryWithResource(new FileOutputStream(resourceFile)) { os => - Utils.tryWithResource(BaseEncoding.base64().decodingStream(reader)) { - decodingStream => - ByteStreams.copy(decodingStream, os) - } - } - } - ResolvedAppResource(resourceFile.getAbsolutePath, resourceFile.getAbsolutePath) - } else { - throw new IllegalStateException(s"Failed to write main app resource file" + - s" to $resourceFilePath") - } - case ContainerAppResource(resource) => - ResolvedAppResource(Utils.resolveURI(resource).getPath, resource) - case RemoteAppResource(resource) => - Utils.fetchFile(resource, tempDir, conf, - securityManager, SparkHadoopUtil.get.newConfiguration(conf), - System.currentTimeMillis(), useCache = false) - val fileName = Utils.decodeFileNameInURI(URI.create(resource)) - val downloadedFile = new File(tempDir, fileName) - val downloadedFilePath = downloadedFile.getAbsolutePath - if (!downloadedFile.isFile) { - throw new IllegalStateException(s"Main app resource is not a file or" + - s" does not exist at $downloadedFilePath") - } - ResolvedAppResource(downloadedFilePath, resource) - } - } - } - - private case class ResolvedAppResource(localPath: String, sparkJarPath: String) -} - -private[spark] object KubernetesSparkRestServer { - private val barrier = new CountDownLatch(1) - private val SECURE_RANDOM = new SecureRandom() - - def main(args: Array[String]): Unit = { - val parsedArguments = KubernetesSparkRestServerArguments.fromArgsArray(args) - val secretFile = new File(parsedArguments.secretFile.get) - require(secretFile.isFile, "Secret file specified by --secret-file is not a file, or" + - " does not exist.") - val sslOptions = if (parsedArguments.useSsl) { - validateSslOptions(parsedArguments) - val keyPassword = parsedArguments - .keyPasswordFile - .map(new File(_)) - .map(Files.toString(_, Charsets.UTF_8)) - // If key password isn't set but we're using PEM files, generate a password - .orElse(parsedArguments.keyPemFile.map(_ => randomPassword())) - val keyStorePassword = parsedArguments - .keyStorePasswordFile - .map(new File(_)) - .map(Files.toString(_, Charsets.UTF_8)) - // If keystore password isn't set but we're using PEM files, generate a password - .orElse(parsedArguments.keyPemFile.map(_ => randomPassword())) - val resolvedKeyStore = parsedArguments.keyStoreFile.map(new File(_)).orElse( - for { - keyPemFile <- parsedArguments.keyPemFile - certPemFile <- parsedArguments.certPemFile - resolvedKeyStorePassword <- keyStorePassword - resolvedKeyPassword <- keyPassword - } yield { - PemsToKeyStoreConverter.convertPemsToTempKeyStoreFile( - new File(keyPemFile), - new File(certPemFile), - "provided-key", - resolvedKeyStorePassword, - resolvedKeyPassword, - parsedArguments.keyStoreType) - }) - new SSLOptions( - enabled = true, - keyStore = resolvedKeyStore, - keyStoreType = parsedArguments.keyStoreType, - keyStorePassword = keyStorePassword, - keyPassword = keyPassword) - } else { - new SSLOptions - } - val secretBytes = Files.toByteArray(secretFile) - val sparkConf = new SparkConf(true) - val exitCode = new AtomicInteger(0) - val server = new KubernetesSparkRestServer( - 
parsedArguments.host.get, - parsedArguments.port.get, - sparkConf, - secretBytes, - barrier, - exitCode, - sslOptions) - server.start() - ShutdownHookManager.addShutdownHook(() => { - try { - server.stop() - } finally { - barrier.countDown() - } - }) - barrier.await() - System.exit(exitCode.get()) - } - - private def validateSslOptions(parsedArguments: KubernetesSparkRestServerArguments): Unit = { - parsedArguments.keyStoreFile.foreach { _ => - require(parsedArguments.keyPemFile.orElse(parsedArguments.certPemFile).isEmpty, - "Cannot provide both key/cert PEM files and a keyStore file; select one or the other" + - " for configuring SSL.") - } - parsedArguments.keyPemFile.foreach { _ => - require(parsedArguments.certPemFile.isDefined, - "When providing the key PEM file, the certificate PEM file must also be provided.") - } - parsedArguments.certPemFile.foreach { _ => - require(parsedArguments.keyPemFile.isDefined, - "When providing the certificate PEM file, the key PEM file must also be provided.") - } - } - - private def randomPassword(): String = { - RandomStringUtils.random(1024, 0, Integer.MAX_VALUE, false, false, null, SECURE_RANDOM) - } -} - diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/MultiServerFeignTarget.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/MultiServerFeignTarget.scala deleted file mode 100644 index 56ff82ea2fc33..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/MultiServerFeignTarget.scala +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.rest.kubernetes.v1 - -import feign.{Request, RequestTemplate, RetryableException, Retryer, Target} -import scala.reflect.ClassTag -import scala.util.Random - -import org.apache.spark.internal.Logging - -private[kubernetes] class MultiServerFeignTarget[T : ClassTag]( - private val servers: Seq[String], - private val maxRetriesPerServer: Int = 1, - private val delayBetweenRetriesMillis: Int = 1000) extends Target[T] with Retryer with Logging { - require(servers.nonEmpty, "Must provide at least one server URI.") - - private val threadLocalShuffledServers = new ThreadLocal[Seq[String]] { - override def initialValue(): Seq[String] = Random.shuffle(servers) - } - private val threadLocalCurrentAttempt = new ThreadLocal[Int] { - override def initialValue(): Int = 0 - } - - override def `type`(): Class[T] = { - implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] - } - - /** - * Cloning the target is done on every request, for use on the current - * thread - thus it's important that clone returns a "fresh" target. 
- */ - override def clone(): Retryer = { - reset() - this - } - - override def name(): String = { - s"${getClass.getSimpleName} with servers [${servers.mkString(",")}]" - } - - override def apply(requestTemplate: RequestTemplate): Request = { - if (!requestTemplate.url().startsWith("http")) { - requestTemplate.insert(0, url()) - } - requestTemplate.request() - } - - override def url(): String = threadLocalShuffledServers.get.head - - override def continueOrPropagate(e: RetryableException): Unit = { - threadLocalCurrentAttempt.set(threadLocalCurrentAttempt.get + 1) - val currentAttempt = threadLocalCurrentAttempt.get - if (threadLocalCurrentAttempt.get < maxRetriesPerServer) { - logWarning(s"Attempt $currentAttempt of $maxRetriesPerServer failed for" + - s" server ${url()}. Retrying request...", e) - Thread.sleep(delayBetweenRetriesMillis) - } else { - val previousUrl = url() - threadLocalShuffledServers.set(threadLocalShuffledServers.get.drop(1)) - if (threadLocalShuffledServers.get.isEmpty) { - logError(s"Failed request to all servers $maxRetriesPerServer times.", e) - throw e - } else { - logWarning(s"Failed request to $previousUrl $maxRetriesPerServer times." + - s" Trying to access ${url()} instead.", e) - threadLocalCurrentAttempt.set(0) - } - } - } - - def reset(): Unit = { - threadLocalShuffledServers.set(Random.shuffle(servers)) - threadLocalCurrentAttempt.set(0) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala index 886484ffb4692..8de0f56f007dc 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala @@ -30,7 +30,7 @@ import org.bouncycastle.cert.jcajce.{JcaX509CertificateConverter, JcaX509v3Certi import org.bouncycastle.openssl.jcajce.JcaPEMWriter import org.bouncycastle.operator.jcajce.JcaContentSignerBuilder -import org.apache.spark.deploy.kubernetes.submit.v2.{KeyAndCertPem, KeyStoreAndTrustStore} +import org.apache.spark.deploy.kubernetes.submit.{KeyAndCertPem, KeyStoreAndTrustStore} import org.apache.spark.util.Utils private[spark] object SSLUtils { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala similarity index 99% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala index 9ad46e52747fd..d4d3882bb8bab 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.File @@ -35,7 +35,6 @@ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.kubernetes.SparkPodInitContainerBootstrap import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.LoggingPodStatusWatcher class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val JARS_RESOURCE = SubmittedResourceIdAndSecret("jarsId", "jarsSecret") diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolverSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala similarity index 98% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolverSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala index 6804f0010b6a5..ca5cd1fff9b74 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolverSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import org.apache.spark.SparkFunSuite diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala similarity index 99% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala index d4413076fb092..c1005a176408c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala @@ -14,16 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import io.fabric8.kubernetes.api.model.{PodBuilder, SecretBuilder} import org.scalatest.prop.TableDrivenPropertyChecks import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.KubernetesCredentials import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.KubernetesCredentials class DriverPodKubernetesCredentialsMounterSuite extends SparkFunSuite with TableDrivenPropertyChecks { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfigurationSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfigurationSuite.scala similarity index 97% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfigurationSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfigurationSuite.scala index 62bfd127d17e2..ead1d49b8a37c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfigurationSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfigurationSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.kubernetes.config._ diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SSLFilePairs.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SSLFilePairs.scala similarity index 94% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SSLFilePairs.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SSLFilePairs.scala index 3d3ff7ad7011a..5240128743b76 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SSLFilePairs.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SSLFilePairs.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.File diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilderSuite.scala similarity index 98% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilderSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilderSuite.scala index 7c6fbf5ce6da2..f1e1ff7013496 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilderSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.StringReader import java.util.Properties diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPluginSuite.scala similarity index 96% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPluginSuite.scala index 09b41dc1bcaaf..8431b77c9e85f 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPluginSuite.scala @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit -import java.io.File - -import org.apache.spark.{SparkFunSuite, SSLOptions} +import org.apache.spark.SparkFunSuite import org.apache.spark.deploy.kubernetes.config._ class SubmittedDependencyInitContainerConfigPluginSuite extends SparkFunSuite { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilderSuite.scala similarity index 97% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilderSuite.scala index 358edbecf8708..83fd568e7a3aa 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilderSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.File @@ -24,7 +24,7 @@ import io.fabric8.kubernetes.api.model.Secret import scala.collection.JavaConverters._ import scala.collection.Map -import org.apache.spark.{SparkFunSuite, SSLOptions} +import org.apache.spark.SparkFunSuite import org.apache.spark.util.Utils class SubmittedDependencySecretBuilderSuite extends SparkFunSuite { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala similarity index 97% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala index 7b259aa2c3a0c..8693ff4e15372 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File} import java.util.UUID @@ -35,7 +35,7 @@ import retrofit2.{Call, Response} import org.apache.spark.{SparkFunSuite, SSLOptions} import org.apache.spark.deploy.kubernetes.CompressionUtils -import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory} +import org.apache.spark.deploy.rest.kubernetes.{ResourceStagingServiceRetrofit, RetrofitClientFactory} import org.apache.spark.util.Utils private[spark] class SubmittedDependencyUploaderSuite extends SparkFunSuite with BeforeAndAfter { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainerSuite.scala similarity index 98% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainerSuite.scala index c551fbc01d060..f2fdf026390cd 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainerSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.{ByteArrayOutputStream, File} import java.util.UUID @@ -32,7 +32,7 @@ import org.scalatest.BeforeAndAfter import org.scalatest.mock.MockitoSugar._ import retrofit2.{Call, Callback, Response} -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkFunSuite, SSLOptions} +import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSslOptionsProviderSuite.scala similarity index 99% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSslOptionsProviderSuite.scala index c33d8beb2c397..3bb318d713a54 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSslOptionsProviderSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.{File, FileInputStream, StringWriter} import java.security.KeyStore diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala similarity index 99% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala index 4ffb0d4dfa887..0604e0d6494ae 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.net.ServerSocket import javax.ws.rs.core.MediaType diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImplSuite.scala similarity index 98% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImplSuite.scala index 9677d12681a16..53396a3f27a1a 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImplSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.{ByteArrayInputStream, File} import java.nio.file.Paths diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile deleted file mode 100644 index 40f9459dc06dc..0000000000000 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -FROM openjdk:8-alpine - -# If this docker file is being used in the context of building your images from a Spark distribution, the docker build -# command should be invoked from the top level directory of the Spark distribution. E.g.: -# docker build -t spark-driver:latest -f dockerfiles/driver/Dockerfile . - -RUN apk upgrade --update -RUN apk add --update bash -RUN mkdir -p /opt/spark -RUN touch /opt/spark/RELEASE - -ADD jars /opt/spark/jars -ADD examples /opt/spark/examples -ADD bin /opt/spark/bin -ADD sbin /opt/spark/sbin -ADD conf /opt/spark/conf - -ENV SPARK_HOME /opt/spark - -WORKDIR /opt/spark - -CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ - if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ - if ! [ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ - if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ - exec ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY $SPARK_DRIVER_CLASS $SPARK_DRIVER_ARGS diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 8ab7a58704505..40f9459dc06dc 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -36,16 +36,8 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -CMD SSL_ARGS="" && \ - if ! [ -z ${SPARK_SUBMISSION_USE_SSL+x} ]; then SSL_ARGS="$SSL_ARGS --use-ssl $SPARK_SUBMISSION_USE_SSL"; fi && \ - if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-file $SPARK_SUBMISSION_KEYSTORE_FILE"; fi && \ - if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_TYPE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-type $SPARK_SUBMISSION_KEYSTORE_TYPE"; fi && \ - if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-password-file $SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE"; fi && \ - if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-key-password-file $SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE"; fi && \ - if ! [ -z ${SPARK_SUBMISSION_KEY_PEM_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --key-pem-file $SPARK_SUBMISSION_KEY_PEM_FILE"; fi && \ - if ! [ -z ${SPARK_SUBMISSION_CERT_PEM_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --cert-pem-file $SPARK_SUBMISSION_CERT_PEM_FILE"; fi && \ - exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.v1.KubernetesSparkRestServer \ - --hostname $HOSTNAME \ - --port $SPARK_SUBMISSION_SERVER_PORT \ - --secret-file $SPARK_SUBMISSION_SECRET_LOCATION \ - ${SSL_ARGS} +CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ + if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! 
[ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + exec ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY $SPARK_DRIVER_CLASS $SPARK_DRIVER_ARGS diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-init/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile similarity index 95% rename from resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-init/Dockerfile rename to resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile index 59029a6c08b4a..bb249a4ea86b6 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-init/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile @@ -35,4 +35,4 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.v2.KubernetesSparkDependencyDownloadInitContainer" ] +ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.KubernetesSparkDependencyDownloadInitContainer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile index 15e1ce75815df..125749c71c79a 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile @@ -35,4 +35,4 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.v2.ResourceStagingServer" ] +ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.ResourceStagingServer" ] diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 56fcf692b8ff7..d23bfcdbc5251 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -17,31 +17,257 @@ package org.apache.spark.deploy.kubernetes.integrationtest import java.nio.file.Paths +import java.util.UUID import com.google.common.base.Charsets import com.google.common.io.Files -import org.scalatest.Suite -import org.scalatest.concurrent.PatienceConfiguration +import io.fabric8.kubernetes.client.internal.readiness.Readiness +import org.scalatest.BeforeAndAfter +import org.scalatest.concurrent.{Eventually, PatienceConfiguration} import org.scalatest.time.{Minutes, Seconds, Span} +import scala.collection.JavaConverters._ -import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.kubernetes.integrationtest.backend.{IntegrationTestBackend, IntegrationTestBackendFactory} +import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.SSLUtils 
+import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackendFactory +import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND +import org.apache.spark.deploy.kubernetes.submit.{Client, KeyAndCertPem} +import org.apache.spark.launcher.SparkLauncher -private[spark] class KubernetesSuite extends SparkFunSuite { - private val testBackend: IntegrationTestBackend = IntegrationTestBackendFactory.getTestBackend() +private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { + import KubernetesSuite._ + private val testBackend = IntegrationTestBackendFactory.getTestBackend() + + private val APP_LOCATOR_LABEL = UUID.randomUUID().toString.replaceAll("-", "") + private var kubernetesTestComponents: KubernetesTestComponents = _ + private var sparkConf: SparkConf = _ + private var resourceStagingServerLauncher: ResourceStagingServerLauncher = _ + private var staticAssetServerLauncher: StaticAssetServerLauncher = _ override def beforeAll(): Unit = { testBackend.initialize() + kubernetesTestComponents = new KubernetesTestComponents(testBackend.getKubernetesClient) + resourceStagingServerLauncher = new ResourceStagingServerLauncher( + kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) + staticAssetServerLauncher = new StaticAssetServerLauncher( + kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) } override def afterAll(): Unit = { testBackend.cleanUp() } - override def nestedSuites: scala.collection.immutable.IndexedSeq[Suite] = { - Vector( - new KubernetesV1Suite(testBackend), - new KubernetesV2Suite(testBackend)) + before { + sparkConf = kubernetesTestComponents.newSparkConf() + .set(INIT_CONTAINER_DOCKER_IMAGE, s"spark-init:latest") + .set(DRIVER_DOCKER_IMAGE, s"spark-driver:latest") + .set(KUBERNETES_DRIVER_LABELS, s"spark-app-locator=$APP_LOCATOR_LABEL") + kubernetesTestComponents.createNamespace() + } + + after { + kubernetesTestComponents.deleteNamespace() + } + + test("Simple submission test with the resource staging server.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + launchStagingServer(SSLOptions(), None) + runSparkPiAndVerifyCompletion(SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Enable SSL on the resource staging server") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + val keyStoreAndTrustStore = SSLUtils.generateKeyStoreTrustStorePair( + ipAddress = Minikube.getMinikubeIp, + keyStorePassword = "keyStore", + keyPassword = "key", + trustStorePassword = "trustStore") + sparkConf.set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", + keyStoreAndTrustStore.keyStore.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.trustStore", + keyStoreAndTrustStore.trustStore.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", "keyStore") + .set("spark.ssl.kubernetes.resourceStagingServer.keyPassword", "key") + .set("spark.ssl.kubernetes.resourceStagingServer.trustStorePassword", "trustStore") + launchStagingServer(SSLOptions( + enabled = true, + keyStore = Some(keyStoreAndTrustStore.keyStore), + trustStore = Some(keyStoreAndTrustStore.trustStore), + keyStorePassword = Some("keyStore"), + keyPassword = Some("key"), + trustStorePassword = Some("trustStore")), + None) + 
runSparkPiAndVerifyCompletion(SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Use container-local resources without the resource staging server") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + sparkConf.setJars(Seq(CONTAINER_LOCAL_HELPER_JAR_PATH)) + runSparkPiAndVerifyCompletion(CONTAINER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Dynamic executor scaling basic test") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + launchStagingServer(SSLOptions(), None) + createShuffleServiceDaemonSet() + + sparkConf.setJars(Seq(CONTAINER_LOCAL_HELPER_JAR_PATH)) + sparkConf.set("spark.dynamicAllocation.enabled", "true") + sparkConf.set("spark.shuffle.service.enabled", "true") + sparkConf.set("spark.kubernetes.shuffle.labels", "app=spark-shuffle-service") + sparkConf.set("spark.kubernetes.shuffle.namespace", kubernetesTestComponents.namespace) + sparkConf.set("spark.app.name", "group-by-test") + runSparkGroupByTestAndVerifyCompletion(SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Use remote resources without the resource staging server.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() + sparkConf.setJars(Seq( + s"$assetServerUri/${EXAMPLES_JAR_FILE.getName}", + s"$assetServerUri/${HELPER_JAR_FILE.getName}" + )) + runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) + } + + test("Mix remote resources with submitted ones.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + launchStagingServer(SSLOptions(), None) + val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() + sparkConf.setJars(Seq( + SUBMITTER_LOCAL_MAIN_APP_RESOURCE, s"$assetServerUri/${HELPER_JAR_FILE.getName}" + )) + runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) + } + + test("Use key and certificate PEM files for TLS.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val keyAndCertificate = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) + launchStagingServer( + SSLOptions(enabled = true), + Some(keyAndCertificate)) + sparkConf.set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) + .set( + RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM.key, keyAndCertificate.certPem.getAbsolutePath) + runSparkPiAndVerifyCompletion(SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Use client key and client cert file when requesting executors") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + sparkConf.setJars(Seq( + CONTAINER_LOCAL_MAIN_APP_RESOURCE, + CONTAINER_LOCAL_HELPER_JAR_PATH)) + sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, + kubernetesTestComponents.clientConfig.getClientKeyFile) + sparkConf.set(KUBERNETES_DRIVER_CLIENT_CERT_FILE, + kubernetesTestComponents.clientConfig.getClientCertFile) + sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, + kubernetesTestComponents.clientConfig.getCaCertFile) + runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) + } + + private def launchStagingServer( + resourceStagingServerSslOptions: SSLOptions, keyAndCertPem: Option[KeyAndCertPem]): Unit = { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + val resourceStagingServerPort = resourceStagingServerLauncher.launchStagingServer( + resourceStagingServerSslOptions, keyAndCertPem) + val resourceStagingServerUriScheme = if (resourceStagingServerSslOptions.enabled) { + "https" + } else { + "http" + } + sparkConf.set(RESOURCE_STAGING_SERVER_URI, + s"$resourceStagingServerUriScheme://" + + s"${Minikube.getMinikubeIp}:$resourceStagingServerPort") + } + + private def runSparkPiAndVerifyCompletion(appResource: String): 
Unit = { + Client.run(sparkConf, appResource, SPARK_PI_MAIN_CLASS, Array.empty[String]) + val driverPod = kubernetesTestComponents.kubernetesClient + .pods() + .withLabel("spark-app-locator", APP_LOCATOR_LABEL) + .list() + .getItems + .get(0) + Eventually.eventually(TIMEOUT, INTERVAL) { + assert(kubernetesTestComponents.kubernetesClient + .pods() + .withName(driverPod.getMetadata.getName) + .getLog + .contains("Pi is roughly 3"), "The application did not compute the value of pi.") + } + } + + private def runSparkGroupByTestAndVerifyCompletion(appResource: String): Unit = { + Client.run( + sparkConf = sparkConf, + appArgs = Array.empty[String], + mainClass = GROUP_BY_MAIN_CLASS, + mainAppResource = appResource) + val driverPod = kubernetesTestComponents.kubernetesClient + .pods() + .withLabel("spark-app-locator", APP_LOCATOR_LABEL) + .list() + .getItems + .get(0) + Eventually.eventually(TIMEOUT, INTERVAL) { + assert(kubernetesTestComponents.kubernetesClient + .pods() + .withName(driverPod.getMetadata.getName) + .getLog + .contains("The Result is"), "The application did not complete.") + } + } + + private def createShuffleServiceDaemonSet(): Unit = { + val ds = kubernetesTestComponents.kubernetesClient.extensions().daemonSets() + .createNew() + .withNewMetadata() + .withName("shuffle") + .endMetadata() + .withNewSpec() + .withNewTemplate() + .withNewMetadata() + .withLabels(Map("app" -> "spark-shuffle-service").asJava) + .endMetadata() + .withNewSpec() + .addNewVolume() + .withName("shuffle-dir") + .withNewHostPath() + .withPath("/tmp") + .endHostPath() + .endVolume() + .addNewContainer() + .withName("shuffle") + .withImage("spark-shuffle:latest") + .withImagePullPolicy("IfNotPresent") + .addNewVolumeMount() + .withName("shuffle-dir") + .withMountPath("/tmp") + .endVolumeMount() + .endContainer() + .endSpec() + .endTemplate() + .endSpec() + .done() + + // wait for daemonset to become available. 
+ Eventually.eventually(TIMEOUT, INTERVAL) { + val pods = kubernetesTestComponents.kubernetesClient.pods() + .withLabel("app", "spark-shuffle-service").list().getItems + + if (pods.size() == 0 || !Readiness.isReady(pods.get(0))) { + throw ShuffleNotReadyException + } + } } } @@ -70,5 +296,5 @@ private[spark] object KubernetesSuite { val GROUP_BY_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + ".integrationtest.jobs.GroupByTest" - case class ShuffleNotReadyException() extends Exception + case object ShuffleNotReadyException extends Exception } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala index 677c0db606a47..9ae0d9ade7dc2 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala @@ -17,18 +17,13 @@ package org.apache.spark.deploy.kubernetes.integrationtest import java.util.UUID -import javax.net.ssl.X509TrustManager - -import scala.collection.JavaConverters._ -import scala.reflect.ClassTag import io.fabric8.kubernetes.client.DefaultKubernetesClient -import io.fabric8.kubernetes.client.internal.SSLUtils import org.scalatest.concurrent.Eventually +import scala.collection.JavaConverters._ import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.rest.kubernetes.v1.HttpClientUtil private[spark] class KubernetesTestComponents(defaultClient: DefaultKubernetesClient) { @@ -73,26 +68,4 @@ private[spark] class KubernetesTestComponents(defaultClient: DefaultKubernetesCl .set("spark.testing", "false") .set(WAIT_FOR_APP_COMPLETION, false) } - - def getService[T: ClassTag]( - serviceName: String, - namespace: String, - servicePortName: String, - servicePath: String = ""): T = synchronized { - val kubernetesMaster = s"${defaultClient.getMasterUrl}" - - val url = s"${ - Array[String]( - s"${kubernetesClient.getMasterUrl}", - "api", "v1", "proxy", - "namespaces", namespace, - "services", serviceName).mkString("/") - }" + - s":$servicePortName$servicePath" - val userHome = System.getProperty("user.home") - val kubernetesConf = kubernetesClient.getConfiguration - val sslContext = SSLUtils.sslContext(kubernetesConf) - val trustManager = SSLUtils.trustManagers(kubernetesConf)(0).asInstanceOf[X509TrustManager] - HttpClientUtil.createClient[T](Set(url), 5, sslContext.getSocketFactory, trustManager) - } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala deleted file mode 100644 index 559cb281c7c62..0000000000000 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala +++ /dev/null @@ -1,339 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.integrationtest - -import java.util.concurrent.TimeUnit - -import scala.collection.JavaConverters._ - -import com.google.common.collect.ImmutableList -import com.google.common.util.concurrent.SettableFuture -import io.fabric8.kubernetes.api.model.Pod -import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} -import io.fabric8.kubernetes.client.Watcher.Action -import org.scalatest.{BeforeAndAfter, DoNotDiscover} -import org.scalatest.concurrent.Eventually - -import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} -import org.apache.spark.deploy.kubernetes.SSLUtils -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend -import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube -import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND -import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 -import org.apache.spark.deploy.kubernetes.submit.v1.{Client, ExternalSuppliedUrisDriverServiceManager} -import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} -import org.apache.spark.util.Utils - -@DoNotDiscover -private[spark] class KubernetesV1Suite(testBackend: IntegrationTestBackend) - extends SparkFunSuite with BeforeAndAfter { - - private var kubernetesTestComponents: KubernetesTestComponents = _ - private var sparkConf: SparkConf = _ - - override def beforeAll(): Unit = { - kubernetesTestComponents = new KubernetesTestComponents(testBackend.getKubernetesClient) - kubernetesTestComponents.createNamespace() - } - - override def afterAll(): Unit = { - kubernetesTestComponents.deleteNamespace() - } - - before { - Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - val podsList = kubernetesTestComponents.kubernetesClient.pods().list() - assert(podsList == null - || podsList.getItems == null - || podsList.getItems.isEmpty - ) - val servicesList = kubernetesTestComponents.kubernetesClient.services().list() - assert(servicesList == null - || servicesList.getItems == null - || servicesList.getItems.isEmpty) - } - sparkConf = kubernetesTestComponents.newSparkConf() - } - - after { - val pods = kubernetesTestComponents.kubernetesClient.pods().list().getItems.asScala - pods.par.foreach(pod => { - kubernetesTestComponents.kubernetesClient.pods() - .withName(pod.getMetadata.getName) - .withGracePeriod(60) - .delete - }) - } - - private def getSparkMetricsService(sparkBaseAppName: String): SparkRestApiV1 = { - val serviceName = kubernetesTestComponents.kubernetesClient.services() - .withLabel("spark-app-name", sparkBaseAppName) - .list() - .getItems - .get(0) - .getMetadata - .getName - kubernetesTestComponents.getService[SparkRestApiV1](serviceName, - kubernetesTestComponents.namespace, 
"spark-ui-port") - } - - private def expectationsForStaticAllocation(sparkMetricsService: SparkRestApiV1): Unit = { - val apps = Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - val result = sparkMetricsService - .getApplications(ImmutableList.of(ApplicationStatus.RUNNING, ApplicationStatus.COMPLETED)) - assert(result.size == 1 - && !result.head.id.equalsIgnoreCase("appid") - && !result.head.id.equalsIgnoreCase("{appId}")) - result - } - Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - val result = sparkMetricsService.getExecutors(apps.head.id) - assert(result.size == 2) - assert(result.count(exec => exec.id != "driver") == 1) - result - } - Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - val result = sparkMetricsService.getStages( - apps.head.id, Seq(StageStatus.COMPLETE).asJava) - assert(result.size == 1) - result - } - } - - test("Run a simple example") { - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - } - - test("Run with the examples jar on the docker image") { - sparkConf.setJars(Seq(KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - } - - test("Run with custom labels and annotations") { - sparkConf.set(KUBERNETES_DRIVER_LABELS, "label1=label1value,label2=label2value") - sparkConf.set(KUBERNETES_DRIVER_ANNOTATIONS, "annotation1=annotation1value," + - "annotation2=annotation2value") - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val driverPodMetadata = kubernetesTestComponents.kubernetesClient - .pods - .withLabel("spark-app-name", "spark-pi") - .list() - .getItems - .get(0) - .getMetadata - val driverPodLabels = driverPodMetadata.getLabels - // We can't match all of the selectors directly since one of the selectors is based on the - // launch time. 
- assert(driverPodLabels.size === 5, "Unexpected number of pod labels.") - assert(driverPodLabels.get("spark-app-name") === "spark-pi", "Unexpected value for" + - " spark-app-name label.") - assert(driverPodLabels.get("spark-app-id").startsWith("spark-pi"), "Unexpected value for" + - " spark-app-id label (should be prefixed with the app name).") - assert(driverPodLabels.get("label1") === "label1value", "Unexpected value for label1") - assert(driverPodLabels.get("label2") === "label2value", "Unexpected value for label2") - val driverPodAnnotations = driverPodMetadata.getAnnotations - assert(driverPodAnnotations.size === 2, "Unexpected number of pod annotations.") - assert(driverPodAnnotations.get("annotation1") === "annotation1value", - "Unexpected value for annotation1") - assert(driverPodAnnotations.get("annotation2") === "annotation2value", - "Unexpected value for annotation2") - } - - test("Run with driver pod name") { - sparkConf.set(KUBERNETES_DRIVER_POD_NAME, "spark-pi") - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val driverPodMetadata = kubernetesTestComponents.kubernetesClient - .pods() - .withName("spark-pi") - .get() - .getMetadata() - val driverName = driverPodMetadata.getName - assert(driverName === "spark-pi", "Unexpected driver pod name.") - } - - test("Enable SSL on the driver submit server") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - val keyStoreAndTrustStore = SSLUtils.generateKeyStoreTrustStorePair( - Minikube.getMinikubeIp, - "changeit", - "changeit", - "changeit") - sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, - s"file://${keyStoreAndTrustStore.keyStore.getAbsolutePath}") - sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyStorePassword", "changeit") - sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyPassword", "changeit") - sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE, - s"file://${keyStoreAndTrustStore.trustStore.getAbsolutePath}") - sparkConf.set("spark.ssl.kubernetes.driversubmitserver.trustStorePassword", "changeit") - sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - } - - test("Enable SSL on the driver submit server using PEM files") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - val keyAndCertPem = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) - sparkConf.set(DRIVER_SUBMIT_SSL_KEY_PEM, s"file://${keyAndCertPem.keyPem.getAbsolutePath}") - sparkConf.set( - DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM, s"file://${keyAndCertPem.certPem.getAbsolutePath}") - sparkConf.set( - DRIVER_SUBMIT_SSL_SERVER_CERT_PEM, s"file://${keyAndCertPem.certPem.getAbsolutePath}") - sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - } - - test("Added files should exist on the driver.") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - sparkConf.set("spark.files", KubernetesSuite.TEST_EXISTENCE_FILE.getAbsolutePath) - sparkConf.setAppName("spark-file-existence-test") - val podCompletedFuture = SettableFuture.create[Boolean] - val watch = new Watcher[Pod] { - override def 
eventReceived(action: Action, pod: Pod): Unit = { - val containerStatuses = pod.getStatus.getContainerStatuses.asScala - val allSuccessful = containerStatuses.nonEmpty && containerStatuses - .forall(status => { - status.getState.getTerminated != null && status.getState.getTerminated.getExitCode == 0 - }) - if (allSuccessful) { - podCompletedFuture.set(true) - } else { - val failedContainers = containerStatuses.filter(container => { - container.getState.getTerminated != null && - container.getState.getTerminated.getExitCode != 0 - }) - if (failedContainers.nonEmpty) { - podCompletedFuture.setException(new SparkException( - "One or more containers in the driver failed with a nonzero exit code.")) - } - } - } - - override def onClose(e: KubernetesClientException): Unit = { - logWarning("Watch closed", e) - } - } - Utils.tryWithResource(kubernetesTestComponents.kubernetesClient - .pods - .withLabel("spark-app-name", "spark-file-existence-test") - .watch(watch)) { _ => - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.FILE_EXISTENCE_MAIN_CLASS, - mainAppResource = KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array(KubernetesSuite.TEST_EXISTENCE_FILE.getName, - KubernetesSuite.TEST_EXISTENCE_FILE_CONTENTS)).run() - assert(podCompletedFuture.get(60, TimeUnit.SECONDS), "Failed to run driver pod") - val driverPod = kubernetesTestComponents.kubernetesClient - .pods - .withLabel("spark-app-name", "spark-file-existence-test") - .list() - .getItems - .get(0) - val podLog = kubernetesTestComponents.kubernetesClient - .pods - .withName(driverPod.getMetadata.getName) - .getLog - assert(podLog.contains(s"File found at" + - s" /opt/spark/${KubernetesSuite.TEST_EXISTENCE_FILE.getName} with correct contents."), - "Job did not find the file as expected.") - } - } - - test("Use external URI provider") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - val externalUriProviderWatch = - new ExternalUriProviderWatch(kubernetesTestComponents.kubernetesClient) - Utils.tryWithResource(kubernetesTestComponents.kubernetesClient.services() - .withLabel("spark-app-name", "spark-pi") - .watch(externalUriProviderWatch)) { _ => - sparkConf.set(DRIVER_SERVICE_MANAGER_TYPE, ExternalSuppliedUrisDriverServiceManager.TYPE) - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - assert(externalUriProviderWatch.annotationSet.get) - val driverService = kubernetesTestComponents.kubernetesClient - .services() - .withLabel("spark-app-name", "spark-pi") - .list() - .getItems - .asScala(0) - assert(driverService.getMetadata.getAnnotations.containsKey(ANNOTATION_PROVIDE_EXTERNAL_URI), - "External URI request annotation was not set on the driver service.") - // Unfortunately we can't check the correctness of the actual value of the URI, as it depends - // on the driver submission port set on the driver service but we remove that port from the - // service once the submission is complete. 
- assert(driverService.getMetadata.getAnnotations.containsKey(ANNOTATION_RESOLVED_EXTERNAL_URI), - "Resolved URI annotation not set on driver service.") - } - } - - test("Mount the Kubernetes credentials onto the driver pod") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, - kubernetesTestComponents.clientConfig.getCaCertFile) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, - kubernetesTestComponents.clientConfig.getClientKeyFile) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_CERT_FILE, - kubernetesTestComponents.clientConfig.getClientCertFile) - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - } - -} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala deleted file mode 100644 index e9900b90cb588..0000000000000 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.integrationtest - -import java.util.UUID - -import io.fabric8.kubernetes.client.internal.readiness.Readiness -import org.scalatest.{BeforeAndAfter, DoNotDiscover} -import org.scalatest.concurrent.Eventually -import scala.collection.JavaConverters._ - -import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} -import org.apache.spark.deploy.kubernetes.SSLUtils -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend -import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube -import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND -import org.apache.spark.deploy.kubernetes.submit.v2.{Client, KeyAndCertPem} -import org.apache.spark.launcher.SparkLauncher - -@DoNotDiscover -private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) - extends SparkFunSuite with BeforeAndAfter { - - private val APP_LOCATOR_LABEL = UUID.randomUUID().toString.replaceAll("-", "") - private var kubernetesTestComponents: KubernetesTestComponents = _ - private var sparkConf: SparkConf = _ - private var resourceStagingServerLauncher: ResourceStagingServerLauncher = _ - private var staticAssetServerLauncher: StaticAssetServerLauncher = _ - - override def beforeAll(): Unit = { - kubernetesTestComponents = new KubernetesTestComponents(testBackend.getKubernetesClient) - resourceStagingServerLauncher = new ResourceStagingServerLauncher( - kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) - staticAssetServerLauncher = new StaticAssetServerLauncher( - kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) - } - - before { - sparkConf = kubernetesTestComponents.newSparkConf() - .set(INIT_CONTAINER_DOCKER_IMAGE, s"spark-driver-init:latest") - .set(DRIVER_DOCKER_IMAGE, s"spark-driver-v2:latest") - .set(KUBERNETES_DRIVER_LABELS, s"spark-app-locator=$APP_LOCATOR_LABEL") - kubernetesTestComponents.createNamespace() - } - - after { - kubernetesTestComponents.deleteNamespace() - } - - test("Use submission v2.") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - launchStagingServer(SSLOptions(), None) - runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) - } - - test("Enable SSL on the submission server") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - val keyStoreAndTrustStore = SSLUtils.generateKeyStoreTrustStorePair( - ipAddress = Minikube.getMinikubeIp, - keyStorePassword = "keyStore", - keyPassword = "key", - trustStorePassword = "trustStore") - sparkConf.set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) - .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", - keyStoreAndTrustStore.keyStore.getAbsolutePath) - .set("spark.ssl.kubernetes.resourceStagingServer.trustStore", - keyStoreAndTrustStore.trustStore.getAbsolutePath) - .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", "keyStore") - .set("spark.ssl.kubernetes.resourceStagingServer.keyPassword", "key") - .set("spark.ssl.kubernetes.resourceStagingServer.trustStorePassword", "trustStore") - launchStagingServer(SSLOptions( - enabled = true, - keyStore = Some(keyStoreAndTrustStore.keyStore), - trustStore = Some(keyStoreAndTrustStore.trustStore), - keyStorePassword = Some("keyStore"), - keyPassword = Some("key"), - trustStorePassword = Some("trustStore")), - None) - 
runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) - } - - test("Use container-local resources without the resource staging server") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - sparkConf.setJars(Seq( - KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) - runSparkPiAndVerifyCompletion(KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE) - } - - test("Dynamic executor scaling basic test") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - launchStagingServer(SSLOptions(), None) - createShuffleServiceDaemonSet() - - sparkConf.setJars(Seq(KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) - sparkConf.set("spark.dynamicAllocation.enabled", "true") - sparkConf.set("spark.shuffle.service.enabled", "true") - sparkConf.set("spark.kubernetes.shuffle.labels", "app=spark-shuffle-service") - sparkConf.set("spark.kubernetes.shuffle.namespace", kubernetesTestComponents.namespace) - sparkConf.set("spark.app.name", "group-by-test") - runSparkGroupByTestAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) - } - - test("Use remote resources without the resource staging server.") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() - sparkConf.setJars(Seq( - s"$assetServerUri/${KubernetesSuite.EXAMPLES_JAR_FILE.getName}", - s"$assetServerUri/${KubernetesSuite.HELPER_JAR_FILE.getName}" - )) - runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) - } - - test("Mix remote resources with submitted ones.") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - launchStagingServer(SSLOptions(), None) - val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() - sparkConf.setJars(Seq( - KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - s"$assetServerUri/${KubernetesSuite.HELPER_JAR_FILE.getName}" - )) - runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) - } - - test("Use key and certificate PEM files for TLS.") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - val keyAndCertificate = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) - launchStagingServer( - SSLOptions(enabled = true), - Some(keyAndCertificate)) - sparkConf.set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) - .set( - RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM.key, keyAndCertificate.certPem.getAbsolutePath) - runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) - } - - test("Use client key and client cert file when requesting executors") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - sparkConf.setJars(Seq( - KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, - KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, - kubernetesTestComponents.clientConfig.getClientKeyFile) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_CERT_FILE, - kubernetesTestComponents.clientConfig.getClientCertFile) - sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, - kubernetesTestComponents.clientConfig.getCaCertFile) - runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) - } - - private def launchStagingServer( - resourceStagingServerSslOptions: SSLOptions, keyAndCertPem: Option[KeyAndCertPem]): Unit = { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - val resourceStagingServerPort = resourceStagingServerLauncher.launchStagingServer( - resourceStagingServerSslOptions, keyAndCertPem) - val resourceStagingServerUriScheme = if (resourceStagingServerSslOptions.enabled) { - "https" - } else { - "http" - } - 
sparkConf.set(RESOURCE_STAGING_SERVER_URI, - s"$resourceStagingServerUriScheme://" + - s"${Minikube.getMinikubeIp}:$resourceStagingServerPort") - } - - private def runSparkPiAndVerifyCompletion(appResource: String): Unit = { - Client.run(sparkConf, appResource, KubernetesSuite.SPARK_PI_MAIN_CLASS, Array.empty[String]) - val driverPod = kubernetesTestComponents.kubernetesClient - .pods() - .withLabel("spark-app-locator", APP_LOCATOR_LABEL) - .list() - .getItems - .get(0) - Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - assert(kubernetesTestComponents.kubernetesClient - .pods() - .withName(driverPod.getMetadata.getName) - .getLog - .contains("Pi is roughly 3"), "The application did not compute the value of pi.") - } - } - - private def runSparkGroupByTestAndVerifyCompletion(appResource: String): Unit = { - Client.run( - sparkConf = sparkConf, - appArgs = Array.empty[String], - mainClass = KubernetesSuite.GROUP_BY_MAIN_CLASS, - mainAppResource = appResource) - val driverPod = kubernetesTestComponents.kubernetesClient - .pods() - .withLabel("spark-app-locator", APP_LOCATOR_LABEL) - .list() - .getItems - .get(0) - Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - assert(kubernetesTestComponents.kubernetesClient - .pods() - .withName(driverPod.getMetadata.getName) - .getLog - .contains("The Result is"), "The application did not complete.") - } - } - - private def createShuffleServiceDaemonSet(): Unit = { - val ds = kubernetesTestComponents.kubernetesClient.extensions().daemonSets() - .createNew() - .withNewMetadata() - .withName("shuffle") - .endMetadata() - .withNewSpec() - .withNewTemplate() - .withNewMetadata() - .withLabels(Map("app" -> "spark-shuffle-service").asJava) - .endMetadata() - .withNewSpec() - .addNewVolume() - .withName("shuffle-dir") - .withNewHostPath() - .withPath("/tmp") - .endHostPath() - .endVolume() - .addNewContainer() - .withName("shuffle") - .withImage("spark-shuffle:latest") - .withImagePullPolicy("IfNotPresent") - .addNewVolumeMount() - .withName("shuffle-dir") - .withMountPath("/tmp") - .endVolumeMount() - .endContainer() - .endSpec() - .endTemplate() - .endSpec() - .done() - - // wait for daemonset to become available. 
- Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - val pods = kubernetesTestComponents.kubernetesClient.pods() - .withLabel("app", "spark-shuffle-service").list().getItems() - - if (pods.size() == 0 || Readiness.isReady(pods.get(0))) { - throw KubernetesSuite.ShuffleNotReadyException() - } - } - } -} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala index 1ba54c131c196..e5e1b1f085f9f 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala @@ -26,7 +26,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.SSLOptions import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.submit.v2.{ContainerNameEqualityPredicate, KeyAndCertPem} +import org.apache.spark.deploy.kubernetes.submit.{ContainerNameEqualityPredicate, KeyAndCertPem} import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala index 0692cf55db848..3ff72829f88a7 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala @@ -28,11 +28,10 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, private val DOCKER_BUILD_PATH = Paths.get("target", "docker") // Dockerfile paths must be relative to the build path. 
- private val DRIVER_V1_DOCKER_FILE = "dockerfiles/driver/Dockerfile" - private val DRIVER_V2_DOCKER_FILE = "dockerfiles/driver-v2/Dockerfile" + private val DRIVER_DOCKER_FILE = "dockerfiles/driver/Dockerfile" private val EXECUTOR_DOCKER_FILE = "dockerfiles/executor/Dockerfile" private val SHUFFLE_SERVICE_DOCKER_FILE = "dockerfiles/shuffle-service/Dockerfile" - private val DRIVER_INIT_DOCKER_FILE = "dockerfiles/driver-init/Dockerfile" + private val INIT_CONTAINER_DOCKER_FILE = "dockerfiles/init-container/Dockerfile" private val STAGING_SERVER_DOCKER_FILE = "dockerfiles/resource-staging-server/Dockerfile" private val STATIC_ASSET_SERVER_DOCKER_FILE = "dockerfiles/integration-test-asset-server/Dockerfile" @@ -61,12 +60,11 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, def buildSparkDockerImages(): Unit = { Eventually.eventually(TIMEOUT, INTERVAL) { dockerClient.ping() } - buildImage("spark-driver", DRIVER_V1_DOCKER_FILE) + buildImage("spark-driver", DRIVER_DOCKER_FILE) buildImage("spark-executor", EXECUTOR_DOCKER_FILE) buildImage("spark-shuffle", SHUFFLE_SERVICE_DOCKER_FILE) - buildImage("spark-driver-v2", DRIVER_V2_DOCKER_FILE) buildImage("spark-resource-staging-server", STAGING_SERVER_DOCKER_FILE) - buildImage("spark-driver-init", DRIVER_INIT_DOCKER_FILE) + buildImage("spark-init", INIT_CONTAINER_DOCKER_FILE) buildImage("spark-integration-test-asset-server", STATIC_ASSET_SERVER_DOCKER_FILE) } From 56414f9a56cecd02906fed38b098ecd921871c2c Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 23 May 2017 16:38:07 -0700 Subject: [PATCH 114/156] Added files should be in the working directories. (#294) * Added files should be in the working directories. * Revert unintentional changes * Fix test --- docs/running-on-kubernetes.md | 16 ++++++ .../SparkPodInitContainerBootstrap.scala | 4 ++ .../spark/deploy/kubernetes/config.scala | 4 +- .../spark/deploy/kubernetes/constants.scala | 1 + .../SparkPodInitContainerBootstrapSuite.scala | 10 ++++ .../src/main/docker/driver/Dockerfile | 1 + .../src/main/docker/executor/Dockerfile | 1 + .../jobs/FileExistenceTest.scala | 13 ++--- .../integrationtest/KubernetesSuite.scala | 54 +++++++++++-------- .../KubernetesTestComponents.scala | 2 +- 10 files changed, 75 insertions(+), 31 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 98393cbbbba2d..b18987f6af4a4 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -558,6 +558,22 @@ from the other deployment modes. See the [configuration page](configuration.html disk as a secret into the init-containers. + + spark.kubernetes.mountdependencies.jarsDownloadDir + /var/spark-data/spark-jars + + Location to download jars to in the driver and executors. This will be mounted as an empty directory volume + into the driver and executor containers. + + + + spark.kubernetes.mountdependencies.filesDownloadDir + /var/spark-data/spark-files + + Location to download files to in the driver and executors. This will be mounted as an empty directory volume + into the driver and executor containers. 
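A minimal illustrative sketch, not part of this patch: the object name below is hypothetical, while the two property keys and default values are taken directly from the table entries above. The same download locations can be overridden programmatically on a SparkConf before submission.

    import org.apache.spark.SparkConf

    object DependencyDownloadDirsSketch {
      // Hypothetical example: override the two download directories documented above.
      // The values shown here are simply their documented defaults.
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf()
          .set("spark.kubernetes.mountdependencies.jarsDownloadDir", "/var/spark-data/spark-jars")
          .set("spark.kubernetes.mountdependencies.filesDownloadDir", "/var/spark-data/spark-files")
        println(conf.get("spark.kubernetes.mountdependencies.jarsDownloadDir"))
      }
    }

Since both directories are mounted as empty directory volumes, their contents are scoped to the lifetime of the driver and executor pods.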
+ + spark.kubernetes.report.interval 1s diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala index 0d4e82566643d..a4d0aeb23d01f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala @@ -93,6 +93,10 @@ private[spark] class SparkPodInitContainerBootstrapImpl( .endVolume() .editMatchingContainer(new ContainerNameEqualityPredicate(mainContainerName)) .addToVolumeMounts(sharedVolumeMounts: _*) + .addNewEnv() + .withName(ENV_MOUNTED_FILES_DIR) + .withValue(filesDownloadPath) + .endEnv() .endContainer() .endSpec() resourceStagingServerSecretPlugin.map { plugin => diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index bcb9a96cae960..c892b01314975 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -447,7 +447,7 @@ package object config extends Logging { " spark-submit, this directory must be empty and will be mounted as an empty directory" + " volume on the driver and executor pod.") .stringConf - .createWithDefault("/var/spark-data/spark-submitted-jars") + .createWithDefault("/var/spark-data/spark-jars") private[spark] val INIT_CONTAINER_FILES_DOWNLOAD_LOCATION = ConfigBuilder("spark.kubernetes.mountdependencies.filesDownloadDir") @@ -455,7 +455,7 @@ package object config extends Logging { " spark-submit, this directory must be empty and will be mounted as an empty directory" + " volume on the driver and executor pods.") .stringConf - .createWithDefault("/var/spark-data/spark-submitted-files") + .createWithDefault("/var/spark-data/spark-files") private[spark] val INIT_CONTAINER_MOUNT_TIMEOUT = ConfigBuilder("spark.kubernetes.mountdependencies.mountTimeout") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index ea11ca2ec8f21..5515e88a50fb0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -92,6 +92,7 @@ package object constants { private[spark] val ENV_DRIVER_MAIN_CLASS = "SPARK_DRIVER_CLASS" private[spark] val ENV_DRIVER_ARGS = "SPARK_DRIVER_ARGS" private[spark] val ENV_DRIVER_JAVA_OPTS = "SPARK_DRIVER_JAVA_OPTS" + private[spark] val ENV_MOUNTED_FILES_DIR = "SPARK_MOUNTED_FILES_DIR" // Annotation keys private[spark] val ANNOTATION_PROVIDE_EXTERNAL_URI = diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala index 6db7d3ff2da53..3feba80f800c7 100644 --- 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala @@ -111,6 +111,16 @@ class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAf }) } + test("Files download path is set as environment variable") { + val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() + val containers = bootstrappedPod.getSpec.getContainers.asScala + val maybeMainContainer = containers.find(_.getName === MAIN_CONTAINER_NAME) + assert(maybeMainContainer.exists { mainContainer => + mainContainer.getEnv.asScala.exists(envVar => + envVar.getName == ENV_MOUNTED_FILES_DIR && envVar.getValue == FILES_DOWNLOAD_PATH) + }) + } + test("Running with submitted dependencies modifies the init container with the plugin.") { val bootstrappedPod = bootstrapPodWithSubmittedDependencies() val podAnnotations = bootstrappedPod.getMetadata.getAnnotations.asScala diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 40f9459dc06dc..c4c75642c9d22 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -40,4 +40,5 @@ CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." .; fi && \ exec ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY $SPARK_DRIVER_CLASS $SPARK_DRIVER_ARGS diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index c5f1c43ff7cf4..e345f10056522 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -40,4 +40,5 @@ WORKDIR /opt/spark CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_MOUNTED_CLASSPATH}+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_EXECUTOR_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXECUTOR_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." 
.; fi && \ exec ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala index 8b8d5e05f6479..8994c998bffee 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala @@ -28,7 +28,9 @@ private[spark] object FileExistenceTest { def main(args: Array[String]): Unit = { if (args.length < 2) { - throw new IllegalArgumentException("Usage: WordCount ") + throw new IllegalArgumentException( + s"Invalid args: ${args.mkString}, " + + "Usage: FileExistenceTest ") } // Can't use SparkContext.textFile since the file is local to the driver val file = Paths.get(args(0)).toFile @@ -39,16 +41,15 @@ private[spark] object FileExistenceTest { val contents = Files.toString(file, Charsets.UTF_8) if (args(1) != contents) { throw new SparkException(s"Contents do not match. Expected: ${args(1)}," + - s" actual, $contents") + s" actual: $contents") } else { println(s"File found at ${file.getAbsolutePath} with correct contents.") } // scalastyle:on println } - val spark = SparkSession.builder() - .appName("Test") - .getOrCreate() - spark.stop() + while (true) { + Thread.sleep(600000) + } } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index d23bfcdbc5251..95775d262a69d 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -16,6 +16,7 @@ */ package org.apache.spark.deploy.kubernetes.integrationtest +import java.io.File import java.nio.file.Paths import java.util.UUID @@ -35,11 +36,11 @@ import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minik import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND import org.apache.spark.deploy.kubernetes.submit.{Client, KeyAndCertPem} import org.apache.spark.launcher.SparkLauncher +import org.apache.spark.util.Utils private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { import KubernetesSuite._ private val testBackend = IntegrationTestBackendFactory.getTestBackend() - private val APP_LOCATOR_LABEL = UUID.randomUUID().toString.replaceAll("-", "") private var kubernetesTestComponents: KubernetesTestComponents = _ private var sparkConf: SparkConf = _ @@ -124,7 +125,11 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { 
sparkConf.set("spark.kubernetes.shuffle.labels", "app=spark-shuffle-service") sparkConf.set("spark.kubernetes.shuffle.namespace", kubernetesTestComponents.namespace) sparkConf.set("spark.app.name", "group-by-test") - runSparkGroupByTestAndVerifyCompletion(SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + runSparkApplicationAndVerifyCompletion( + SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + GROUP_BY_MAIN_CLASS, + "The Result is", + Array.empty[String]) } test("Use remote resources without the resource staging server.") { @@ -173,6 +178,20 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) } + test("Added files should be placed in the driver's working directory.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val testExistenceFileTempDir = Utils.createTempDir(namePrefix = "test-existence-file-temp-dir") + val testExistenceFile = new File(testExistenceFileTempDir, "input.txt") + Files.write(TEST_EXISTENCE_FILE_CONTENTS, testExistenceFile, Charsets.UTF_8) + launchStagingServer(SSLOptions(), None) + sparkConf.set("spark.files", testExistenceFile.getAbsolutePath) + runSparkApplicationAndVerifyCompletion( + SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + FILE_EXISTENCE_MAIN_CLASS, + s"File found at /opt/spark/${testExistenceFile.getName} with correct contents.", + Array(testExistenceFile.getName, TEST_EXISTENCE_FILE_CONTENTS)) + } + private def launchStagingServer( resourceStagingServerSslOptions: SSLOptions, keyAndCertPem: Option[KeyAndCertPem]): Unit = { assume(testBackend.name == MINIKUBE_TEST_BACKEND) @@ -190,27 +209,19 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { } private def runSparkPiAndVerifyCompletion(appResource: String): Unit = { - Client.run(sparkConf, appResource, SPARK_PI_MAIN_CLASS, Array.empty[String]) - val driverPod = kubernetesTestComponents.kubernetesClient - .pods() - .withLabel("spark-app-locator", APP_LOCATOR_LABEL) - .list() - .getItems - .get(0) - Eventually.eventually(TIMEOUT, INTERVAL) { - assert(kubernetesTestComponents.kubernetesClient - .pods() - .withName(driverPod.getMetadata.getName) - .getLog - .contains("Pi is roughly 3"), "The application did not compute the value of pi.") - } + runSparkApplicationAndVerifyCompletion( + appResource, SPARK_PI_MAIN_CLASS, "Pi is roughly 3", Array.empty[String]) } - private def runSparkGroupByTestAndVerifyCompletion(appResource: String): Unit = { + private def runSparkApplicationAndVerifyCompletion( + appResource: String, + mainClass: String, + expectedLogOnCompletion: String, + appArgs: Array[String]): Unit = { Client.run( sparkConf = sparkConf, - appArgs = Array.empty[String], - mainClass = GROUP_BY_MAIN_CLASS, + appArgs = appArgs, + mainClass = mainClass, mainAppResource = appResource) val driverPod = kubernetesTestComponents.kubernetesClient .pods() @@ -223,7 +234,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .pods() .withName(driverPod.getMetadata.getName) .getLog - .contains("The Result is"), "The application did not complete.") + .contains(expectedLogOnCompletion), "The application did not complete.") } } @@ -285,8 +296,6 @@ private[spark] object KubernetesSuite { val CONTAINER_LOCAL_HELPER_JAR_PATH = s"local:///opt/spark/examples/" + s"integration-tests-jars/${HELPER_JAR_FILE.getName}" - val TEST_EXISTENCE_FILE = Paths.get("test-data", "input.txt").toFile - val TEST_EXISTENCE_FILE_CONTENTS = Files.toString(TEST_EXISTENCE_FILE, Charsets.UTF_8) val TIMEOUT = 
PatienceConfiguration.Timeout(Span(2, Minutes)) val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) val SPARK_PI_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + @@ -295,6 +304,7 @@ private[spark] object KubernetesSuite { ".integrationtest.jobs.FileExistenceTest" val GROUP_BY_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + ".integrationtest.jobs.GroupByTest" + val TEST_EXISTENCE_FILE_CONTENTS = "contents" case object ShuffleNotReadyException extends Exception } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala index 9ae0d9ade7dc2..0ca1f482269db 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala @@ -63,7 +63,7 @@ private[spark] class KubernetesTestComponents(defaultClient: DefaultKubernetesCl .set("spark.executor.memory", "500m") .set("spark.executor.cores", "1") .set("spark.executors.instances", "1") - .set("spark.app.name", "spark-pi") + .set("spark.app.name", "spark-test-app") .set("spark.ui.enabled", "true") .set("spark.testing", "false") .set(WAIT_FOR_APP_COMPLETION, false) From fe03c7c18454aa02fe4695e81dde833d6f4d20f0 Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 23 May 2017 20:53:09 -0700 Subject: [PATCH 115/156] Add missing license (#296) --- conf/kubernetes-resource-staging-server.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/conf/kubernetes-resource-staging-server.yaml b/conf/kubernetes-resource-staging-server.yaml index de0da3edcb901..11f5d3a13b9e3 100644 --- a/conf/kubernetes-resource-staging-server.yaml +++ b/conf/kubernetes-resource-staging-server.yaml @@ -1,3 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# --- apiVersion: extensions/v1beta1 kind: Deployment From 38814043070bfdd522c2c9350b9411b943888cee Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 23 May 2017 22:57:27 -0700 Subject: [PATCH 116/156] Remove some leftover code and fix a constant. (#297) * Remove some leftover code and fix a constant. 
* Fix build --- .../spark/deploy/kubernetes/constants.scala | 54 +++---------- .../ExternalUriProviderWatch.scala | 75 ------------------- 2 files changed, 9 insertions(+), 120 deletions(-) delete mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 5515e88a50fb0..950c1f6efe4e8 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -23,21 +23,7 @@ package object constants { private[spark] val SPARK_APP_NAME_LABEL = "spark-app-name" private[spark] val SPARK_EXECUTOR_ID_LABEL = "spark-exec-id" - // Secrets - private[spark] val DRIVER_CONTAINER_SUBMISSION_SECRETS_BASE_DIR = - "/var/run/secrets/spark-submission" - private[spark] val SUBMISSION_APP_SECRET_NAME = "spark-submission-server-secret" - private[spark] val SUBMISSION_APP_SECRET_PREFIX = "spark-submission-server-secret" - private[spark] val SUBMISSION_APP_SECRET_VOLUME_NAME = "spark-submission-secret-volume" - private[spark] val SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME = - "spark-submission-server-key-password" - private[spark] val SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME = - "spark-submission-server-keystore-password" - private[spark] val SUBMISSION_SSL_KEYSTORE_SECRET_NAME = "spark-submission-server-keystore" - private[spark] val SUBMISSION_SSL_SECRETS_PREFIX = "spark-submission-server-ssl" - private[spark] val SUBMISSION_SSL_SECRETS_VOLUME_NAME = "spark-submission-server-ssl-secrets" - private[spark] val SUBMISSION_SSL_KEY_PEM_SECRET_NAME = "spark-submission-server-key-pem" - private[spark] val SUBMISSION_SSL_CERT_PEM_SECRET_NAME = "spark-submission-server-cert-pem" + // Credentials secrets private[spark] val DRIVER_CREDENTIALS_SECRETS_BASE_DIR = "/mnt/secrets/spark-kubernetes-credentials" private[spark] val DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME = "ca-cert" @@ -54,30 +40,15 @@ package object constants { s"$DRIVER_CREDENTIALS_SECRETS_BASE_DIR/$DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME" private[spark] val DRIVER_CREDENTIALS_SECRET_VOLUME_NAME = "kubernetes-credentials" - // Default and fixed ports private[spark] val SUBMISSION_SERVER_PORT = 7077 private[spark] val DEFAULT_DRIVER_PORT = 7078 private[spark] val DEFAULT_BLOCKMANAGER_PORT = 7079 private[spark] val DEFAULT_UI_PORT = 4040 - private[spark] val UI_PORT_NAME = "spark-ui-port" - private[spark] val SUBMISSION_SERVER_PORT_NAME = "submit-server" private[spark] val BLOCK_MANAGER_PORT_NAME = "blockmanager" - private[spark] val DRIVER_PORT_NAME = "driver" private[spark] val EXECUTOR_PORT_NAME = "executor" // Environment Variables - private[spark] val ENV_SUBMISSION_SECRET_LOCATION = "SPARK_SUBMISSION_SECRET_LOCATION" - private[spark] val ENV_SUBMISSION_SERVER_PORT = "SPARK_SUBMISSION_SERVER_PORT" - private[spark] val ENV_SUBMISSION_KEYSTORE_FILE = "SPARK_SUBMISSION_KEYSTORE_FILE" - private[spark] val ENV_SUBMISSION_KEYSTORE_PASSWORD_FILE = - "SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE" - private[spark] val ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE = - "SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE" - private[spark] val ENV_SUBMISSION_KEYSTORE_TYPE = "SPARK_SUBMISSION_KEYSTORE_TYPE" - private[spark] val 
ENV_SUBMISSION_KEY_PEM_FILE = "SPARK_SUBMISSION_KEY_PEM_FILE" - private[spark] val ENV_SUBMISSION_CERT_PEM_FILE = "SPARK_SUBMISSION_CERT_PEM_FILE" - private[spark] val ENV_SUBMISSION_USE_SSL = "SPARK_SUBMISSION_USE_SSL" private[spark] val ENV_EXECUTOR_PORT = "SPARK_EXECUTOR_PORT" private[spark] val ENV_DRIVER_URL = "SPARK_DRIVER_URL" private[spark] val ENV_EXECUTOR_CORES = "SPARK_EXECUTOR_CORES" @@ -87,27 +58,14 @@ package object constants { private[spark] val ENV_EXECUTOR_POD_IP = "SPARK_EXECUTOR_POD_IP" private[spark] val ENV_DRIVER_MEMORY = "SPARK_DRIVER_MEMORY" private[spark] val ENV_SUBMIT_EXTRA_CLASSPATH = "SPARK_SUBMIT_EXTRA_CLASSPATH" - private[spark] val ENV_EXECUTOR_EXTRA_CLASSPATH = "SPARK_SUBMIT_EXTRA_CLASSPATH" + private[spark] val ENV_EXECUTOR_EXTRA_CLASSPATH = "SPARK_EXECUTOR_EXTRA_CLASSPATH" private[spark] val ENV_MOUNTED_CLASSPATH = "SPARK_MOUNTED_CLASSPATH" private[spark] val ENV_DRIVER_MAIN_CLASS = "SPARK_DRIVER_CLASS" private[spark] val ENV_DRIVER_ARGS = "SPARK_DRIVER_ARGS" private[spark] val ENV_DRIVER_JAVA_OPTS = "SPARK_DRIVER_JAVA_OPTS" private[spark] val ENV_MOUNTED_FILES_DIR = "SPARK_MOUNTED_FILES_DIR" - // Annotation keys - private[spark] val ANNOTATION_PROVIDE_EXTERNAL_URI = - "spark-job.alpha.apache.org/provideExternalUri" - private[spark] val ANNOTATION_RESOLVED_EXTERNAL_URI = - "spark-job.alpha.apache.org/resolvedExternalUri" - - // Miscellaneous - private[spark] val DRIVER_CONTAINER_NAME = "spark-kubernetes-driver" - private[spark] val DRIVER_SUBMIT_SSL_NAMESPACE = "kubernetes.driversubmitserver" - private[spark] val KUBERNETES_MASTER_INTERNAL_URL = "https://kubernetes.default.svc" - private[spark] val MEMORY_OVERHEAD_FACTOR = 0.10 - private[spark] val MEMORY_OVERHEAD_MIN = 384L - - // V2 submission init container + // Bootstrapping dependencies with the init-container private[spark] val INIT_CONTAINER_ANNOTATION = "pod.beta.kubernetes.io/init-containers" private[spark] val INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH = "/mnt/secrets/spark-init" @@ -127,4 +85,10 @@ package object constants { s"$INIT_CONTAINER_PROPERTIES_FILE_DIR/$INIT_CONTAINER_PROPERTIES_FILE_NAME" private[spark] val DEFAULT_SHUFFLE_MOUNT_NAME = "shuffle" private[spark] val INIT_CONTAINER_SECRET_VOLUME_NAME = "spark-init-secret" + + // Miscellaneous + private[spark] val DRIVER_CONTAINER_NAME = "spark-kubernetes-driver" + private[spark] val KUBERNETES_MASTER_INTERNAL_URL = "https://kubernetes.default.svc" + private[spark] val MEMORY_OVERHEAD_FACTOR = 0.10 + private[spark] val MEMORY_OVERHEAD_MIN = 384L } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala deleted file mode 100644 index f402d240bfc33..0000000000000 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.integrationtest - -import java.util.concurrent.atomic.AtomicBoolean - -import io.fabric8.kubernetes.api.model.Service -import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher} -import io.fabric8.kubernetes.client.Watcher.Action -import scala.collection.JavaConverters._ - -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube -import org.apache.spark.internal.Logging - -/** - * A slightly unrealistic implementation of external URI provision, but works - * for tests - essentially forces the service to revert back to being exposed - * on NodePort. - */ -private[spark] class ExternalUriProviderWatch(kubernetesClient: KubernetesClient) - extends Watcher[Service] with Logging { - - // Visible for testing - val annotationSet = new AtomicBoolean(false) - - override def eventReceived(action: Action, service: Service): Unit = { - if (action == Action.ADDED) { - service.getMetadata - .getAnnotations - .asScala - .get(ANNOTATION_PROVIDE_EXTERNAL_URI).foreach { _ => - if (!annotationSet.getAndSet(true)) { - val nodePortService = kubernetesClient.services().withName(service.getMetadata.getName) - .edit() - .editSpec() - .withType("NodePort") - .endSpec() - .done() - val submissionServerPort = nodePortService - .getSpec() - .getPorts - .asScala - .find(_.getName == SUBMISSION_SERVER_PORT_NAME) - .map(_.getNodePort) - .getOrElse(throw new IllegalStateException("Submission server port not found.")) - val resolvedNodePortUri = s"http://${Minikube.getMinikubeIp}:$submissionServerPort" - kubernetesClient.services().withName(service.getMetadata.getName).edit() - .editMetadata() - .addToAnnotations(ANNOTATION_RESOLVED_EXTERNAL_URI, resolvedNodePortUri) - .endMetadata() - .done() - } - } - } - } - - override def onClose(cause: KubernetesClientException): Unit = { - logWarning("External URI provider watch closed.", cause) - } -} From b84cb66e906af4cf70fcab45f0f2ed00528ee235 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Thu, 25 May 2017 10:43:35 -0700 Subject: [PATCH 117/156] Adding restart policy fix for v2 (#303) --- .../scala/org/apache/spark/deploy/kubernetes/submit/Client.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index bfb0bc3ffb0f3..a8029a28009c2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -119,6 +119,7 @@ private[spark] class Client( .addToAnnotations(parsedCustomAnnotations.asJava) .endMetadata() .withNewSpec() + .withRestartPolicy("Never") .addToContainers(driverContainer) .endSpec() From dbf7a39075098e2508e965fa5013b63bfedcb9cb Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 25 May 2017 22:13:05 -0700 Subject: 
[PATCH 118/156] Add all dockerfiles to distributions. (#307) --- dev/make-distribution.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index 62706b0fffedc..6f9dfa0e39072 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -176,11 +176,9 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE" cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/" # Copy docker files -mkdir -p "$DISTDIR/dockerfiles/driver" -mkdir -p "$DISTDIR/dockerfiles/executor" +mkdir -p "$DISTDIR/dockerfiles" DOCKERFILES_SRC="$SPARK_HOME/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker" -cp "$DOCKERFILES_SRC/driver/Dockerfile" "$DISTDIR/dockerfiles/driver/Dockerfile" -cp "$DOCKERFILES_SRC/executor/Dockerfile" "$DISTDIR/dockerfiles/executor/Dockerfile" +cp -R "$DOCKERFILES_SRC/." "$DISTDIR/dockerfiles/." # Only create the yarn directory if the yarn artifacts were build. if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then From 2a2cfb6e72f6a020b3c3b0f0fab6ddef7674dab0 Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 25 May 2017 22:46:53 -0700 Subject: [PATCH 119/156] Add proxy configuration to retrofit clients. (#301) * Add proxy configuration to retrofit clients. * Add logging --- .../kubernetes/RetrofitClientFactory.scala | 34 +++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala index a374982444f79..e38a3d9ad928e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala @@ -17,31 +17,56 @@ package org.apache.spark.deploy.rest.kubernetes import java.io.FileInputStream +import java.net.{InetSocketAddress, URI} import java.security.{KeyStore, SecureRandom} import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule +import io.fabric8.kubernetes.client.Config import okhttp3.{Dispatcher, OkHttpClient} import retrofit2.Retrofit import retrofit2.converter.jackson.JacksonConverterFactory import retrofit2.converter.scalars.ScalarsConverterFactory import org.apache.spark.SSLOptions +import org.apache.spark.internal.Logging import org.apache.spark.util.{ThreadUtils, Utils} private[spark] trait RetrofitClientFactory { def createRetrofitClient[T](baseUrl: String, serviceType: Class[T], sslOptions: SSLOptions): T } -private[spark] object RetrofitClientFactoryImpl extends RetrofitClientFactory { +private[spark] object RetrofitClientFactoryImpl extends RetrofitClientFactory with Logging { private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) private val SECURE_RANDOM = new SecureRandom() def createRetrofitClient[T](baseUrl: String, serviceType: Class[T], sslOptions: SSLOptions): T = { val dispatcher = new Dispatcher(ThreadUtils.newDaemonCachedThreadPool(s"http-client-$baseUrl")) - val okHttpClientBuilder = new OkHttpClient.Builder().dispatcher(dispatcher) + val serviceUri = URI.create(baseUrl) + val maybeAllProxy = Option.apply(System.getProperty(Config.KUBERNETES_ALL_PROXY)) + 
val serviceUriScheme = serviceUri.getScheme + val maybeHttpProxy = (if (serviceUriScheme.equalsIgnoreCase("https")) { + Option.apply(System.getProperty(Config.KUBERNETES_HTTPS_PROXY)) + } else if (serviceUriScheme.equalsIgnoreCase("http")) { + Option.apply(System.getProperty(Config.KUBERNETES_HTTP_PROXY)) + } else { + maybeAllProxy + }).map(uriStringToProxy) + val maybeNoProxy = Option.apply(System.getProperty(Config.KUBERNETES_NO_PROXY)) + .map(_.split(",")) + .toSeq + .flatten + val resolvedProxy = maybeNoProxy.find(_ == serviceUri.getHost) + .map( _ => java.net.Proxy.NO_PROXY) + .orElse(maybeHttpProxy) + .getOrElse(java.net.Proxy.NO_PROXY) + val okHttpClientBuilder = new OkHttpClient.Builder() + .dispatcher(dispatcher) + .proxy(resolvedProxy) + logDebug(s"Proxying to $baseUrl through address ${resolvedProxy.address()} with proxy of" + + s" type ${resolvedProxy.`type`()}") sslOptions.trustStore.foreach { trustStoreFile => require(trustStoreFile.isFile, s"TrustStore provided at ${trustStoreFile.getAbsolutePath}" + " does not exist, or is not a file.") @@ -69,4 +94,9 @@ private[spark] object RetrofitClientFactoryImpl extends RetrofitClientFactory { .create(serviceType) } + private def uriStringToProxy(uriString: String): java.net.Proxy = { + val uriObject = URI.create(uriString) + new java.net.Proxy(java.net.Proxy.Type.HTTP, + new InetSocketAddress(uriObject.getHost, uriObject.getPort)) + } } From d31d81aaf513ee0530dbfa4be066d29d879cbf5c Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Thu, 25 May 2017 23:29:16 -0700 Subject: [PATCH 120/156] Fix an HDFS data locality bug in case cluster node names are short host names (#291) * Fix an HDFS data locality bug in case cluster node names are not full host names * Add a NOTE about InetAddress caching --- .../kubernetes/KubernetesTaskSetManager.scala | 26 +++- .../KubernetesTaskSetManagerSuite.scala | 117 ++++++++++++++++++ 2 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala index 5cea95be382f0..51566d03a7a6c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.scheduler.cluster.kubernetes +import java.net.InetAddress + import scala.collection.mutable.ArrayBuffer import org.apache.spark.scheduler.{TaskSchedulerImpl, TaskSet, TaskSetManager} @@ -23,7 +25,9 @@ import org.apache.spark.scheduler.{TaskSchedulerImpl, TaskSet, TaskSetManager} private[spark] class KubernetesTaskSetManager( sched: TaskSchedulerImpl, taskSet: TaskSet, - maxTaskFailures: Int) extends TaskSetManager(sched, taskSet, maxTaskFailures) { + maxTaskFailures: Int, + inetAddressUtil: InetAddressUtil = new InetAddressUtil) + extends TaskSetManager(sched, taskSet, maxTaskFailures) { /** * Overrides the lookup to use not only the executor pod IP, but also the cluster node @@ -52,8 +56,16 @@ private[spark] class KubernetesTaskSetManager( if (pendingTasksClusterNodeIP.nonEmpty) { logDebug(s"Got preferred task list 
$pendingTasksClusterNodeIP for executor host " + s"$executorIP using cluster node IP $clusterNodeIP") + pendingTasksClusterNodeIP + } else { + val clusterNodeFullName = inetAddressUtil.getFullHostName(clusterNodeIP) + val pendingTasksClusterNodeFullName = super.getPendingTasksForHost(clusterNodeFullName) + if (pendingTasksClusterNodeFullName.nonEmpty) { + logDebug(s"Got preferred task list $pendingTasksClusterNodeFullName " + + s"for executor host $executorIP using cluster node full name $clusterNodeFullName") + } + pendingTasksClusterNodeFullName } - pendingTasksClusterNodeIP } } else { pendingTasksExecutorIP // Empty @@ -61,3 +73,13 @@ private[spark] class KubernetesTaskSetManager( } } } + +// To support mocks in unit tests. +private[kubernetes] class InetAddressUtil { + + // NOTE: This does issue a network call to DNS. Caching is done internally by the InetAddress + // class for both hits and misses. + def getFullHostName(ipAddress: String): String = { + InetAddress.getByName(ipAddress).getCanonicalHostName + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala new file mode 100644 index 0000000000000..7618c137ab22b --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.scheduler.cluster.kubernetes + +import scala.collection.mutable.ArrayBuffer + +import io.fabric8.kubernetes.api.model.{Pod, PodSpec, PodStatus} +import org.mockito.Mockito._ + +import org.apache.spark.{SparkContext, SparkFunSuite} +import org.apache.spark.scheduler.{FakeTask, FakeTaskScheduler, HostTaskLocation, TaskLocation} + +class KubernetesTaskSetManagerSuite extends SparkFunSuite { + + val sc = new SparkContext("local", "test") + val sched = new FakeTaskScheduler(sc, + ("execA", "10.0.0.1"), ("execB", "10.0.0.2"), ("execC", "10.0.0.3")) + val backend = mock(classOf[KubernetesClusterSchedulerBackend]) + sched.backend = backend + + test("Find pending tasks for executors using executor pod IP addresses") { + val taskSet = FakeTask.createTaskSet(3, + Seq(TaskLocation("10.0.0.1", "execA")), // Task 0 runs on executor pod 10.0.0.1. + Seq(TaskLocation("10.0.0.1", "execA")), // Task 1 runs on executor pod 10.0.0.1. + Seq(TaskLocation("10.0.0.2", "execB")) // Task 2 runs on executor pod 10.0.0.2. 
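+      // The preferred locations above are the executor pod IPs already registered with the
+      // FakeTaskScheduler, so the lookups below succeed on the plain pod-IP path without
+      // needing the cluster-node fallback.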
+ ) + + val manager = new KubernetesTaskSetManager(sched, taskSet, maxTaskFailures = 2) + assert(manager.getPendingTasksForHost("10.0.0.1") == ArrayBuffer(1, 0)) + assert(manager.getPendingTasksForHost("10.0.0.2") == ArrayBuffer(2)) + } + + test("Find pending tasks for executors using cluster node names that executor pods run on") { + val taskSet = FakeTask.createTaskSet(2, + Seq(HostTaskLocation("kube-node1")), // Task 0's partition belongs to datanode on kube-node1 + Seq(HostTaskLocation("kube-node1")) // Task 1's partition belongs to datanode on kube-node2 + ) + val spec1 = mock(classOf[PodSpec]) + when(spec1.getNodeName).thenReturn("kube-node1") + val pod1 = mock(classOf[Pod]) + when(pod1.getSpec).thenReturn(spec1) + when(backend.getExecutorPodByIP("10.0.0.1")).thenReturn(Some(pod1)) + + val manager = new KubernetesTaskSetManager(sched, taskSet, maxTaskFailures = 2) + assert(manager.getPendingTasksForHost("10.0.0.1") == ArrayBuffer(1, 0)) + } + + test("Find pending tasks for executors using cluster node IPs that executor pods run on") { + val taskSet = FakeTask.createTaskSet(2, + Seq(HostTaskLocation("196.0.0.5")), // Task 0's partition belongs to datanode on 196.0.0.5. + Seq(HostTaskLocation("196.0.0.5")) // Task 1's partition belongs to datanode on 196.0.0.5. + ) + val spec1 = mock(classOf[PodSpec]) + when(spec1.getNodeName).thenReturn("kube-node1") + val pod1 = mock(classOf[Pod]) + when(pod1.getSpec).thenReturn(spec1) + val status1 = mock(classOf[PodStatus]) + when(status1.getHostIP).thenReturn("196.0.0.5") + when(pod1.getStatus).thenReturn(status1) + when(backend.getExecutorPodByIP("10.0.0.1")).thenReturn(Some(pod1)) + val manager = new KubernetesTaskSetManager(sched, taskSet, maxTaskFailures = 2) + assert(manager.getPendingTasksForHost("10.0.0.1") == ArrayBuffer(1, 0)) + } + + test("Find pending tasks for executors using cluster node FQDNs that executor pods run on") { + val taskSet = FakeTask.createTaskSet(2, + Seq(HostTaskLocation("kube-node1.domain1")), // Task 0's partition belongs to datanode here. + Seq(HostTaskLocation("kube-node1.domain1")) // task 1's partition belongs to datanode here. + ) + val spec1 = mock(classOf[PodSpec]) + when(spec1.getNodeName).thenReturn("kube-node1") + val pod1 = mock(classOf[Pod]) + when(pod1.getSpec).thenReturn(spec1) + val status1 = mock(classOf[PodStatus]) + when(status1.getHostIP).thenReturn("196.0.0.5") + when(pod1.getStatus).thenReturn(status1) + val inetAddressUtil = mock(classOf[InetAddressUtil]) + when(inetAddressUtil.getFullHostName("196.0.0.5")).thenReturn("kube-node1.domain1") + when(backend.getExecutorPodByIP("10.0.0.1")).thenReturn(Some(pod1)) + + val manager = new KubernetesTaskSetManager(sched, taskSet, maxTaskFailures = 2, inetAddressUtil) + assert(manager.getPendingTasksForHost("10.0.0.1") == ArrayBuffer(1, 0)) + } + + test("Return empty pending tasks for executors when all look up fail") { + val taskSet = FakeTask.createTaskSet(1, + Seq(HostTaskLocation("kube-node1.domain1")) // task 0's partition belongs to datanode here. 
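+      // The mocked executor pod below reports node name "kube-node2" and host IP "196.0.0.6",
+      // which resolves to "kube-node2.domain1"; none of these match "kube-node1.domain1",
+      // so the lookup is expected to come back empty.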
+ ) + val spec1 = mock(classOf[PodSpec]) + when(spec1.getNodeName).thenReturn("kube-node2") + val pod1 = mock(classOf[Pod]) + when(pod1.getSpec).thenReturn(spec1) + val status1 = mock(classOf[PodStatus]) + when(status1.getHostIP).thenReturn("196.0.0.6") + when(pod1.getStatus).thenReturn(status1) + val inetAddressUtil = mock(classOf[InetAddressUtil]) + when(inetAddressUtil.getFullHostName("196.0.0.6")).thenReturn("kube-node2.domain1") + when(backend.getExecutorPodByIP("10.0.0.1")).thenReturn(Some(pod1)) + + val manager = new KubernetesTaskSetManager(sched, taskSet, maxTaskFailures = 2, inetAddressUtil) + assert(manager.getPendingTasksForHost("10.0.0.1") == ArrayBuffer()) + } +} From 0702e18e0b0ea71209d63e454d9c0a9f90a2dc8c Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 30 May 2017 16:01:58 -0700 Subject: [PATCH 121/156] Remove leading slash from Retrofit interface. (#308) --- .../rest/kubernetes/ResourceStagingServiceRetrofit.scala | 4 ++-- .../deploy/rest/kubernetes/RetrofitClientFactory.scala | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala index 3c2fe8ebbc3c8..c0da44838aba3 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala @@ -29,7 +29,7 @@ import org.apache.spark.deploy.kubernetes.submit.SubmittedResourceIdAndSecret private[spark] trait ResourceStagingServiceRetrofit { @Multipart - @retrofit2.http.POST("/api/v0/resources/") + @retrofit2.http.POST("api/v0/resources/") def uploadResources( @retrofit2.http.Part("podLabels") podLabels: RequestBody, @retrofit2.http.Part("podNamespace") podNamespace: RequestBody, @@ -38,7 +38,7 @@ private[spark] trait ResourceStagingServiceRetrofit { kubernetesCredentials: RequestBody): Call[SubmittedResourceIdAndSecret] @Streaming - @retrofit2.http.GET("/api/v0/resources/{resourceId}") + @retrofit2.http.GET("api/v0/resources/{resourceId}") def downloadResources( @Path("resourceId") resourceId: String, @retrofit2.http.Header("Authorization") resourceSecret: String): Call[ResponseBody] diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala index e38a3d9ad928e..5046cb479054c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala @@ -85,8 +85,13 @@ private[spark] object RetrofitClientFactoryImpl extends RetrofitClientFactory wi okHttpClientBuilder.sslSocketFactory(sslContext.getSocketFactory, trustManagers(0).asInstanceOf[X509TrustManager]) } + val resolvedBaseUrl = if (!baseUrl.endsWith("/")) { + s"$baseUrl/" + } else { + baseUrl + } new Retrofit.Builder() - .baseUrl(baseUrl) + .baseUrl(resolvedBaseUrl) .addConverterFactory(ScalarsConverterFactory.create()) .addConverterFactory(JacksonConverterFactory.create(OBJECT_MAPPER)) .client(okHttpClientBuilder.build()) From 
9be8f20f48c1385c0dfa6d5c12f2e211c70e3e00 Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 31 May 2017 15:01:13 -0700 Subject: [PATCH 122/156] Use tini in Docker images (#320) --- .../docker-minimal-bundle/src/main/docker/driver/Dockerfile | 4 ++-- .../docker-minimal-bundle/src/main/docker/executor/Dockerfile | 4 ++-- .../src/main/docker/init-container/Dockerfile | 4 ++-- .../src/main/docker/resource-staging-server/Dockerfile | 4 ++-- .../src/main/docker/shuffle-service/Dockerfile | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index c4c75642c9d22..fa651ff43aaa0 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -22,7 +22,7 @@ FROM openjdk:8-alpine # docker build -t spark-driver:latest -f dockerfiles/driver/Dockerfile . RUN apk upgrade --update -RUN apk add --update bash +RUN apk add --update bash tini RUN mkdir -p /opt/spark RUN touch /opt/spark/RELEASE @@ -41,4 +41,4 @@ CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." .; fi && \ - exec ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY $SPARK_DRIVER_CLASS $SPARK_DRIVER_ARGS + exec /sbin/tini -- ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY $SPARK_DRIVER_CLASS $SPARK_DRIVER_ARGS diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index e345f10056522..fbad43b6255b9 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -22,7 +22,7 @@ FROM openjdk:8-alpine # docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . RUN apk upgrade --update -RUN apk add --update bash +RUN apk add --update bash tini RUN mkdir -p /opt/spark RUN touch /opt/spark/RELEASE @@ -41,4 +41,4 @@ CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_MOUNTED_CLASSPATH}+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_EXECUTOR_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXECUTOR_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." 
.; fi && \ - exec ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP + exec /sbin/tini -- ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile index bb249a4ea86b6..40557a7465a8a 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile @@ -22,7 +22,7 @@ FROM openjdk:8-alpine # docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . RUN apk upgrade --update -RUN apk add --update bash +RUN apk add --update bash tini RUN mkdir -p /opt/spark RUN touch /opt/spark/RELEASE @@ -35,4 +35,4 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.KubernetesSparkDependencyDownloadInitContainer" ] +ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.KubernetesSparkDependencyDownloadInitContainer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile index 125749c71c79a..c8b13c44207bc 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile @@ -22,7 +22,7 @@ FROM openjdk:8-alpine # docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . RUN apk upgrade --update -RUN apk add --update bash +RUN apk add --update bash tini RUN mkdir -p /opt/spark RUN touch /opt/spark/RELEASE @@ -35,4 +35,4 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.ResourceStagingServer" ] +ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.ResourceStagingServer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile index 630d3408519ac..06aac56ba2f52 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile @@ -22,7 +22,7 @@ FROM openjdk:8-alpine # docker build -t spark-shuffle:latest -f dockerfiles/shuffle/Dockerfile . 
RUN apk upgrade --update -RUN apk add --update bash +RUN apk add --update bash tini RUN mkdir -p /opt/spark RUN touch /opt/spark/RELEASE @@ -36,4 +36,4 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -CMD ["/bin/sh","-c","/opt/spark/bin/spark-class org.apache.spark.deploy.ExternalShuffleService 1"] \ No newline at end of file +ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.ExternalShuffleService", "1" ] From e5623b78fc1536b7221bf31945b3add527959d75 Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 1 Jun 2017 12:05:16 -0700 Subject: [PATCH 123/156] Allow custom executor labels and annotations (#321) * Allow custom executor labels and annotations * Address comments. * Fix scalastyle. --- docs/running-on-kubernetes.md | 17 ++++++++++ .../spark/deploy/kubernetes/config.scala | 16 ++++++++++ .../deploy/kubernetes/submit/Client.scala | 31 ++++--------------- .../KubernetesClusterSchedulerBackend.scala | 26 ++++++++++++++-- 4 files changed, 62 insertions(+), 28 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index b18987f6af4a4..488efbe5eef36 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -476,6 +476,23 @@ from the other deployment modes. See the [configuration page](configuration.html pairs, where each annotation is in the format key=value. + + spark.kubernetes.executor.labels + (none) + + Custom labels that will be added to the executor pods. This should be a comma-separated list of label key-value + pairs, where each label is in the format key=value. Note that Spark also adds its own labels to the + executor pods for bookkeeping purposes. + + + + spark.kubernetes.executor.annotations + (none) + + Custom annotations that will be added to the executor pods. This should be a comma-separated list of annotation + key-value pairs, where each annotation is in the format key=value. + + spark.kubernetes.driver.pod.name (none) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index c892b01314975..d1341b15afaca 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -211,6 +211,22 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val KUBERNETES_EXECUTOR_LABELS = + ConfigBuilder("spark.kubernetes.executor.labels") + .doc("Custom labels that will be added to the executor pods. This should be a" + + " comma-separated list of label key-value pairs, where each label is in the format" + + " key=value.") + .stringConf + .createOptional + + private[spark] val KUBERNETES_EXECUTOR_ANNOTATIONS = + ConfigBuilder("spark.kubernetes.executor.annotations") + .doc("Custom annotations that will be added to the executor pods. 
This should be a" + + " comma-separated list of annotation key-value pairs, where each annotation is in the" + + " format key=value.") + .stringConf + .createOptional + private[spark] val KUBERNETES_DRIVER_POD_NAME = ConfigBuilder("spark.kubernetes.driver.pod.name") .doc("Name of the driver pod.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index a8029a28009c2..743ec9d7707e0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -22,7 +22,8 @@ import java.util.Collections import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, OwnerReferenceBuilder, PodBuilder} import scala.collection.JavaConverters._ -import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.ConfigurationUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.rest.kubernetes.ResourceStagingServerSslOptionsProviderImpl @@ -75,18 +76,16 @@ private[spark] class Client( def run(): Unit = { validateNoDuplicateFileNames(sparkJars) validateNoDuplicateFileNames(sparkFiles) - val parsedCustomLabels = parseKeyValuePairs(customLabels, KUBERNETES_DRIVER_LABELS.key, - "labels") + val parsedCustomLabels = ConfigurationUtils.parseKeyValuePairs( + customLabels, KUBERNETES_DRIVER_LABELS.key, "labels") require(!parsedCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping operations.") require(!parsedCustomLabels.contains(SPARK_APP_NAME_LABEL), s"Label with key" + s" $SPARK_APP_NAME_LABEL is not allowed as it is reserved for Spark bookkeeping operations.") val allLabels = parsedCustomLabels ++ Map(SPARK_APP_ID_LABEL -> kubernetesAppId, SPARK_APP_NAME_LABEL -> appName) - val parsedCustomAnnotations = parseKeyValuePairs( - customAnnotations, - KUBERNETES_DRIVER_ANNOTATIONS.key, - "annotations") + val parsedCustomAnnotations = ConfigurationUtils.parseKeyValuePairs( + customAnnotations, KUBERNETES_DRIVER_ANNOTATIONS.key, "annotations") Utils.tryWithResource(kubernetesClientProvider.get) { kubernetesClient => val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => new EnvVarBuilder() @@ -237,24 +236,6 @@ private[spark] class Client( s" file name $fileName is shared by all of these URIs: $urisWithFileName") } } - - private def parseKeyValuePairs( - maybeKeyValues: Option[String], - configKey: String, - keyValueType: String): Map[String, String] = { - maybeKeyValues.map(keyValues => { - keyValues.split(",").map(_.trim).filterNot(_.isEmpty).map(keyValue => { - keyValue.split("=", 2).toSeq match { - case Seq(k, v) => - (k, v) - case _ => - throw new SparkException(s"Custom $keyValueType set by $configKey must be a" + - s" comma-separated list of key-value pairs, with format =." + - s" Got value: $keyValue. 
All values: $keyValues") - } - }).toMap - }).getOrElse(Map.empty[String, String]) - } } private[spark] object Client { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 5627f7c20de3d..7fcfa36a771fb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -55,6 +55,23 @@ private[spark] class KubernetesClusterSchedulerBackend( private val executorExtraClasspath = conf.get( org.apache.spark.internal.config.EXECUTOR_CLASS_PATH) private val executorJarsDownloadDir = conf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) + + private val executorLabels = ConfigurationUtils.parseKeyValuePairs( + conf.get(KUBERNETES_EXECUTOR_LABELS), + KUBERNETES_EXECUTOR_LABELS.key, + "executor labels") + require( + !executorLabels.contains(SPARK_APP_ID_LABEL), + s"Custom executor labels cannot contain $SPARK_APP_ID_LABEL as it is reserved for Spark.") + require( + !executorLabels.contains(SPARK_EXECUTOR_ID_LABEL), + s"Custom executor labels cannot contain $SPARK_EXECUTOR_ID_LABEL as it is reserved for" + + s" Spark.") + private val executorAnnotations = ConfigurationUtils.parseKeyValuePairs( + conf.get(KUBERNETES_EXECUTOR_ANNOTATIONS), + KUBERNETES_EXECUTOR_ANNOTATIONS.key, + "executor annotations") + private var shufflePodCache: Option[ShufflePodCache] = None private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) @@ -250,8 +267,10 @@ private[spark] class KubernetesClusterSchedulerBackend( // executorId and applicationId val hostname = name.substring(Math.max(0, name.length - 63)) - val selectors = Map(SPARK_EXECUTOR_ID_LABEL -> executorId, - SPARK_APP_ID_LABEL -> applicationId()).asJava + val resolvedExecutorLabels = Map( + SPARK_EXECUTOR_ID_LABEL -> executorId, + SPARK_APP_ID_LABEL -> applicationId()) ++ + executorLabels val executorMemoryQuantity = new QuantityBuilder(false) .withAmount(s"${executorMemoryMb}M") .build() @@ -300,7 +319,8 @@ private[spark] class KubernetesClusterSchedulerBackend( val basePodBuilder = new PodBuilder() .withNewMetadata() .withName(name) - .withLabels(selectors) + .withLabels(resolvedExecutorLabels.asJava) + .withAnnotations(executorAnnotations.asJava) .withOwnerReferences() .addNewOwnerReference() .withController(true) From 5e2b205d8c54e8493878188462e45dc509f073b2 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Fri, 2 Jun 2017 11:59:43 -0700 Subject: [PATCH 124/156] Dynamic allocation, cleanup in case of driver death (#319) * Adding cleanup for shuffle service for driver death * Address comments + fix tests * Cleanly open and close resources. 
* Added unit test, reusing RegisterDriver * lint + fix mesos --- .../KubernetesExternalShuffleClient.java | 79 ++++++++ .../mesos/MesosExternalShuffleClient.java | 2 +- .../protocol/BlockTransferMessage.java | 1 - .../protocol/{mesos => }/RegisterDriver.java | 5 +- conf/kubernetes-shuffle-service.yaml | 3 +- .../mesos/MesosExternalShuffleService.scala | 3 +- .../KubernetesExternalShuffleService.scala | 179 ++++++++++++++++++ .../spark/deploy/kubernetes/constants.scala | 1 + .../deploy/kubernetes/submit/Client.scala | 7 +- .../DriverPodKubernetesClientProvider.scala | 10 +- .../KubernetesClusterSchedulerBackend.scala | 36 +++- .../kubernetes/submit/ClientV2Suite.scala | 34 +++- .../main/docker/shuffle-service/Dockerfile | 2 +- 13 files changed, 343 insertions(+), 19 deletions(-) create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/kubernetes/KubernetesExternalShuffleClient.java rename common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/{mesos => }/RegisterDriver.java (91%) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/kubernetes/KubernetesExternalShuffleClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/kubernetes/KubernetesExternalShuffleClient.java new file mode 100644 index 0000000000000..49cb5243e32dc --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/kubernetes/KubernetesExternalShuffleClient.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle.kubernetes; + +import org.apache.spark.network.client.RpcResponseCallback; +import org.apache.spark.network.client.TransportClient; +import org.apache.spark.network.sasl.SecretKeyHolder; +import org.apache.spark.network.shuffle.ExternalShuffleClient; +import org.apache.spark.network.shuffle.protocol.RegisterDriver; +import org.apache.spark.network.util.TransportConf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * A client for talking to the external shuffle service in Kubernetes cluster mode. + * + * This is used by the each Spark executor to register with a corresponding external + * shuffle service on the cluster. The purpose is for cleaning up shuffle files + * reliably if the application exits unexpectedly. 
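+ *
+ * Registration goes through {@link #registerDriverWithShuffleService(String, int)}, which sends
+ * a {@link RegisterDriver} message carrying this application's ID to the shuffle service.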
+ */ +public class KubernetesExternalShuffleClient extends ExternalShuffleClient { + private static final Logger logger = LoggerFactory + .getLogger(KubernetesExternalShuffleClient.class); + + /** + * Creates an Kubernetes external shuffle client that wraps the {@link ExternalShuffleClient}. + * Please refer to docs on {@link ExternalShuffleClient} for more information. + */ + public KubernetesExternalShuffleClient( + TransportConf conf, + SecretKeyHolder secretKeyHolder, + boolean saslEnabled, + boolean saslEncryptionEnabled) { + super(conf, secretKeyHolder, saslEnabled, saslEncryptionEnabled); + } + + public void registerDriverWithShuffleService(String host, int port) throws IOException { + checkInit(); + ByteBuffer registerDriver = new RegisterDriver(appId, 0).toByteBuffer(); + TransportClient client = clientFactory.createClient(host, port); + client.sendRpc(registerDriver, new RegisterDriverCallback()); + } + + private class RegisterDriverCallback implements RpcResponseCallback { + @Override + public void onSuccess(ByteBuffer response) { + logger.info("Successfully registered app " + appId + " with external shuffle service."); + } + + @Override + public void onFailure(Throwable e) { + logger.warn("Unable to register app " + appId + " with external shuffle service. " + + "Please manually remove shuffle data after driver exit. Error: " + e); + } + } + + @Override + public void close() { + super.close(); + } +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/mesos/MesosExternalShuffleClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/mesos/MesosExternalShuffleClient.java index 42cedd9943150..e36cfd165db30 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/mesos/MesosExternalShuffleClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/mesos/MesosExternalShuffleClient.java @@ -32,7 +32,7 @@ import org.apache.spark.network.client.TransportClient; import org.apache.spark.network.sasl.SecretKeyHolder; import org.apache.spark.network.shuffle.ExternalShuffleClient; -import org.apache.spark.network.shuffle.protocol.mesos.RegisterDriver; +import org.apache.spark.network.shuffle.protocol.RegisterDriver; import org.apache.spark.network.util.TransportConf; /** diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java index 9af6759f5d5f3..6012a84599368 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java @@ -23,7 +23,6 @@ import io.netty.buffer.Unpooled; import org.apache.spark.network.protocol.Encodable; -import org.apache.spark.network.shuffle.protocol.mesos.RegisterDriver; import org.apache.spark.network.shuffle.protocol.mesos.ShuffleServiceHeartbeat; /** diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/mesos/RegisterDriver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterDriver.java similarity index 91% rename from common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/mesos/RegisterDriver.java rename to common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterDriver.java index 
d5f53ccb7f741..ac606e6539f3e 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/mesos/RegisterDriver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterDriver.java @@ -15,19 +15,18 @@ * limitations under the License. */ -package org.apache.spark.network.shuffle.protocol.mesos; +package org.apache.spark.network.shuffle.protocol; import com.google.common.base.Objects; import io.netty.buffer.ByteBuf; import org.apache.spark.network.protocol.Encoders; -import org.apache.spark.network.shuffle.protocol.BlockTransferMessage; // Needed by ScalaDoc. See SPARK-7726 import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; /** - * A message sent from the driver to register with the MesosExternalShuffleService. + * A message sent from the driver to register with an ExternalShuffleService. */ public class RegisterDriver extends BlockTransferMessage { private final String appId; diff --git a/conf/kubernetes-shuffle-service.yaml b/conf/kubernetes-shuffle-service.yaml index 3aeb1f54f301c..c0cc310cf4755 100644 --- a/conf/kubernetes-shuffle-service.yaml +++ b/conf/kubernetes-shuffle-service.yaml @@ -38,7 +38,8 @@ spec: # This is an official image that is built # from the dockerfiles/shuffle directory # in the spark distribution. - image: kubespark/spark-shuffle:v2.1.0-kubernetes-0.1.0-alpha.3 + image: spark-shuffle:latest + imagePullPolicy: IfNotPresent volumeMounts: - mountPath: '/tmp' name: temp-volume diff --git a/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosExternalShuffleService.scala b/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosExternalShuffleService.scala index 859aa836a3157..cbb03c7d3b1d6 100644 --- a/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosExternalShuffleService.scala +++ b/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosExternalShuffleService.scala @@ -29,7 +29,8 @@ import org.apache.spark.internal.Logging import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} import org.apache.spark.network.shuffle.ExternalShuffleBlockHandler import org.apache.spark.network.shuffle.protocol.BlockTransferMessage -import org.apache.spark.network.shuffle.protocol.mesos.{RegisterDriver, ShuffleServiceHeartbeat} +import org.apache.spark.network.shuffle.protocol.RegisterDriver +import org.apache.spark.network.shuffle.protocol.mesos.ShuffleServiceHeartbeat import org.apache.spark.network.util.TransportConf import org.apache.spark.util.ThreadUtils diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala new file mode 100644 index 0000000000000..94292dae10f29 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.kubernetes + +import java.nio.ByteBuffer + +import io.fabric8.kubernetes.api.model.Pod +import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watch, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action +import org.apache.commons.io.IOUtils +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.{SecurityManager, SparkConf} +import org.apache.spark.deploy.ExternalShuffleService +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.internal.Logging +import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} +import org.apache.spark.network.shuffle.ExternalShuffleBlockHandler +import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, RegisterDriver} +import org.apache.spark.network.util.TransportConf +import org.apache.spark.scheduler.cluster.kubernetes.DriverPodKubernetesClientProvider + +/** + * An RPC endpoint that receives registration requests from Spark drivers running on Kubernetes. + * It detects driver termination and calls the cleanup callback to [[ExternalShuffleService]]. + */ +private[spark] class KubernetesShuffleBlockHandler ( + transportConf: TransportConf, + kubernetesClientProvider: DriverPodKubernetesClientProvider) + extends ExternalShuffleBlockHandler(transportConf, null) with Logging { + + private val INIT_AND_STOP_LOCK = new Object + private val CONNECTED_APPS_LOCK = new Object + private val connectedApps = mutable.Set.empty[String] + private var shuffleWatch: Option[Watch] = None + private var kubernetesClient: Option[KubernetesClient] = None + + def start(): Unit = INIT_AND_STOP_LOCK.synchronized { + val client = kubernetesClientProvider.get + shuffleWatch = startShuffleWatcher(client) + kubernetesClient = Some(client) + } + + override def close(): Unit = { + try { + super.close() + } finally { + INIT_AND_STOP_LOCK.synchronized { + shuffleWatch.foreach(IOUtils.closeQuietly) + shuffleWatch = None + kubernetesClient.foreach(IOUtils.closeQuietly) + kubernetesClient = None + } + } + } + + protected override def handleMessage( + message: BlockTransferMessage, + client: TransportClient, + callback: RpcResponseCallback): Unit = { + message match { + case RegisterDriverParam(appId) => + val address = client.getSocketAddress + logInfo(s"Received registration request from app $appId (remote address $address).") + CONNECTED_APPS_LOCK.synchronized { + if (connectedApps.contains(appId)) { + logWarning(s"Received a registration request from app $appId, but it was already " + + s"registered") + } + connectedApps += appId + } + callback.onSuccess(ByteBuffer.allocate(0)) + case _ => super.handleMessage(message, client, callback) + } + } + + private def startShuffleWatcher(client: KubernetesClient): Option[Watch] = { + try { + Some(client + .pods() + .withLabels(Map(SPARK_ROLE_LABEL -> "driver").asJava) + .watch(new Watcher[Pod] { + override def eventReceived(action: Watcher.Action, p: Pod): Unit = { + action match { + case Action.DELETED | Action.ERROR => + val labels = p.getMetadata.getLabels 
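+                // Pods without a Spark app id label are ignored; for a registered app,
+                // deregister it and clean up its shuffle files.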
+ if (labels.containsKey(SPARK_APP_ID_LABEL)) { + val appId = labels.get(SPARK_APP_ID_LABEL) + CONNECTED_APPS_LOCK.synchronized { + if (connectedApps.contains(appId)) { + connectedApps -= appId + applicationRemoved(appId, true) + } + } + } + case Action.ADDED | Action.MODIFIED => + } + } + + override def onClose(e: KubernetesClientException): Unit = {} + })) + } catch { + case throwable: Throwable => + logWarning(s"Shuffle service cannot access Kubernetes. " + + s"Orphaned file cleanup is disabled.", throwable) + None + } + } + + /** An extractor object for matching [[RegisterDriver]] message. */ + private object RegisterDriverParam { + def unapply(r: RegisterDriver): Option[(String)] = + Some(r.getAppId) + } +} + +/** + * A wrapper of [[ExternalShuffleService]] that provides an additional endpoint for drivers + * to associate with. This allows the shuffle service to detect when a driver is terminated + * and can clean up the associated shuffle files. + */ +private[spark] class KubernetesExternalShuffleService( + conf: SparkConf, + securityManager: SecurityManager, + kubernetesClientProvider: DriverPodKubernetesClientProvider) + extends ExternalShuffleService(conf, securityManager) { + + private var shuffleBlockHandlers: mutable.Buffer[KubernetesShuffleBlockHandler] = _ + protected override def newShuffleBlockHandler( + tConf: TransportConf): ExternalShuffleBlockHandler = { + val newBlockHandler = new KubernetesShuffleBlockHandler(tConf, kubernetesClientProvider) + newBlockHandler.start() + + // TODO: figure out a better way of doing this. + // This is necessary because the constructor is not called + // when this class is initialized through ExternalShuffleService. + if (shuffleBlockHandlers == null) { + shuffleBlockHandlers = mutable.Buffer.empty[KubernetesShuffleBlockHandler] + } + shuffleBlockHandlers += newBlockHandler + newBlockHandler + } + + override def stop(): Unit = { + try { + super.stop() + } finally { + shuffleBlockHandlers.foreach(_.close()) + } + } +} + +private[spark] object KubernetesExternalShuffleService extends Logging { + def main(args: Array[String]): Unit = { + ExternalShuffleService.main(args, + (conf: SparkConf, sm: SecurityManager) => { + val kubernetesClientProvider = new DriverPodKubernetesClientProvider(conf) + new KubernetesExternalShuffleService(conf, sm, kubernetesClientProvider) + }) + } +} + + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 950c1f6efe4e8..e267c9ff7e1d1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -22,6 +22,7 @@ package object constants { private[spark] val SPARK_APP_ID_LABEL = "spark-app-id" private[spark] val SPARK_APP_NAME_LABEL = "spark-app-name" private[spark] val SPARK_EXECUTOR_ID_LABEL = "spark-exec-id" + private[spark] val SPARK_ROLE_LABEL = "spark-role" // Credentials secrets private[spark] val DRIVER_CREDENTIALS_SECRETS_BASE_DIR = diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index 743ec9d7707e0..dc8a6da45495e 100644 --- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -82,10 +82,13 @@ private[spark] class Client( s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping operations.") require(!parsedCustomLabels.contains(SPARK_APP_NAME_LABEL), s"Label with key" + s" $SPARK_APP_NAME_LABEL is not allowed as it is reserved for Spark bookkeeping operations.") - val allLabels = parsedCustomLabels ++ - Map(SPARK_APP_ID_LABEL -> kubernetesAppId, SPARK_APP_NAME_LABEL -> appName) + val allLabels = parsedCustomLabels ++ Map( + SPARK_APP_ID_LABEL -> kubernetesAppId, + SPARK_APP_NAME_LABEL -> appName, + SPARK_ROLE_LABEL -> "driver") val parsedCustomAnnotations = ConfigurationUtils.parseKeyValuePairs( customAnnotations, KUBERNETES_DRIVER_ANNOTATIONS.key, "annotations") + Utils.tryWithResource(kubernetesClientProvider.get) { kubernetesClient => val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => new EnvVarBuilder() diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala index 50f2c218c22c4..cc2032219f885 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala @@ -29,7 +29,10 @@ import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.util.ThreadUtils -private[spark] class DriverPodKubernetesClientProvider(sparkConf: SparkConf, namespace: String) { +private[spark] class DriverPodKubernetesClientProvider( + sparkConf: SparkConf, + namespace: Option[String] = None) { + private val SERVICE_ACCOUNT_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) private val SERVICE_ACCOUNT_CA_CERT = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) private val oauthTokenFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN) @@ -45,7 +48,10 @@ private[spark] class DriverPodKubernetesClientProvider(sparkConf: SparkConf, nam val baseClientConfigBuilder = new ConfigBuilder() .withApiVersion("v1") .withMasterUrl(KUBERNETES_MASTER_INTERNAL_URL) - .withNamespace(namespace) + + // Build a namespaced client if specified. 
+ val namespacedClientConfigBuilder = namespace + .map(baseClientConfigBuilder.withNamespace(_)).getOrElse(baseClientConfigBuilder) val configBuilder = oauthTokenFile .orElse(caCertFile) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 7fcfa36a771fb..257cee80fdea9 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -32,6 +32,8 @@ import org.apache.spark.{SparkContext, SparkEnv, SparkException} import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, SparkPodInitContainerBootstrap} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.network.netty.SparkTransportConf +import org.apache.spark.network.shuffle.kubernetes.KubernetesExternalShuffleClient import org.apache.spark.rpc.{RpcCallContext, RpcEndpointAddress, RpcEnv} import org.apache.spark.scheduler.TaskSchedulerImpl import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.{RetrieveSparkAppConfig, SparkAppConfig} @@ -100,8 +102,8 @@ private[spark] class KubernetesClusterSchedulerBackend( private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("kubernetes-executor-requests")) - private val kubernetesClient = new DriverPodKubernetesClientProvider(conf, kubernetesNamespace) - .get + private val kubernetesClient = new DriverPodKubernetesClientProvider(conf, + Some(kubernetesNamespace)).get private val driverPod = try { kubernetesClient.pods().inNamespace(kubernetesNamespace). @@ -134,6 +136,15 @@ private[spark] class KubernetesClusterSchedulerBackend( None } + // A client for talking to the external shuffle service + private val kubernetesExternalShuffleClient: Option[KubernetesExternalShuffleClient] = { + if (Utils.isDynamicAllocationEnabled(sc.conf)) { + Some(getShuffleClient()) + } else { + None + } + } + override val minRegisteredRatio = if (conf.getOption("spark.scheduler.minRegisteredResourcesRatio").isEmpty) { 0.8 @@ -183,6 +194,14 @@ private[spark] class KubernetesClusterSchedulerBackend( } } + private def getShuffleClient(): KubernetesExternalShuffleClient = { + new KubernetesExternalShuffleClient( + SparkTransportConf.fromSparkConf(conf, "shuffle"), + sc.env.securityManager, + sc.env.securityManager.isAuthenticationEnabled(), + sc.env.securityManager.isSaslEncryptionEnabled()) + } + private def getInitialTargetExecutorNumber(defaultNumExecutors: Int = 1): Int = { if (Utils.isDynamicAllocationEnabled(conf)) { val minNumExecutors = conf.getInt("spark.dynamicAllocation.minExecutors", 0) @@ -220,6 +239,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .map { config => new ShufflePodCache( kubernetesClient, config.shuffleNamespace, config.shuffleLabels) } shufflePodCache.foreach(_.start()) + kubernetesExternalShuffleClient.foreach(_.init(applicationId())) } } @@ -227,6 +247,7 @@ private[spark] class KubernetesClusterSchedulerBackend( // stop allocation of new resources and caches. 
allocator.shutdown() shufflePodCache.foreach(_.stop()) + kubernetesExternalShuffleClient.foreach(_.close()) // send stop message to executors so they shut down cleanly super.stop() @@ -266,10 +287,10 @@ private[spark] class KubernetesClusterSchedulerBackend( // name as the hostname. This preserves uniqueness since the end of name contains // executorId and applicationId val hostname = name.substring(Math.max(0, name.length - 63)) - val resolvedExecutorLabels = Map( SPARK_EXECUTOR_ID_LABEL -> executorId, - SPARK_APP_ID_LABEL -> applicationId()) ++ + SPARK_APP_ID_LABEL -> applicationId(), + SPARK_ROLE_LABEL -> "executor") ++ executorLabels val executorMemoryQuantity = new QuantityBuilder(false) .withAmount(s"${executorMemoryMb}M") @@ -444,6 +465,8 @@ private[spark] class KubernetesClusterSchedulerBackend( rpcEnv: RpcEnv, sparkProperties: Seq[(String, String)]) extends DriverEndpoint(rpcEnv, sparkProperties) { + private val externalShufflePort = conf.getInt("spark.shuffle.service.port", 7337) + override def receiveAndReply( context: RpcCallContext): PartialFunction[Any, Unit] = { new PartialFunction[Any, Unit]() { @@ -466,6 +489,11 @@ private[spark] class KubernetesClusterSchedulerBackend( .get() val nodeName = runningExecutorPod.getSpec.getNodeName val shufflePodIp = shufflePodCache.get.getShufflePodForExecutor(nodeName) + + // Inform the shuffle pod about this application so it can watch. + kubernetesExternalShuffleClient.foreach( + _.registerDriverWithShuffleService(shufflePodIp, externalShufflePort)) + resolvedProperties = resolvedProperties ++ Seq( (SPARK_SHUFFLE_SERVICE_HOST.key, shufflePodIp)) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala index d4d3882bb8bab..ff6c710117318 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala @@ -31,10 +31,13 @@ import org.scalatest.BeforeAndAfter import scala.collection.JavaConverters._ import scala.collection.mutable -import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.kubernetes.SparkPodInitContainerBootstrap +import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.{KubernetesExternalShuffleService, KubernetesShuffleBlockHandler, SparkPodInitContainerBootstrap} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.network.netty.SparkTransportConf +import org.apache.spark.network.shuffle.kubernetes.KubernetesExternalShuffleClient +import org.apache.spark.scheduler.cluster.kubernetes.DriverPodKubernetesClientProvider class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val JARS_RESOURCE = SubmittedResourceIdAndSecret("jarsId", "jarsSecret") @@ -49,7 +52,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val ALL_EXPECTED_LABELS = Map( CUSTOM_LABEL_KEY -> CUSTOM_LABEL_VALUE, SPARK_APP_ID_LABEL -> APP_ID, - SPARK_APP_NAME_LABEL -> APP_NAME) + SPARK_APP_NAME_LABEL -> APP_NAME, + SPARK_ROLE_LABEL -> "driver") private val CUSTOM_ANNOTATION_KEY = "customAnnotation" private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" private val INIT_CONTAINER_SECRET_NAME = 
"init-container-secret" @@ -305,6 +309,30 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { verify(loggingPodStatusWatcher).awaitCompletion() } + test("Run kubernetes shuffle service.") { + expectationsForNoMountedCredentials() + expectationsForNoDependencyUploader() + + val shuffleService = new KubernetesExternalShuffleService( + SPARK_CONF, + new SecurityManager(SPARK_CONF), + new DriverPodKubernetesClientProvider(SPARK_CONF)) + + val shuffleClient = new KubernetesExternalShuffleClient( + SparkTransportConf.fromSparkConf(SPARK_CONF, "shuffle"), + new SecurityManager(SPARK_CONF), + false, + false) + + shuffleService.start() + shuffleClient.init("newapp") + + // verifies that we can connect to the shuffle service and send + // it a message. + shuffleClient.registerDriverWithShuffleService("localhost", 7337) + shuffleService.stop() + } + private def expectationsForNoDependencyUploader(): Unit = { when(initContainerComponentsProvider .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile index 06aac56ba2f52..1f64376b89aae 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile @@ -36,4 +36,4 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.ExternalShuffleService", "1" ] +ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.kubernetes.KubernetesExternalShuffleService", "1" ] From bb1b234084c6d4298e0499d8974af9413a1b864f Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Fri, 2 Jun 2017 16:08:10 -0700 Subject: [PATCH 125/156] Fix client to await the driver pod (#325) --- .../deploy/kubernetes/submit/LoggingPodStatusWatcher.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/LoggingPodStatusWatcher.scala index 1633a084e463c..4a8a7308b9fe4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/LoggingPodStatusWatcher.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/LoggingPodStatusWatcher.scala @@ -137,7 +137,7 @@ private[kubernetes] class LoggingPodStatusWatcherImpl( } override def awaitCompletion(): Unit = { - podCompletedFuture.countDown() + podCompletedFuture.await() logInfo(pod.map { p => s"Container final statuses:\n\n${containersDescription(p)}" }.getOrElse("No containers were found in the driver pod.")) From e37b0cfdbe65819fdda65481b937ed8846407a21 Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 2 Jun 2017 20:25:03 -0700 Subject: [PATCH 126/156] Clean up resources that are not used by pods. (#305) * Clean up resources that are not used by pods. * Make client side send correct credentials. * Simplify cleanup logic. Cancellation is no longer instantaneous and we might clean up a little later than the given TTL. However, the tradeoff is a simpler implementation with clearer contracts about when things will and will not be cleaned up. * Remove class * Fix imports and line length. 
* Remove import. * Add a unit test for StagingResourcesStore. * Revamp cleanup process. - Delete resources immediately when owners do not exist - Delete resources if after they are first uploaded, they are not accessed for a certain period of time. - Resource owners are more specifically defined and can have a type (currently only uses pods) * Clarify log messages * Use a single set of credentials in resource staging server. Also refactors construction of Kubernetes Clients to unify the code paths. * Fix unit test. * Safe close if creating shuffle block handler fails * Use implicit class. * Address comments. * Fix broken test. --- docs/running-on-kubernetes.md | 63 ++++ .../KubernetesExternalShuffleService.scala | 64 ++-- .../SparkKubernetesClientFactory.scala | 103 ++++++ .../spark/deploy/kubernetes/config.scala | 173 ++++----- .../deploy/kubernetes/submit/Client.scala | 329 +++++++++--------- ...riverInitContainerComponentsProvider.scala | 4 +- ...riverPodKubernetesCredentialsMounter.scala | 71 ++-- ...KubernetesCredentialsMounterProvider.scala | 12 +- ...iverPodKubernetesCredentialsProvider.scala | 33 +- .../SubmissionKubernetesClientProvider.scala | 55 --- .../SubmittedDependencyUploaderImpl.scala | 30 +- ...SparkDependencyDownloadInitContainer.scala | 7 - .../kubernetes/ResourceStagingServer.scala | 31 +- .../kubernetes/ResourceStagingService.scala | 19 +- .../ResourceStagingServiceImpl.scala | 52 +-- .../ResourceStagingServiceRetrofit.scala | 6 +- .../rest/kubernetes/StagedResources.scala | 24 ++ .../kubernetes/StagedResourcesCleaner.scala | 150 ++++++++ .../kubernetes/StagedResourcesOwner.scala | 34 ++ .../kubernetes/StagedResourcesStore.scala | 108 ++++++ .../DriverPodKubernetesClientProvider.scala | 103 ------ .../kubernetes/KubernetesClusterManager.scala | 16 +- .../KubernetesClusterSchedulerBackend.scala | 8 +- .../kubernetes/submit/ClientV2Suite.scala | 32 +- ...PodKubernetesCredentialsMounterSuite.scala | 12 +- ...ubernetesExternalShuffleServiceSuite.scala | 49 +++ .../SubmittedDependencyUploaderSuite.scala | 74 ++-- .../ResourceStagingServerSuite.scala | 37 +- .../ResourceStagingServiceImplSuite.scala | 60 ---- .../StagedResourcesCleanerSuite.scala | 149 ++++++++ .../StagedResourcesStoreSuite.scala | 86 +++++ .../integrationtest/KubernetesSuite.scala | 9 +- 32 files changed, 1242 insertions(+), 761 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkKubernetesClientFactory.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmissionKubernetesClientProvider.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResources.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleaner.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesOwner.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStore.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesExternalShuffleServiceSuite.scala delete mode 100644 
resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImplSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleanerSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStoreSuite.scala diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 488efbe5eef36..e9002bdfe0502 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -450,6 +450,69 @@ from the other deployment modes. See the [configuration page](configuration.html client cert file, and/or OAuth token. + + spark.kubernetes.authenticate.resourceStagingServer.caCertFile + (none) + + Path to the CA cert file for connecting to the Kubernetes API server over TLS from the resource staging server when + it monitors objects in determining when to clean up resource bundles. + + + + spark.kubernetes.authenticate.resourceStagingServer.clientKeyFile + (none) + + Path to the client key file for authenticating against the Kubernetes API server from the resource staging server + when it monitors objects in determining when to clean up resource bundles. The resource staging server must have + credentials that allow it to view API objects in any namespace. + + + + spark.kubernetes.authenticate.resourceStagingServer.clientCertFile + (none) + + Path to the client cert file for authenticating against the Kubernetes API server from the resource staging server + when it monitors objects in determining when to clean up resource bundles. The resource staging server must have + credentials that allow it to view API objects in any namespace. + + + + spark.kubernetes.authenticate.resourceStagingServer.oauthToken + (none) + + OAuth token value for authenticating against the Kubernetes API server from the resource staging server + when it monitors objects in determining when to clean up resource bundles. The resource staging server must have + credentials that allow it to view API objects in any namespace. Note that this cannot be set at the same time as + spark.kubernetes.authenticate.resourceStagingServer.oauthTokenFile. + + + + spark.kubernetes.authenticate.resourceStagingServer.oauthTokenFile + (none) + + File containing the OAuth token to use when authenticating against the against the Kubernetes API server from the + resource staging server, when it monitors objects in determining when to clean up resource bundles. The resource + staging server must have credentials that allow it to view API objects in any namespace. Note that this cannot be + set at the same time as spark.kubernetes.authenticate.resourceStagingServer.oauthToken. + + + + spark.kubernetes.authenticate.resourceStagingServer.useServiceAccountCredentials + true + + Whether or not to use a service account token and a service account CA certificate when the resource staging server + authenticates to Kubernetes. If this is set, interactions with Kubernetes will authenticate using a token located at + /var/run/secrets/kubernetes.io/serviceaccount/token and the CA certificate located at + /var/run/secrets/kubernetes.io/serviceaccount/ca.crt. Note that if + spark.kubernetes.authenticate.resourceStagingServer.oauthTokenFile is set, it takes precedence + over the usage of the service account token file. 
Also, if + spark.kubernetes.authenticate.resourceStagingServer.caCertFile is set, it takes precedence over using + the service account's CA certificate file. This generally should be set to true (the default value) when the + resource staging server is deployed as a Kubernetes pod, but should be set to false if the resource staging server + is deployed by other means (i.e. when running the staging server process outside of Kubernetes). The resource + staging server must have credentials that allow it to view API objects in any namespace. + + spark.kubernetes.executor.memoryOverhead executorMemory * 0.10, with minimum of 384 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala index 94292dae10f29..01a8a9a6899fd 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala @@ -17,10 +17,11 @@ package org.apache.spark.deploy.kubernetes +import java.io.File import java.nio.ByteBuffer import io.fabric8.kubernetes.api.model.Pod -import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watch, Watcher} +import io.fabric8.kubernetes.client.{Config, KubernetesClient, KubernetesClientException, Watch, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import org.apache.commons.io.IOUtils import scala.collection.JavaConverters._ @@ -28,13 +29,13 @@ import scala.collection.mutable import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.deploy.ExternalShuffleService +import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.internal.Logging import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} import org.apache.spark.network.shuffle.ExternalShuffleBlockHandler import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, RegisterDriver} import org.apache.spark.network.util.TransportConf -import org.apache.spark.scheduler.cluster.kubernetes.DriverPodKubernetesClientProvider /** * An RPC endpoint that receives registration requests from Spark drivers running on Kubernetes. 
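A minimal sketch of the resource staging server authentication options documented in running-on-kubernetes.md above, assuming the staging server picks them up from a SparkConf; the URL and file paths below are hypothetical placeholders, not values shipped with the project:

    import org.apache.spark.SparkConf

    // Sketch only: credentials the resource staging server would use to watch the
    // Kubernetes API server for pods that reference staged resource bundles.
    val stagingServerConf = new SparkConf()
      // Placeholder in-cluster API server address.
      .set("spark.kubernetes.resourceStagingServer.apiServer.url", "https://kubernetes.default.svc")
      .set("spark.kubernetes.authenticate.resourceStagingServer.caCertFile", "/mnt/secrets/k8s/ca.crt")
      .set("spark.kubernetes.authenticate.resourceStagingServer.oauthTokenFile", "/mnt/secrets/k8s/token")
      // This sketch assumes the server runs outside a pod, so the mounted service
      // account token and CA certificate are not used.
      .set("spark.kubernetes.authenticate.resourceStagingServer.useServiceAccountCredentials", "false")

Per the table above, oauthToken and oauthTokenFile are mutually exclusive, so only the file-based token is set here.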
@@ -42,19 +43,16 @@ import org.apache.spark.scheduler.cluster.kubernetes.DriverPodKubernetesClientPr */ private[spark] class KubernetesShuffleBlockHandler ( transportConf: TransportConf, - kubernetesClientProvider: DriverPodKubernetesClientProvider) + kubernetesClient: KubernetesClient) extends ExternalShuffleBlockHandler(transportConf, null) with Logging { private val INIT_AND_STOP_LOCK = new Object private val CONNECTED_APPS_LOCK = new Object private val connectedApps = mutable.Set.empty[String] private var shuffleWatch: Option[Watch] = None - private var kubernetesClient: Option[KubernetesClient] = None def start(): Unit = INIT_AND_STOP_LOCK.synchronized { - val client = kubernetesClientProvider.get - shuffleWatch = startShuffleWatcher(client) - kubernetesClient = Some(client) + shuffleWatch = startShuffleWatcher() } override def close(): Unit = { @@ -64,8 +62,7 @@ private[spark] class KubernetesShuffleBlockHandler ( INIT_AND_STOP_LOCK.synchronized { shuffleWatch.foreach(IOUtils.closeQuietly) shuffleWatch = None - kubernetesClient.foreach(IOUtils.closeQuietly) - kubernetesClient = None + IOUtils.closeQuietly(kubernetesClient) } } } @@ -90,9 +87,9 @@ private[spark] class KubernetesShuffleBlockHandler ( } } - private def startShuffleWatcher(client: KubernetesClient): Option[Watch] = { + private def startShuffleWatcher(): Option[Watch] = { try { - Some(client + Some(kubernetesClient .pods() .withLabels(Map(SPARK_ROLE_LABEL -> "driver").asJava) .watch(new Watcher[Pod] { @@ -137,31 +134,47 @@ private[spark] class KubernetesShuffleBlockHandler ( */ private[spark] class KubernetesExternalShuffleService( conf: SparkConf, - securityManager: SecurityManager, - kubernetesClientProvider: DriverPodKubernetesClientProvider) + securityManager: SecurityManager) extends ExternalShuffleService(conf, securityManager) { private var shuffleBlockHandlers: mutable.Buffer[KubernetesShuffleBlockHandler] = _ protected override def newShuffleBlockHandler( tConf: TransportConf): ExternalShuffleBlockHandler = { - val newBlockHandler = new KubernetesShuffleBlockHandler(tConf, kubernetesClientProvider) - newBlockHandler.start() - - // TODO: figure out a better way of doing this. - // This is necessary because the constructor is not called - // when this class is initialized through ExternalShuffleService. - if (shuffleBlockHandlers == null) { + val kubernetesClient = SparkKubernetesClientFactory.createKubernetesClient( + conf.get(KUBERNETES_SHUFFLE_APISERVER_URI), + None, + APISERVER_AUTH_SHUFFLE_SERVICE_CONF_PREFIX, + conf, + Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH)) + .filter( _ => conf.get(KUBERNETES_SHUFFLE_USE_SERVICE_ACCOUNT_CREDENTIALS)), + Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH)) + .filter( _ => conf.get(KUBERNETES_SHUFFLE_USE_SERVICE_ACCOUNT_CREDENTIALS))) + val newBlockHandler = new KubernetesShuffleBlockHandler(tConf, kubernetesClient) + try { + newBlockHandler.start() + // TODO: figure out a better way of doing this. + // This is necessary because the constructor is not called + // when this class is initialized through ExternalShuffleService. 
+ if (shuffleBlockHandlers == null) { shuffleBlockHandlers = mutable.Buffer.empty[KubernetesShuffleBlockHandler] + } + shuffleBlockHandlers += newBlockHandler + newBlockHandler + } catch { + case e: Throwable => + logError("Failed to create Kubernetes shuffle block handler.", e) + newBlockHandler.close() + throw e } - shuffleBlockHandlers += newBlockHandler - newBlockHandler } override def stop(): Unit = { try { super.stop() } finally { - shuffleBlockHandlers.foreach(_.close()) + if (shuffleBlockHandlers != null) { + shuffleBlockHandlers.foreach(_.close()) + } } } } @@ -169,10 +182,7 @@ private[spark] class KubernetesExternalShuffleService( private[spark] object KubernetesExternalShuffleService extends Logging { def main(args: Array[String]): Unit = { ExternalShuffleService.main(args, - (conf: SparkConf, sm: SecurityManager) => { - val kubernetesClientProvider = new DriverPodKubernetesClientProvider(conf) - new KubernetesExternalShuffleService(conf, sm, kubernetesClientProvider) - }) + (conf: SparkConf, sm: SecurityManager) => new KubernetesExternalShuffleService(conf, sm)) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkKubernetesClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkKubernetesClientFactory.scala new file mode 100644 index 0000000000000..d2729a2db2fa0 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkKubernetesClientFactory.scala @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import java.io.File + +import com.google.common.base.Charsets +import com.google.common.io.Files +import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient, KubernetesClient} +import io.fabric8.kubernetes.client.utils.HttpClientUtils +import okhttp3.Dispatcher + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.util.ThreadUtils + +/** + * Spark-opinionated builder for Kubernetes clients. It uses a prefix plus common suffixes to + * parse configuration keys, similar to the manner in which Spark's SecurityManager parses SSL + * options for different components. 
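+ *
+ * For example, with the prefix "spark.kubernetes.authenticate.driver", the OAuth token file
+ * is looked up under "spark.kubernetes.authenticate.driver.oauthTokenFile" and the CA
+ * certificate under "spark.kubernetes.authenticate.driver.caCertFile".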
+ */ +private[spark] object SparkKubernetesClientFactory { + + def createKubernetesClient( + master: String, + namespace: Option[String], + kubernetesAuthConfPrefix: String, + sparkConf: SparkConf, + maybeServiceAccountToken: Option[File], + maybeServiceAccountCaCert: Option[File]): KubernetesClient = { + val oauthTokenFileConf = s"$kubernetesAuthConfPrefix.$OAUTH_TOKEN_FILE_CONF_SUFFIX" + val oauthTokenConf = s"$kubernetesAuthConfPrefix.$OAUTH_TOKEN_CONF_SUFFIX" + val oauthTokenFile = sparkConf.getOption(oauthTokenFileConf) + .map(new File(_)) + .orElse(maybeServiceAccountToken) + val oauthTokenValue = sparkConf.getOption(oauthTokenConf) + OptionRequirements.requireNandDefined( + oauthTokenFile, + oauthTokenValue, + s"Cannot specify OAuth token through both a file $oauthTokenFileConf and a" + + s" value $oauthTokenConf.") + + val caCertFile = sparkConf + .getOption(s"$kubernetesAuthConfPrefix.$CA_CERT_FILE_CONF_SUFFIX") + .orElse(maybeServiceAccountCaCert.map(_.getAbsolutePath)) + val clientKeyFile = sparkConf + .getOption(s"$kubernetesAuthConfPrefix.$CLIENT_KEY_FILE_CONF_SUFFIX") + val clientCertFile = sparkConf + .getOption(s"$kubernetesAuthConfPrefix.$CLIENT_CERT_FILE_CONF_SUFFIX") + val dispatcher = new Dispatcher( + ThreadUtils.newDaemonCachedThreadPool("kubernetes-dispatcher")) + val config = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(master) + .withWebsocketPingInterval(0) + .withOption(oauthTokenValue) { + (token, configBuilder) => configBuilder.withOauthToken(token) + }.withOption(oauthTokenFile) { + (file, configBuilder) => + configBuilder.withOauthToken(Files.toString(file, Charsets.UTF_8)) + }.withOption(caCertFile) { + (file, configBuilder) => configBuilder.withCaCertFile(file) + }.withOption(clientKeyFile) { + (file, configBuilder) => configBuilder.withClientKeyFile(file) + }.withOption(clientCertFile) { + (file, configBuilder) => configBuilder.withClientCertFile(file) + }.withOption(namespace) { + (ns, configBuilder) => configBuilder.withNamespace(ns) + }.build() + val baseHttpClient = HttpClientUtils.createHttpClient(config) + val httpClientWithCustomDispatcher = baseHttpClient.newBuilder() + .dispatcher(dispatcher) + .build() + new DefaultKubernetesClient(httpClientWithCustomDispatcher, config) + } + + private implicit class OptionConfigurableConfigBuilder(configBuilder: ConfigBuilder) { + + def withOption[T] + (option: Option[T]) + (configurator: ((T, ConfigBuilder) => ConfigBuilder)): OptionConfigurableConfigBuilder = { + new OptionConfigurableConfigBuilder(option.map { opt => + configurator(opt, configBuilder) + }.getOrElse(configBuilder)) + } + + def build(): Config = configBuilder.build() + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index d1341b15afaca..dd99e0f7a5ae0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -47,120 +47,32 @@ package object config extends Logging { .stringConf .createWithDefault(s"spark-executor:$sparkVersion") - private val APISERVER_SUBMIT_CONF_PREFIX = "spark.kubernetes.authenticate.submission" - private val APISERVER_DRIVER_CONF_PREFIX = "spark.kubernetes.authenticate.driver" - - private[spark] val KUBERNETES_SUBMIT_CA_CERT_FILE = - ConfigBuilder(s"$APISERVER_SUBMIT_CONF_PREFIX.caCertFile") 
- .doc("Path to the CA cert file for connecting to Kubernetes over SSL when creating" + - " Kubernetes resources for the driver. This file should be located on the submitting" + - " machine's disk.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_SUBMIT_CLIENT_KEY_FILE = - ConfigBuilder(s"$APISERVER_SUBMIT_CONF_PREFIX.clientKeyFile") - .doc("Path to the client key file for authenticating against the Kubernetes API server" + - " when initially creating Kubernetes resources for the driver. This file should be" + - " located on the submitting machine's disk.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_SUBMIT_CLIENT_CERT_FILE = - ConfigBuilder(s"$APISERVER_SUBMIT_CONF_PREFIX.clientCertFile") - .doc("Path to the client cert file for authenticating against the Kubernetes API server" + - " when initially creating Kubernetes resources for the driver. This file should be" + - " located on the submitting machine's disk.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_SUBMIT_OAUTH_TOKEN = - ConfigBuilder(s"$APISERVER_SUBMIT_CONF_PREFIX.oauthToken") - .doc("OAuth token to use when authenticating against the against the Kubernetes API server" + - " when initially creating Kubernetes resources for the driver. Note that unlike the other" + - " authentication options, this should be the exact string value of the token to use for" + - " the authentication.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_CA_CERT_FILE = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.caCertFile") - .doc("Path to the CA cert file for connecting to Kubernetes over TLS from the driver pod" + - " when requesting executors. This file should be located on the submitting machine's disk" + - " and will be uploaded to the driver pod.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_CLIENT_KEY_FILE = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.clientKeyFile") - .doc("Path to the client key file for authenticating against the Kubernetes API server from" + - " the driver pod when requesting executors. This file should be located on the submitting" + - " machine's disk, and will be uploaded to the driver pod.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_CLIENT_CERT_FILE = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.clientCertFile") - .doc("Path to the client cert file for authenticating against the Kubernetes API server" + - " from the driver pod when requesting executors. This file should be located on the" + - " submitting machine's disk, and will be uploaded to the driver pod.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_OAUTH_TOKEN = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.oauthToken") - .doc("OAuth token to use when authenticating against the Kubernetes API server from the" + - " driver pod when requesting executors. Note that unlike the other authentication options" + - " this should be the exact string value of the token to use for the authentication. This" + - " token value is mounted as a secret on the driver pod.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.caCertFile") - .doc("Path on the driver pod's disk containing the CA cert file to use when authenticating" + - " against Kubernetes. 
Typically this is configured by spark-submit from mounting a" + - " secret from the submitting machine into the pod, and hence this configuration is marked" + - " as internal, but this can also be set manually to use a certificate that is mounted" + - " into the driver pod via other means.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.clientKeyFile") - .doc("Path on the driver pod's disk containing the client key file to use when" + - " authenticating against Kubernetes. Typically this is configured by spark-submit from" + - " mounting a secret from the submitting machine into the pod, and hence this" + - " configuration is marked as internal, but this can also be set manually to" + - " use a key file that is mounted into the driver pod via other means.") - .internal() - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.clientCertFile") - .doc("Path on the driver pod's disk containing the client cert file to use when" + - " authenticating against Kubernetes. Typically this is configured by spark-submit from" + - " mounting a secret from the submitting machine into the pod, and hence this" + - " configuration is marked as internal, but this can also be set manually to" + - " use a certificate that is mounted into the driver pod via other means.") - .internal() - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.oauthTokenFile") - .doc("Path on the driver pod's disk containing the OAuth token file to use when" + - " authenticating against Kubernetes. 
Typically this is configured by spark-submit from" + - " mounting a secret from the submitting machine into the pod, and hence this" + - " configuration is marked as internal, but this can also be set manually to" + - " use a token that is mounted into the driver pod via other means.") - .internal() - .stringConf - .createOptional + private[spark] val APISERVER_AUTH_SUBMISSION_CONF_PREFIX = + "spark.kubernetes.authenticate.submission" + private[spark] val APISERVER_AUTH_DRIVER_CONF_PREFIX = + "spark.kubernetes.authenticate.driver" + private[spark] val APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX = + "spark.kubernetes.authenticate.driver.mounted" + private[spark] val APISERVER_AUTH_RESOURCE_STAGING_SERVER_CONF_PREFIX = + "spark.kubernetes.authenticate.resourceStagingServer" + private[spark] val APISERVER_AUTH_SHUFFLE_SERVICE_CONF_PREFIX = + "spark.kubernetes.authenticate.shuffleService" + private[spark] val OAUTH_TOKEN_CONF_SUFFIX = "oauthToken" + private[spark] val OAUTH_TOKEN_FILE_CONF_SUFFIX = "oauthTokenFile" + private[spark] val CLIENT_KEY_FILE_CONF_SUFFIX = "clientKeyFile" + private[spark] val CLIENT_CERT_FILE_CONF_SUFFIX = "clientCertFile" + private[spark] val CA_CERT_FILE_CONF_SUFFIX = "caCertFile" + + private[spark] val RESOURCE_STAGING_SERVER_USE_SERVICE_ACCOUNT_CREDENTIALS = + ConfigBuilder( + s"$APISERVER_AUTH_RESOURCE_STAGING_SERVER_CONF_PREFIX.useServiceAccountCredentials") + .doc("Use a service account token and CA certificate in the resource staging server to" + + " watch the API server's objects.") + .booleanConf + .createWithDefault(true) private[spark] val KUBERNETES_SERVICE_ACCOUNT_NAME = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.serviceAccountName") + ConfigBuilder(s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.serviceAccountName") .doc("Service account that is used when running the driver pod. The driver pod uses" + " this service account when requesting executor pods from the API server. If specific" + " credentials are given for the driver pod to use, the driver will favor" + @@ -259,6 +171,19 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val KUBERNETES_SHUFFLE_APISERVER_URI = + ConfigBuilder("spark.kubernetes.shuffle.apiServer.url") + .doc("URL to the Kubernetes API server that the shuffle service will monitor for Spark pods.") + .stringConf + .createWithDefault(KUBERNETES_MASTER_INTERNAL_URL) + + private[spark] val KUBERNETES_SHUFFLE_USE_SERVICE_ACCOUNT_CREDENTIALS = + ConfigBuilder(s"$APISERVER_AUTH_SHUFFLE_SERVICE_CONF_PREFIX.useServiceAccountCredentials") + .doc("Whether or not to use service account credentials when contacting the API server from" + + " the shuffle service.") + .booleanConf + .createWithDefault(true) + private[spark] val KUBERNETES_ALLOCATION_BATCH_SIZE = ConfigBuilder("spark.kubernetes.allocation.batch.size") .doc("Number of pods to launch at once in each round of dynamic allocation. ") @@ -285,12 +210,36 @@ package object config extends Logging { .createWithDefaultString("1s") // Spark resource staging server. + private[spark] val RESOURCE_STAGING_SERVER_API_SERVER_URL = + ConfigBuilder("spark.kubernetes.resourceStagingServer.apiServer.url") + .doc("URL for the Kubernetes API server. The resource staging server monitors the API" + + " server to check when pods no longer are using mounted resources. 
Note that this isn't" + + " to be used in Spark applications, as the API server URL should be set via spark.master.") + .stringConf + .createWithDefault(KUBERNETES_MASTER_INTERNAL_URL) + + private[spark] val RESOURCE_STAGING_SERVER_API_SERVER_CA_CERT_FILE = + ConfigBuilder("spark.kubernetes.resourceStagingServer.apiServer.caCertFile") + .doc("CA certificate for the resource staging server to use when contacting the Kubernetes" + + " API server over TLS.") + .stringConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_PORT = ConfigBuilder("spark.kubernetes.resourceStagingServer.port") .doc("Port for the Kubernetes resource staging server to listen on.") .intConf .createWithDefault(10000) + private[spark] val RESOURCE_STAGING_SERVER_INITIAL_ACCESS_EXPIRATION_TIMEOUT = + ConfigBuilder("spark.kubernetes.resourceStagingServer.initialAccessExpirationTimeout") + .doc("The resource staging server will wait for any resource bundle to be accessed for a" + + " first time for this period. If this timeout expires before the resources are accessed" + + " the first time, the resources are cleaned up under the assumption that the dependents" + + " of the given resource bundle failed to launch at all.") + .timeConf(TimeUnit.MILLISECONDS) + .createWithDefaultString("30m") + private[spark] val RESOURCE_STAGING_SERVER_KEY_PEM = ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.keyPem") .doc("Key PEM file to use when having the Kubernetes dependency server listen on TLS.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index dc8a6da45495e..85dac3df57b4c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -20,10 +20,11 @@ import java.io.File import java.util.Collections import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, OwnerReferenceBuilder, PodBuilder} +import io.fabric8.kubernetes.client.KubernetesClient import scala.collection.JavaConverters._ -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.ConfigurationUtils +import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, SparkKubernetesClientFactory} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.rest.kubernetes.ResourceStagingServerSslOptionsProviderImpl @@ -42,18 +43,18 @@ import org.apache.spark.util.Utils * where different steps of submission should be factored out into separate classes. 
*/ private[spark] class Client( - appName: String, - kubernetesAppId: String, - mainClass: String, - sparkConf: SparkConf, - appArgs: Array[String], - sparkJars: Seq[String], - sparkFiles: Seq[String], - waitForAppCompletion: Boolean, - kubernetesClientProvider: SubmissionKubernetesClientProvider, - initContainerComponentsProvider: DriverInitContainerComponentsProvider, - kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider, - loggingPodStatusWatcher: LoggingPodStatusWatcher) + appName: String, + kubernetesAppId: String, + mainClass: String, + sparkConf: SparkConf, + appArgs: Array[String], + sparkJars: Seq[String], + sparkFiles: Seq[String], + waitForAppCompletion: Boolean, + kubernetesClient: KubernetesClient, + initContainerComponentsProvider: DriverInitContainerComponentsProvider, + kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider, + loggingPodStatusWatcher: LoggingPodStatusWatcher) extends Logging { private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) @@ -89,142 +90,134 @@ private[spark] class Client( val parsedCustomAnnotations = ConfigurationUtils.parseKeyValuePairs( customAnnotations, KUBERNETES_DRIVER_ANNOTATIONS.key, "annotations") - Utils.tryWithResource(kubernetesClientProvider.get) { kubernetesClient => - val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => - new EnvVarBuilder() - .withName(ENV_SUBMIT_EXTRA_CLASSPATH) - .withValue(classPath) - .build() - } - val driverContainer = new ContainerBuilder() - .withName(DRIVER_CONTAINER_NAME) - .withImage(driverDockerImage) - .withImagePullPolicy("IfNotPresent") - .addToEnv(driverExtraClasspathEnv.toSeq: _*) - .addNewEnv() - .withName(ENV_DRIVER_MEMORY) - .withValue(driverContainerMemoryWithOverhead + "m") - .endEnv() - .addNewEnv() - .withName(ENV_DRIVER_MAIN_CLASS) - .withValue(mainClass) - .endEnv() - .addNewEnv() - .withName(ENV_DRIVER_ARGS) - .withValue(appArgs.mkString(" ")) - .endEnv() + val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => + new EnvVarBuilder() + .withName(ENV_SUBMIT_EXTRA_CLASSPATH) + .withValue(classPath) .build() - val basePod = new PodBuilder() - .withNewMetadata() - .withName(kubernetesDriverPodName) - .addToLabels(allLabels.asJava) - .addToAnnotations(parsedCustomAnnotations.asJava) - .endMetadata() - .withNewSpec() - .withRestartPolicy("Never") - .addToContainers(driverContainer) - .endSpec() + } + val driverContainer = new ContainerBuilder() + .withName(DRIVER_CONTAINER_NAME) + .withImage(driverDockerImage) + .withImagePullPolicy("IfNotPresent") + .addToEnv(driverExtraClasspathEnv.toSeq: _*) + .addNewEnv() + .withName(ENV_DRIVER_MEMORY) + .withValue(driverContainerMemoryWithOverhead + "m") + .endEnv() + .addNewEnv() + .withName(ENV_DRIVER_MAIN_CLASS) + .withValue(mainClass) + .endEnv() + .addNewEnv() + .withName(ENV_DRIVER_ARGS) + .withValue(appArgs.mkString(" ")) + .endEnv() + .build() + val basePod = new PodBuilder() + .withNewMetadata() + .withName(kubernetesDriverPodName) + .addToLabels(allLabels.asJava) + .addToAnnotations(parsedCustomAnnotations.asJava) + .endMetadata() + .withNewSpec() + .withRestartPolicy("Never") + .addToContainers(driverContainer) + .endSpec() - val maybeSubmittedDependencyUploader = initContainerComponentsProvider + val maybeSubmittedDependencyUploader = initContainerComponentsProvider .provideInitContainerSubmittedDependencyUploader(allLabels) - val maybeSubmittedResourceIdentifiers = maybeSubmittedDependencyUploader.map { uploader => - 
SubmittedResources(uploader.uploadJars(), uploader.uploadFiles()) - } - val maybeSecretBuilder = initContainerComponentsProvider - .provideSubmittedDependenciesSecretBuilder( - maybeSubmittedResourceIdentifiers.map(_.secrets())) - val maybeSubmittedDependenciesSecret = maybeSecretBuilder.map(_.build()) - val initContainerConfigMap = initContainerComponentsProvider + val maybeSubmittedResourceIdentifiers = maybeSubmittedDependencyUploader.map { uploader => + SubmittedResources(uploader.uploadJars(), uploader.uploadFiles()) + } + val maybeSecretBuilder = initContainerComponentsProvider + .provideSubmittedDependenciesSecretBuilder( + maybeSubmittedResourceIdentifiers.map(_.secrets())) + val maybeSubmittedDependenciesSecret = maybeSecretBuilder.map(_.build()) + val initContainerConfigMap = initContainerComponentsProvider .provideInitContainerConfigMapBuilder(maybeSubmittedResourceIdentifiers.map(_.ids())) .build() - val podWithInitContainer = initContainerComponentsProvider + val podWithInitContainer = initContainerComponentsProvider .provideInitContainerBootstrap() .bootstrapInitContainerAndVolumes(driverContainer.getName, basePod) - val containerLocalizedFilesResolver = initContainerComponentsProvider - .provideContainerLocalizedFilesResolver() - val resolvedSparkJars = containerLocalizedFilesResolver.resolveSubmittedSparkJars() - val resolvedSparkFiles = containerLocalizedFilesResolver.resolveSubmittedSparkFiles() + val containerLocalizedFilesResolver = initContainerComponentsProvider + .provideContainerLocalizedFilesResolver() + val resolvedSparkJars = containerLocalizedFilesResolver.resolveSubmittedSparkJars() + val resolvedSparkFiles = containerLocalizedFilesResolver.resolveSubmittedSparkFiles() - val executorInitContainerConfiguration = initContainerComponentsProvider - .provideExecutorInitContainerConfiguration() - val sparkConfWithExecutorInit = executorInitContainerConfiguration - .configureSparkConfForExecutorInitContainer(sparkConf) - val credentialsMounter = kubernetesCredentialsMounterProvider - .getDriverPodKubernetesCredentialsMounter() - val credentialsSecret = credentialsMounter.createCredentialsSecret() - val podWithInitContainerAndMountedCreds = credentialsMounter.mountDriverKubernetesCredentials( - podWithInitContainer, driverContainer.getName, credentialsSecret) - val resolvedSparkConf = credentialsMounter.setDriverPodKubernetesCredentialLocations( - sparkConfWithExecutorInit) - if (resolvedSparkJars.nonEmpty) { - resolvedSparkConf.set("spark.jars", resolvedSparkJars.mkString(",")) - } - if (resolvedSparkFiles.nonEmpty) { - resolvedSparkConf.set("spark.files", resolvedSparkFiles.mkString(",")) - } - resolvedSparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) - resolvedSparkConf.set("spark.app.id", kubernetesAppId) - // We don't need this anymore since we just set the JVM options on the environment - resolvedSparkConf.remove(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) - resolvedSparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { _ => - resolvedSparkConf.set(KUBERNETES_SUBMIT_OAUTH_TOKEN.key, "") - } - resolvedSparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).foreach { _ => - resolvedSparkConf.set(KUBERNETES_DRIVER_OAUTH_TOKEN.key, "") - } - val resolvedLocalClasspath = containerLocalizedFilesResolver - .resolveSubmittedAndRemoteSparkJars() - val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { - case (confKey, confValue) => s"-D$confKey=$confValue" - }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") - val 
resolvedDriverPod = podWithInitContainerAndMountedCreds.editSpec() - .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainer.getName)) - .addNewEnv() - .withName(ENV_MOUNTED_CLASSPATH) - .withValue(resolvedLocalClasspath.mkString(File.pathSeparator)) - .endEnv() - .addNewEnv() - .withName(ENV_DRIVER_JAVA_OPTS) - .withValue(resolvedDriverJavaOpts) - .endEnv() - .endContainer() - .endSpec() - .build() - Utils.tryWithResource( - kubernetesClient - .pods() - .withName(resolvedDriverPod.getMetadata.getName) - .watch(loggingPodStatusWatcher)) { _ => - val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) - try { - val driverOwnedResources = Seq(initContainerConfigMap) ++ - maybeSubmittedDependenciesSecret.toSeq ++ - credentialsSecret.toSeq - val driverPodOwnerReference = new OwnerReferenceBuilder() - .withName(createdDriverPod.getMetadata.getName) - .withApiVersion(createdDriverPod.getApiVersion) - .withUid(createdDriverPod.getMetadata.getUid) - .withKind(createdDriverPod.getKind) - .withController(true) - .build() - driverOwnedResources.foreach { resource => - val originalMetadata = resource.getMetadata - originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) - } - kubernetesClient.resourceList(driverOwnedResources: _*).createOrReplace() - } catch { - case e: Throwable => - kubernetesClient.pods().delete(createdDriverPod) - throw e - } - if (waitForAppCompletion) { - logInfo(s"Waiting for application $kubernetesAppId to finish...") - loggingPodStatusWatcher.awaitCompletion() - logInfo(s"Application $kubernetesAppId finished.") - } else { - logInfo(s"Deployed Spark application $kubernetesAppId into Kubernetes.") + val executorInitContainerConfiguration = initContainerComponentsProvider + .provideExecutorInitContainerConfiguration() + val sparkConfWithExecutorInit = executorInitContainerConfiguration + .configureSparkConfForExecutorInitContainer(sparkConf) + val credentialsMounter = kubernetesCredentialsMounterProvider + .getDriverPodKubernetesCredentialsMounter() + val credentialsSecret = credentialsMounter.createCredentialsSecret() + val podWithInitContainerAndMountedCreds = credentialsMounter.mountDriverKubernetesCredentials( + podWithInitContainer, driverContainer.getName, credentialsSecret) + val resolvedSparkConf = credentialsMounter.setDriverPodKubernetesCredentialLocations( + sparkConfWithExecutorInit) + if (resolvedSparkJars.nonEmpty) { + resolvedSparkConf.set("spark.jars", resolvedSparkJars.mkString(",")) + } + if (resolvedSparkFiles.nonEmpty) { + resolvedSparkConf.set("spark.files", resolvedSparkFiles.mkString(",")) + } + resolvedSparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) + resolvedSparkConf.set("spark.app.id", kubernetesAppId) + // We don't need this anymore since we just set the JVM options on the environment + resolvedSparkConf.remove(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) + val resolvedLocalClasspath = containerLocalizedFilesResolver + .resolveSubmittedAndRemoteSparkJars() + val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { + case (confKey, confValue) => s"-D$confKey=$confValue" + }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") + val resolvedDriverPod = podWithInitContainerAndMountedCreds.editSpec() + .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainer.getName)) + .addNewEnv() + .withName(ENV_MOUNTED_CLASSPATH) + .withValue(resolvedLocalClasspath.mkString(File.pathSeparator)) + .endEnv() + .addNewEnv() + 
.withName(ENV_DRIVER_JAVA_OPTS) + .withValue(resolvedDriverJavaOpts) + .endEnv() + .endContainer() + .endSpec() + .build() + Utils.tryWithResource( + kubernetesClient + .pods() + .withName(resolvedDriverPod.getMetadata.getName) + .watch(loggingPodStatusWatcher)) { _ => + val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) + try { + val driverOwnedResources = Seq(initContainerConfigMap) ++ + maybeSubmittedDependenciesSecret.toSeq ++ + credentialsSecret.toSeq + val driverPodOwnerReference = new OwnerReferenceBuilder() + .withName(createdDriverPod.getMetadata.getName) + .withApiVersion(createdDriverPod.getApiVersion) + .withUid(createdDriverPod.getMetadata.getUid) + .withKind(createdDriverPod.getKind) + .withController(true) + .build() + driverOwnedResources.foreach { resource => + val originalMetadata = resource.getMetadata + originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) } + kubernetesClient.resourceList(driverOwnedResources: _*).createOrReplace() + } catch { + case e: Throwable => + kubernetesClient.pods().delete(createdDriverPod) + throw e + } + if (waitForAppCompletion) { + logInfo(s"Waiting for application $kubernetesAppId to finish...") + loggingPodStatusWatcher.awaitCompletion() + logInfo(s"Application $kubernetesAppId finished.") + } else { + logInfo(s"Deployed Spark application $kubernetesAppId into Kubernetes.") } } } @@ -268,27 +261,43 @@ private[spark] object Client { val appName = sparkConf.getOption("spark.app.name") .getOrElse("spark") val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + val namespace = sparkConf.get(KUBERNETES_NAMESPACE) + val master = resolveK8sMaster(sparkConf.get("spark.master")) val sslOptionsProvider = new ResourceStagingServerSslOptionsProviderImpl(sparkConf) val initContainerComponentsProvider = new DriverInitContainerComponentsProviderImpl( - sparkConf, kubernetesAppId, sparkJars, sparkFiles, sslOptionsProvider.getSslOptions) - val kubernetesClientProvider = new SubmissionKubernetesClientProviderImpl(sparkConf) - val kubernetesCredentialsMounterProvider = - new DriverPodKubernetesCredentialsMounterProviderImpl(sparkConf, kubernetesAppId) - val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) - val loggingInterval = Option(sparkConf.get(REPORT_INTERVAL)).filter( _ => waitForAppCompletion) - val loggingPodStatusWatcher = new LoggingPodStatusWatcherImpl(kubernetesAppId, loggingInterval) - new Client( - appName, - kubernetesAppId, - mainClass, - sparkConf, - appArgs, - sparkJars, - sparkFiles, - waitForAppCompletion, - kubernetesClientProvider, - initContainerComponentsProvider, - kubernetesCredentialsMounterProvider, - loggingPodStatusWatcher).run() + sparkConf, + kubernetesAppId, + namespace, + sparkJars, + sparkFiles, + sslOptionsProvider.getSslOptions) + Utils.tryWithResource(SparkKubernetesClientFactory.createKubernetesClient( + master, + Some(namespace), + APISERVER_AUTH_SUBMISSION_CONF_PREFIX, + sparkConf, + None, + None)) { kubernetesClient => + val kubernetesCredentialsMounterProvider = + new DriverPodKubernetesCredentialsMounterProviderImpl(sparkConf, kubernetesAppId) + val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) + val loggingInterval = Option(sparkConf.get(REPORT_INTERVAL)) + .filter( _ => waitForAppCompletion) + val loggingPodStatusWatcher = new LoggingPodStatusWatcherImpl( + kubernetesAppId, loggingInterval) + new Client( + appName, + kubernetesAppId, + mainClass, + sparkConf, + appArgs, + sparkJars, + sparkFiles, + 
waitForAppCompletion, + kubernetesClient, + initContainerComponentsProvider, + kubernetesCredentialsMounterProvider, + loggingPodStatusWatcher).run() + } } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala index 7fbb0c9274bf5..ccb349c5b2988 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.deploy.kubernetes.submit +import java.io.File + import org.apache.spark.{SparkConf, SSLOptions} import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, OptionRequirements, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} import org.apache.spark.deploy.kubernetes.config._ @@ -46,6 +48,7 @@ private[spark] trait DriverInitContainerComponentsProvider { private[spark] class DriverInitContainerComponentsProviderImpl( sparkConf: SparkConf, kubernetesAppId: String, + namespace: String, sparkJars: Seq[String], sparkFiles: Seq[String], resourceStagingServerExternalSslOptions: SSLOptions) @@ -98,7 +101,6 @@ private[spark] class DriverInitContainerComponentsProviderImpl( private val maybeSecretName = maybeResourceStagingServerUri.map { _ => s"$kubernetesAppId-init-secret" } - private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) private val configMapName = s"$kubernetesAppId-init-config" private val configMapKey = s"$kubernetesAppId-init-config-key" private val initContainerImage = sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala index ded0237732ce0..b13800f389605 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala @@ -53,41 +53,50 @@ private[spark] trait DriverPodKubernetesCredentialsMounter { } private[spark] class DriverPodKubernetesCredentialsMounterImpl( - kubernetesAppId: String, - submitterLocalDriverPodKubernetesCredentials: KubernetesCredentials, - maybeUserSpecifiedMountedClientKeyFile: Option[String], - maybeUserSpecifiedMountedClientCertFile: Option[String], - maybeUserSpecifiedMountedOAuthTokenFile: Option[String], - maybeUserSpecifiedMountedCaCertFile: Option[String]) + kubernetesAppId: String, + submitterLocalDriverPodKubernetesCredentials: KubernetesCredentials, + maybeUserSpecifiedMountedClientKeyFile: Option[String], + maybeUserSpecifiedMountedClientCertFile: Option[String], + maybeUserSpecifiedMountedOAuthTokenFile: Option[String], + maybeUserSpecifiedMountedCaCertFile: Option[String]) extends DriverPodKubernetesCredentialsMounter { override def setDriverPodKubernetesCredentialLocations(sparkConf: SparkConf): SparkConf = { val resolvedMountedClientKeyFile = resolveSecretLocation( - 
maybeUserSpecifiedMountedClientKeyFile, - submitterLocalDriverPodKubernetesCredentials.clientKeyDataBase64, - DRIVER_CREDENTIALS_CLIENT_KEY_PATH) + maybeUserSpecifiedMountedClientKeyFile, + submitterLocalDriverPodKubernetesCredentials.clientKeyDataBase64, + DRIVER_CREDENTIALS_CLIENT_KEY_PATH) val resolvedMountedClientCertFile = resolveSecretLocation( - maybeUserSpecifiedMountedClientCertFile, - submitterLocalDriverPodKubernetesCredentials.clientCertDataBase64, - DRIVER_CREDENTIALS_CLIENT_CERT_PATH) + maybeUserSpecifiedMountedClientCertFile, + submitterLocalDriverPodKubernetesCredentials.clientCertDataBase64, + DRIVER_CREDENTIALS_CLIENT_CERT_PATH) val resolvedMountedCaCertFile = resolveSecretLocation( - maybeUserSpecifiedMountedCaCertFile, - submitterLocalDriverPodKubernetesCredentials.caCertDataBase64, - DRIVER_CREDENTIALS_CA_CERT_PATH) + maybeUserSpecifiedMountedCaCertFile, + submitterLocalDriverPodKubernetesCredentials.caCertDataBase64, + DRIVER_CREDENTIALS_CA_CERT_PATH) val resolvedMountedOAuthTokenFile = resolveSecretLocation( - maybeUserSpecifiedMountedOAuthTokenFile, - submitterLocalDriverPodKubernetesCredentials.oauthTokenBase64, - DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH) + maybeUserSpecifiedMountedOAuthTokenFile, + submitterLocalDriverPodKubernetesCredentials.oauthTokenBase64, + DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH) val sparkConfWithCredentialLocations = sparkConf.clone() - .setOption(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE, resolvedMountedCaCertFile) - .setOption(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE, resolvedMountedClientKeyFile) - .setOption(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE, resolvedMountedClientCertFile) - .setOption(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN, resolvedMountedOAuthTokenFile) - sparkConfWithCredentialLocations.get(KUBERNETES_DRIVER_OAUTH_TOKEN).foreach { _ => - sparkConfWithCredentialLocations.set(KUBERNETES_DRIVER_OAUTH_TOKEN, "") - } - sparkConfWithCredentialLocations.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { _ => - sparkConfWithCredentialLocations.set(KUBERNETES_SUBMIT_OAUTH_TOKEN, "") + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", + resolvedMountedCaCertFile) + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", + resolvedMountedClientKeyFile) + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", + resolvedMountedClientCertFile) + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX", + resolvedMountedOAuthTokenFile) + // Redact all OAuth token values + sparkConfWithCredentialLocations + .getAll + .filter(_._1.endsWith(OAUTH_TOKEN_CONF_SUFFIX)).map(_._1) + .foreach { + sparkConfWithCredentialLocations.set(_, "") } sparkConfWithCredentialLocations } @@ -141,9 +150,9 @@ private[spark] class DriverPodKubernetesCredentialsMounterImpl( } private def resolveSecretLocation( - mountedUserSpecified: Option[String], - valueMountedFromSubmitter: Option[String], - mountedCanonicalLocation: String): Option[String] = { + mountedUserSpecified: Option[String], + valueMountedFromSubmitter: Option[String], + mountedCanonicalLocation: String): Option[String] = { mountedUserSpecified.orElse(valueMountedFromSubmitter.map( _ => { mountedCanonicalLocation })) @@ -167,7 +176,7 @@ private[spark] class DriverPodKubernetesCredentialsMounterImpl( } private class OptionSettableSparkConf(sparkConf: SparkConf) { - def setOption[T](configEntry: OptionalConfigEntry[T], option: Option[T]): SparkConf = { + def setOption(configEntry: 
String, option: Option[String]): SparkConf = { option.map( opt => { sparkConf.set(configEntry, opt) }).getOrElse(sparkConf) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala index 3f0e7d97275a5..913279198146a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala @@ -37,9 +37,13 @@ private[spark] class DriverPodKubernetesCredentialsMounterProviderImpl( new DriverPodKubernetesCredentialsMounterImpl( kubernetesAppId, submitterLocalDriverPodKubernetesCredentials, - sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE), - sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE), - sparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN), - sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE)) + sparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX"), + sparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX"), + sparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX"), + sparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX")) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala index 404741520c059..41b0cf8ceaeab 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala @@ -29,25 +29,20 @@ import org.apache.spark.internal.config.OptionalConfigEntry private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf) { def get(): KubernetesCredentials = { - sparkConf.get(KUBERNETES_SERVICE_ACCOUNT_NAME).foreach { _ => - require(sparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).isEmpty, - "Cannot specify both a service account and a driver pod OAuth token.") - require(sparkConf.get(KUBERNETES_DRIVER_CA_CERT_FILE).isEmpty, - "Cannot specify both a service account and a driver pod CA cert file.") - require(sparkConf.get(KUBERNETES_DRIVER_CLIENT_KEY_FILE).isEmpty, - "Cannot specify both a service account and a driver pod client key file.") - require(sparkConf.get(KUBERNETES_DRIVER_CLIENT_CERT_FILE).isEmpty, - "Cannot specify both a service account and a driver pod client cert file.") - } - val oauthTokenBase64 = sparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).map { token => + val oauthTokenBase64 = sparkConf + .getOption(s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$OAUTH_TOKEN_CONF_SUFFIX") + .map { token => BaseEncoding.base64().encode(token.getBytes(Charsets.UTF_8)) } - val caCertDataBase64 = safeFileConfToBase64(KUBERNETES_DRIVER_CA_CERT_FILE, - s"Driver CA cert file provided at %s does not exist or is not a file.") - val clientKeyDataBase64 = 
safeFileConfToBase64(KUBERNETES_DRIVER_CLIENT_KEY_FILE, - s"Driver client key file provided at %s does not exist or is not a file.") - val clientCertDataBase64 = safeFileConfToBase64(KUBERNETES_DRIVER_CLIENT_CERT_FILE, - s"Driver client cert file provided at %s does not exist or is not a file.") + val caCertDataBase64 = safeFileConfToBase64( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", + s"Driver CA cert file provided at %s does not exist or is not a file.") + val clientKeyDataBase64 = safeFileConfToBase64( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", + s"Driver client key file provided at %s does not exist or is not a file.") + val clientCertDataBase64 = safeFileConfToBase64( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", + s"Driver client cert file provided at %s does not exist or is not a file.") KubernetesCredentials( oauthTokenBase64 = oauthTokenBase64, caCertDataBase64 = caCertDataBase64, @@ -56,9 +51,9 @@ private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf } private def safeFileConfToBase64( - conf: OptionalConfigEntry[String], + conf: String, fileNotFoundFormatString: String): Option[String] = { - sparkConf.get(conf) + sparkConf.getOption(conf) .map(new File(_)) .map { file => require(file.isFile, String.format(fileNotFoundFormatString, file.getAbsolutePath)) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmissionKubernetesClientProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmissionKubernetesClientProvider.scala deleted file mode 100644 index 17b61d4a6ace0..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmissionKubernetesClientProvider.scala +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit - -import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient} - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.internal.Logging - -trait SubmissionKubernetesClientProvider { - def get: KubernetesClient -} - -private[spark] class SubmissionKubernetesClientProviderImpl(sparkConf: SparkConf) - extends SubmissionKubernetesClientProvider with Logging { - - private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) - private val master = resolveK8sMaster(sparkConf.get("spark.master")) - - override def get: KubernetesClient = { - var k8ConfBuilder = new ConfigBuilder() - .withApiVersion("v1") - .withMasterUrl(master) - .withNamespace(namespace) - sparkConf.get(KUBERNETES_SUBMIT_CA_CERT_FILE).foreach { - f => k8ConfBuilder = k8ConfBuilder.withCaCertFile(f) - } - sparkConf.get(KUBERNETES_SUBMIT_CLIENT_KEY_FILE).foreach { - f => k8ConfBuilder = k8ConfBuilder.withClientKeyFile(f) - } - sparkConf.get(KUBERNETES_SUBMIT_CLIENT_CERT_FILE).foreach { - f => k8ConfBuilder = k8ConfBuilder.withClientCertFile(f) - } - sparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { token => - k8ConfBuilder = k8ConfBuilder.withOauthToken(token) - } - val k8ClientConfig = k8ConfBuilder.build - new DefaultKubernetesClient(k8ClientConfig) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala index 9d0d863d174bc..a891cf3904d2d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala @@ -21,12 +21,14 @@ import javax.ws.rs.core.MediaType import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} import okhttp3.RequestBody import retrofit2.Call import org.apache.spark.{SparkException, SSLOptions} -import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} -import org.apache.spark.deploy.rest.kubernetes.{ResourceStagingServiceRetrofit, RetrofitClientFactory} +import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.rest.kubernetes.{ResourceStagingServiceRetrofit, RetrofitClientFactory, StagedResourcesOwner, StagedResourcesOwnerType} import org.apache.spark.util.Utils private[spark] trait SubmittedDependencyUploader { @@ -76,29 +78,23 @@ private[spark] class SubmittedDependencyUploaderImpl( Utils.tryWithResource(new FileOutputStream(filesTgz)) { filesOutputStream => CompressionUtils.writeTarGzipToStream(filesOutputStream, files.map(_.getAbsolutePath)) } - // TODO provide credentials properly when the staging server monitors the Kubernetes API. 
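In the surrounding change to SubmittedDependencyUploaderImpl, the separate podLabels, podNamespace and kubernetesCredentials form parts are collapsed into a single resourcesOwner JSON part sent alongside the gzipped resources. A rough sketch of those two request bodies, assuming Jackson with the Scala module and okhttp as this file already uses; the namespace, labels and file path are placeholders:

    import java.io.File
    import com.fasterxml.jackson.databind.ObjectMapper
    import com.fasterxml.jackson.module.scala.DefaultScalaModule
    import okhttp3.{MediaType, RequestBody}
    import org.apache.spark.deploy.rest.kubernetes.{StagedResourcesOwner, StagedResourcesOwnerType}

    val mapper = new ObjectMapper().registerModule(DefaultScalaModule)

    // Describe the Kubernetes object that owns the uploaded jars/files. Placeholder values.
    val owner = StagedResourcesOwner(
      ownerNamespace = "default",
      ownerLabels = Map("spark-app-name" -> "spark-pi"),
      ownerType = StagedResourcesOwnerType.Pod)

    // "resourcesOwner" part: the owner description serialized as application/json.
    val ownerPart = RequestBody.create(
      MediaType.parse("application/json"), mapper.writeValueAsString(owner))

    // "resources" part: the gzipped tarball of submitted jars and files.
    val resourcesPart = RequestBody.create(
      MediaType.parse("multipart/form-data"), new File("/tmp/spark-resources.tgz"))

These two bodies line up with the reshaped uploadResources(resources, resourcesOwner) endpoint on the retrofit interface shown further down.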
- val kubernetesCredentialsString = OBJECT_MAPPER.writer() - .writeValueAsString(KubernetesCredentials(None, None, None, None)) - val labelsAsString = OBJECT_MAPPER.writer().writeValueAsString(podLabels) + val stagedResourcesOwner = StagedResourcesOwner( + ownerNamespace = podNamespace, + ownerLabels = podLabels, + ownerType = StagedResourcesOwnerType.Pod) + val stagedResourcesOwnerString = OBJECT_MAPPER.writeValueAsString(stagedResourcesOwner) + val stagedResourcesOwnerBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), stagedResourcesOwnerString) val filesRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), filesTgz) - - val kubernetesCredentialsBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), kubernetesCredentialsString) - - val namespaceRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.TEXT_PLAIN), podNamespace) - - val labelsRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), labelsAsString) + okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), filesTgz) val service = retrofitClientFactory.createRetrofitClient( stagingServerUri, classOf[ResourceStagingServiceRetrofit], stagingServiceSslOptions) val uploadResponse = service.uploadResources( - labelsRequestBody, namespaceRequestBody, filesRequestBody, kubernetesCredentialsBody) + resources = filesRequestBody, resourcesOwner = stagedResourcesOwnerBody) getTypedResponseResult(uploadResponse) } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala index 9bdc224f10c90..ac19c2463218b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala @@ -204,13 +204,6 @@ private class FileFetcherImpl(sparkConf: SparkConf, securityManager: SparkSecuri } } -private case class StagedResources( - resourceSecret: String, - podLabels: Map[String, String], - podNamespace: String, - resourcesFile: File, - kubernetesCredentials: KubernetesCredentials) - object KubernetesSparkDependencyDownloadInitContainer extends Logging { def main(args: Array[String]): Unit = { logInfo("Starting init-container to download Spark application dependencies.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServer.scala index 34594ba518b62..0b97317eba8b1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServer.scala @@ -21,6 +21,7 @@ import java.io.File import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.jaxrs.json.JacksonJaxbJsonProvider import com.fasterxml.jackson.module.scala.DefaultScalaModule +import io.fabric8.kubernetes.client.Config import org.eclipse.jetty.http.HttpVersion import org.eclipse.jetty.server.{HttpConfiguration, 
HttpConnectionFactory, Server, ServerConnector, SslConnectionFactory} import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} @@ -30,9 +31,10 @@ import org.glassfish.jersey.server.ResourceConfig import org.glassfish.jersey.servlet.ServletContainer import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.SparkKubernetesClientFactory import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils +import org.apache.spark.util.{SystemClock, ThreadUtils, Utils} private[spark] class ResourceStagingServer( port: Int, @@ -98,8 +100,33 @@ object ResourceStagingServer { } else { new SparkConf(true) } + val apiServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_API_SERVER_URL) + val initialAccessExpirationMs = sparkConf.get( + RESOURCE_STAGING_SERVER_INITIAL_ACCESS_EXPIRATION_TIMEOUT) val dependenciesRootDir = Utils.createTempDir(namePrefix = "local-application-dependencies") - val serviceInstance = new ResourceStagingServiceImpl(dependenciesRootDir) + val useServiceAccountCredentials = sparkConf.get( + RESOURCE_STAGING_SERVER_USE_SERVICE_ACCOUNT_CREDENTIALS) + // Namespace doesn't matter because we list resources from various namespaces + val kubernetesClient = SparkKubernetesClientFactory.createKubernetesClient( + apiServerUri, + None, + APISERVER_AUTH_RESOURCE_STAGING_SERVER_CONF_PREFIX, + sparkConf, + Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH)) + .filter( _ => useServiceAccountCredentials), + Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH)) + .filter( _ => useServiceAccountCredentials)) + + val stagedResourcesStore = new StagedResourcesStoreImpl(dependenciesRootDir) + val stagedResourcesCleaner = new StagedResourcesCleanerImpl( + stagedResourcesStore, + kubernetesClient, + ThreadUtils.newDaemonSingleThreadScheduledExecutor("resource-expiration"), + new SystemClock(), + initialAccessExpirationMs) + stagedResourcesCleaner.start() + val serviceInstance = new ResourceStagingServiceImpl( + stagedResourcesStore, stagedResourcesCleaner) val sslOptionsProvider = new ResourceStagingServerSslOptionsProviderImpl(sparkConf) val server = new ResourceStagingServer( port = sparkConf.get(RESOURCE_STAGING_SERVER_PORT), diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingService.scala index 525711e78c01c..b9d283a99ade9 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingService.scala @@ -52,13 +52,12 @@ private[spark] trait ResourceStagingService { * The tarball should contain the files laid out in a flat hierarchy, without * any directories. We take a stream here to avoid holding these entirely in * memory. - * @param podLabels Labels of pods to monitor. When no more pods are running with the given label, - * after some period of time, these dependencies will be cleared. - * @param podNamespace Namespace of pods to monitor. - * @param kubernetesCredentials These credentials are primarily used to monitor the progress of - * the application. When the application shuts down normally, shuts - * down abnormally and does not restart, or fails to start entirely, - * the data uploaded through this endpoint is cleared. 
+ * @param resourcesOwner A description of the "owner" of a resource. A resource owner is a + * Kubernetes API object in a given namespace, with a specific set of + * labels. When there are no resources of the owner's type in the given + * namespace with the given labels, the resources are cleaned up. The owner + * bundle also includes any Kubernetes credentials that are required for + * resource staging server to watch the object's state over time. * @return A unique token that should be provided when retrieving these dependencies later. */ @POST @@ -66,10 +65,8 @@ private[spark] trait ResourceStagingService { @Produces(Array(MediaType.APPLICATION_JSON)) @Path("/resources") def uploadResources( - @FormDataParam("podLabels") podLabels: Map[String, String], - @FormDataParam("podNamespace") podNamespace: String, - @FormDataParam("resources") resources: InputStream, - @FormDataParam("kubernetesCredentials") kubernetesCredentials: KubernetesCredentials) + @FormDataParam("resources") resources: InputStream, + @FormDataParam("resourcesOwner") resourcesOwner: StagedResourcesOwner) : SubmittedResourceIdAndSecret /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImpl.scala index abe956da9914d..7bc21c21619e1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImpl.scala @@ -31,58 +31,28 @@ import org.apache.spark.deploy.kubernetes.submit.SubmittedResourceIdAndSecret import org.apache.spark.internal.Logging import org.apache.spark.util.Utils -private[spark] class ResourceStagingServiceImpl(dependenciesRootDir: File) +private[spark] class ResourceStagingServiceImpl( + stagedResourcesStore: StagedResourcesStore, + stagedResourcesCleaner: StagedResourcesCleaner) extends ResourceStagingService with Logging { - private val SECURE_RANDOM = new SecureRandom() - // TODO clean up these resources based on the driver's lifecycle - private val stagedResources = TrieMap.empty[String, StagedResources] - override def uploadResources( - podLabels: Map[String, String], - podNamespace: String, resources: InputStream, - kubernetesCredentials: KubernetesCredentials): SubmittedResourceIdAndSecret = { - val resourceId = UUID.randomUUID().toString - val secretBytes = new Array[Byte](1024) - SECURE_RANDOM.nextBytes(secretBytes) - val resourceSecret = resourceId + "-" + BaseEncoding.base64().encode(secretBytes) - - val namespaceDir = new File(dependenciesRootDir, podNamespace) - val resourcesDir = new File(namespaceDir, resourceId) - try { - if (!resourcesDir.exists()) { - if (!resourcesDir.mkdirs()) { - throw new SparkException("Failed to create dependencies directory for application" + - s" at ${resourcesDir.getAbsolutePath}") - } - } - // TODO encrypt the written data with the secret. 
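From the submitter's side, the reshaped endpoint described by the @param resourcesOwner doc above is reached through the retrofit stub created in SubmittedDependencyUploaderImpl. A sketch under the assumption that a RetrofitClientFactory, SSL options, and the two request bodies from the earlier sketch are in scope; the URI is a placeholder, and the synchronous execute() call is only for illustration (the uploader itself passes the Call through a response-handling helper):

    import org.apache.spark.deploy.rest.kubernetes.ResourceStagingServiceRetrofit

    val service = retrofitClientFactory.createRetrofitClient(
      "https://staging-server.example.com:10000",   // placeholder staging server URI
      classOf[ResourceStagingServiceRetrofit],
      stagingServerSslOptions)                      // placeholder SSLOptions
    val uploadCall = service.uploadResources(
      resources = resourcesPart,
      resourcesOwner = ownerPart)
    val response = uploadCall.execute()             // retrofit2.Call#execute()
    // The response body carries the resource id and secret that must be presented
    // later when the staged bundle is downloaded by the init-container.
    val resourceIdAndSecret = response.body()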
- val resourcesTgz = new File(resourcesDir, "resources.data") - Utils.tryWithResource(new FileOutputStream(resourcesTgz)) { ByteStreams.copy(resources, _) } - stagedResources(resourceId) = StagedResources( - resourceSecret, - podLabels, - podNamespace, - resourcesTgz, - kubernetesCredentials) - SubmittedResourceIdAndSecret(resourceId, resourceSecret) - } catch { - case e: Throwable => - if (!resourcesDir.delete()) { - logWarning(s"Failed to delete application directory $resourcesDir.") - } - throw e - } + resourcesOwner: StagedResourcesOwner): SubmittedResourceIdAndSecret = { + val stagedResources = stagedResourcesStore.addResources( + resourcesOwner.ownerNamespace, resources) + stagedResourcesCleaner.registerResourceForCleaning( + stagedResources.resourceId, resourcesOwner) + SubmittedResourceIdAndSecret(stagedResources.resourceId, stagedResources.resourceSecret) } override def downloadResources(resourceId: String, resourceSecret: String): StreamingOutput = { - val resource = stagedResources - .get(resourceId) + val resource = stagedResourcesStore.getResources(resourceId) .getOrElse(throw new NotFoundException(s"No resource bundle found with id $resourceId")) if (!resource.resourceSecret.equals(resourceSecret)) { throw new NotAuthorizedException(s"Unauthorized to download resource with id $resourceId") } + stagedResourcesCleaner.markResourceAsUsed(resourceId) new StreamingOutput { override def write(outputStream: OutputStream) = { Files.copy(resource.resourcesFile, outputStream) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala index c0da44838aba3..5fbf0f9c43970 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala @@ -31,11 +31,9 @@ private[spark] trait ResourceStagingServiceRetrofit { @Multipart @retrofit2.http.POST("api/v0/resources/") def uploadResources( - @retrofit2.http.Part("podLabels") podLabels: RequestBody, - @retrofit2.http.Part("podNamespace") podNamespace: RequestBody, @retrofit2.http.Part("resources") resources: RequestBody, - @retrofit2.http.Part("kubernetesCredentials") - kubernetesCredentials: RequestBody): Call[SubmittedResourceIdAndSecret] + @retrofit2.http.Part("resourcesOwner") resourcesOwner: RequestBody) + : Call[SubmittedResourceIdAndSecret] @Streaming @retrofit2.http.GET("api/v0/resources/{resourceId}") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResources.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResources.scala new file mode 100644 index 0000000000000..81f394800f803 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResources.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes + +import java.io.File + +case class StagedResources( + resourceId: String, + resourceSecret: String, + resourcesFile: File) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleaner.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleaner.scala new file mode 100644 index 0000000000000..5d9db728483fa --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleaner.scala @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes + +import java.util.concurrent.{ScheduledExecutorService, TimeUnit} + +import io.fabric8.kubernetes.client.KubernetesClient +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.SparkException +import org.apache.spark.internal.Logging +import org.apache.spark.util.Clock + +private[spark] trait StagedResourcesCleaner { + + def start(): Unit + + def registerResourceForCleaning( + resourceId: String, stagedResourceOwner: StagedResourcesOwner): Unit + + def markResourceAsUsed(resourceId: String): Unit +} + +private class StagedResourcesCleanerImpl( + stagedResourcesStore: StagedResourcesStore, + kubernetesClient: KubernetesClient, + cleanupExecutorService: ScheduledExecutorService, + clock: Clock, + initialAccessExpirationMs: Long) + extends StagedResourcesCleaner { + + private val CLEANUP_INTERVAL_MS = 30000 + private val RESOURCE_LOCK = new Object() + private val activeResources = mutable.Map.empty[String, MonitoredResource] + private val unusedResources = mutable.Map.empty[String, UnusedMonitoredResource] + + override def start(): Unit = { + cleanupExecutorService.scheduleAtFixedRate( + new CleanupRunnable(), + CLEANUP_INTERVAL_MS, + CLEANUP_INTERVAL_MS, + TimeUnit.MILLISECONDS) + } + + override def registerResourceForCleaning( + resourceId: String, stagedResourceOwner: StagedResourcesOwner): Unit = { + RESOURCE_LOCK.synchronized { + unusedResources(resourceId) = UnusedMonitoredResource( + clock.getTimeMillis() + initialAccessExpirationMs, + MonitoredResource(resourceId, stagedResourceOwner)) + + } + } + + override def markResourceAsUsed(resourceId: String): Unit = RESOURCE_LOCK.synchronized { + val resource = unusedResources.remove(resourceId) + resource.foreach { res => + activeResources(resourceId) = res.resource + } + } + + private class CleanupRunnable extends Runnable with Logging { + + override def run(): Unit = { + // Make a copy so we can iterate through this while modifying + val activeResourcesCopy = RESOURCE_LOCK.synchronized { + Map.apply(activeResources.toSeq: _*) + } + for ((resourceId, resource) <- activeResourcesCopy) { + val namespace = kubernetesClient.namespaces() + .withName(resource.stagedResourceOwner.ownerNamespace) + .get() + if (namespace == null) { + logInfo(s"Resource files with id $resourceId is being removed. The owner's namespace" + + s" ${resource.stagedResourceOwner.ownerNamespace} was not found.") + stagedResourcesStore.removeResources(resourceId) + RESOURCE_LOCK.synchronized { + activeResources.remove(resourceId) + } + } else { + val metadataOperation = resource.stagedResourceOwner.ownerType match { + case StagedResourcesOwnerType.Pod => + kubernetesClient.pods().inNamespace(resource.stagedResourceOwner.ownerNamespace) + case _ => + throw new SparkException(s"Unsupported resource owner type for cleanup:" + + s" ${resource.stagedResourceOwner.ownerType}") + } + if (metadataOperation + .withLabels(resource.stagedResourceOwner.ownerLabels.asJava) + .list() + .getItems + .isEmpty) { + logInfo(s"Resource files with id $resourceId is being removed. 
Owners of the" + + s" resource with namespace: ${resource.stagedResourceOwner.ownerNamespace}," + + s" type: ${resource.stagedResourceOwner.ownerType}, and labels:" + + s" ${resource.stagedResourceOwner.ownerLabels} was not found on the API server.") + stagedResourcesStore.removeResources(resourceId) + RESOURCE_LOCK.synchronized { + activeResources.remove(resourceId) + } + } + } + } + + // Make a copy so we can iterate through this while modifying + val unusedResourcesCopy = RESOURCE_LOCK.synchronized { + Map.apply(unusedResources.toSeq: _*) + } + + for ((resourceId, resource) <- unusedResourcesCopy) { + if (resource.expiresAt < clock.getTimeMillis()) { + RESOURCE_LOCK.synchronized { + // Check for existence again here (via foreach) because in between the time we starting + // iterating over the unused resources copy, we might have already marked the resource + // as active in-between, and likely shouldn't remove the resources in such a case. + unusedResources.remove(resourceId).foreach { _ => + logInfo(s"Resources with id $resourceId was not accessed after being added to" + + s" the staging server at least $initialAccessExpirationMs ms ago. The resource" + + s" will be deleted.") + stagedResourcesStore.removeResources(resourceId) + } + } + } + } + } + } + + private case class MonitoredResource( + resourceId: String, + stagedResourceOwner: StagedResourcesOwner) + + private case class UnusedMonitoredResource(expiresAt: Long, resource: MonitoredResource) +} + + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesOwner.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesOwner.scala new file mode 100644 index 0000000000000..4061bc36764d7 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesOwner.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes + +import com.fasterxml.jackson.core.`type`.TypeReference +import com.fasterxml.jackson.module.scala.JsonScalaEnumeration + +object StagedResourcesOwnerType extends Enumeration { + type OwnerType = Value + // In more generic scenarios, we might want to be watching Deployments, etc. 
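Because the owner description crosses the wire as JSON, the enumeration relies on the TypeReference/@JsonScalaEnumeration pairing declared just below so that Jackson knows which enumeration to rebuild on the server side; the value itself travels by name. A small parsing sketch, assuming jackson-module-scala on the classpath and the definitions in this file in scope; the field values are placeholders:

    import com.fasterxml.jackson.databind.ObjectMapper
    import com.fasterxml.jackson.module.scala.DefaultScalaModule

    val mapper = new ObjectMapper().registerModule(DefaultScalaModule)
    // The owner type appears by name ("Pod") rather than as an opaque object.
    val json =
      """{"ownerNamespace": "default", "ownerLabels": {"spark-app-name": "spark-pi"}, "ownerType": "Pod"}"""
    val owner = mapper.readValue(json, classOf[StagedResourcesOwner])
    assert(owner.ownerType == StagedResourcesOwnerType.Pod)

This is the same shape the uploader serializes into the resourcesOwner form part.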
+ val Pod = Value +} + +class StagedResourcesOwnerTypeReference extends TypeReference[StagedResourcesOwnerType.type] + +case class StagedResourcesOwner( + ownerNamespace: String, + ownerLabels: Map[String, String], + @JsonScalaEnumeration(classOf[StagedResourcesOwnerTypeReference]) + ownerType: StagedResourcesOwnerType.OwnerType) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStore.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStore.scala new file mode 100644 index 0000000000000..0c0d428e035dc --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStore.scala @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes + +import java.io.{File, FileOutputStream, InputStream, IOException} +import java.security.SecureRandom +import java.util.UUID + +import com.google.common.io.{BaseEncoding, ByteStreams} +import org.apache.commons.io.FileUtils +import scala.collection.concurrent.TrieMap + +import org.apache.spark.SparkException +import org.apache.spark.internal.Logging +import org.apache.spark.util.Utils + + +private[spark] trait StagedResourcesStore { + + /** + * Store the given stream on disk and return its resource ID and secret. + */ + def addResources( + podNamespace: String, + resources: InputStream): StagedResources + + /** + * Retrieve a resource bundle with the given id. Returns empty if no resources match this id. + */ + def getResources(resourceId: String): Option[StagedResources] + + def removeResources(resourceId: String): Unit +} + +private[spark] class StagedResourcesStoreImpl(dependenciesRootDir: File) + extends StagedResourcesStore with Logging { + + private val SECURE_RANDOM = new SecureRandom() + private val stagedResources = TrieMap.empty[String, StagedResources] + + override def addResources( + podNamespace: String, + resources: InputStream): StagedResources = { + val resourceId = UUID.randomUUID().toString + val secretBytes = new Array[Byte](1024) + SECURE_RANDOM.nextBytes(secretBytes) + val resourceSecret = resourceId + "-" + BaseEncoding.base64().encode(secretBytes) + + val namespaceDir = new File(dependenciesRootDir, podNamespace) + val resourcesDir = new File(namespaceDir, resourceId) + try { + if (!resourcesDir.exists()) { + if (!resourcesDir.mkdirs()) { + throw new SparkException("Failed to create dependencies directory for application" + + s" at ${resourcesDir.getAbsolutePath}") + } + } + // TODO encrypt the written data with the secret. 
+ val resourcesFile = new File(resourcesDir, "resources.data") + Utils.tryWithResource(new FileOutputStream(resourcesFile)) { + ByteStreams.copy(resources, _) + } + val resourceBundle = StagedResources(resourceId, resourceSecret, resourcesFile) + stagedResources(resourceId) = resourceBundle + resourceBundle + } catch { + case e: Throwable => + if (!resourcesDir.delete()) { + logWarning(s"Failed to delete application directory $resourcesDir.") + } + stagedResources.remove(resourceId) + throw e + } + } + + override def getResources(resourceId: String): Option[StagedResources] = { + stagedResources.get(resourceId) + } + + override def removeResources(resourceId: String): Unit = { + stagedResources.remove(resourceId) + .map(_.resourcesFile.getParentFile) + .foreach { resourcesDirectory => + try { + FileUtils.deleteDirectory(resourcesDirectory) + } catch { + case e: IOException => + logWarning(s"Failed to delete resources directory" + + s" at ${resourcesDirectory.getAbsolutePath}", e) + } + } + } +} + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala deleted file mode 100644 index cc2032219f885..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.scheduler.cluster.kubernetes - -import java.io.File - -import com.google.common.base.Charsets -import com.google.common.io.Files -import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient} -import io.fabric8.kubernetes.client.utils.HttpClientUtils -import okhttp3.Dispatcher - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.util.ThreadUtils - -private[spark] class DriverPodKubernetesClientProvider( - sparkConf: SparkConf, - namespace: Option[String] = None) { - - private val SERVICE_ACCOUNT_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) - private val SERVICE_ACCOUNT_CA_CERT = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) - private val oauthTokenFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN) - private val caCertFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE) - private val clientKeyFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE) - private val clientCertFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE) - - /** - * Creates a {@link KubernetesClient}, expecting to be from within the context of a pod. When - * doing so, service account token files can be picked up from canonical locations. - */ - def get: DefaultKubernetesClient = { - val baseClientConfigBuilder = new ConfigBuilder() - .withApiVersion("v1") - .withMasterUrl(KUBERNETES_MASTER_INTERNAL_URL) - - // Build a namespaced client if specified. - val namespacedClientConfigBuilder = namespace - .map(baseClientConfigBuilder.withNamespace(_)).getOrElse(baseClientConfigBuilder) - - val configBuilder = oauthTokenFile - .orElse(caCertFile) - .orElse(clientKeyFile) - .orElse(clientCertFile) - .map { _ => - var mountedAuthConfigBuilder = baseClientConfigBuilder - oauthTokenFile.foreach { tokenFilePath => - val tokenFile = new File(tokenFilePath) - mountedAuthConfigBuilder = mountedAuthConfigBuilder - .withOauthToken(Files.toString(tokenFile, Charsets.UTF_8)) - } - caCertFile.foreach { caFile => - mountedAuthConfigBuilder = mountedAuthConfigBuilder.withCaCertFile(caFile) - } - clientKeyFile.foreach { keyFile => - mountedAuthConfigBuilder = mountedAuthConfigBuilder.withClientKeyFile(keyFile) - } - clientCertFile.foreach { certFile => - mountedAuthConfigBuilder = mountedAuthConfigBuilder.withClientCertFile(certFile) - } - mountedAuthConfigBuilder - }.getOrElse { - var serviceAccountConfigBuilder = baseClientConfigBuilder - if (SERVICE_ACCOUNT_CA_CERT.isFile) { - serviceAccountConfigBuilder = serviceAccountConfigBuilder.withCaCertFile( - SERVICE_ACCOUNT_CA_CERT.getAbsolutePath) - } - - if (SERVICE_ACCOUNT_TOKEN.isFile) { - serviceAccountConfigBuilder = serviceAccountConfigBuilder.withOauthToken( - Files.toString(SERVICE_ACCOUNT_TOKEN, Charsets.UTF_8)) - } - serviceAccountConfigBuilder - } - // Disable the ping thread that is not daemon, in order to allow - // the driver main thread to shut down upon errors. Otherwise, the driver - // will hang indefinitely. - val config = configBuilder - .withWebsocketPingInterval(0) - .build() - val httpClient = HttpClientUtils.createHttpClient(config).newBuilder() - // Use a Dispatcher with a custom executor service that creates daemon threads. The default - // executor service used by Dispatcher creates non-daemon threads. 
- .dispatcher(new Dispatcher(ThreadUtils.newDaemonCachedThreadPool("spark-on-k8s"))) - .build() - new DefaultKubernetesClient(httpClient, config) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala index e2630b9918b61..6abce55cff209 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala @@ -16,9 +16,14 @@ */ package org.apache.spark.scheduler.cluster.kubernetes +import java.io.File + +import io.fabric8.kubernetes.client.Config + import org.apache.spark.SparkContext -import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, SparkPodInitContainerBootstrapImpl} +import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, SparkKubernetesClientFactory, SparkPodInitContainerBootstrapImpl} import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.internal.Logging import org.apache.spark.scheduler.{ExternalClusterManager, SchedulerBackend, TaskScheduler, TaskSchedulerImpl} @@ -75,8 +80,15 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit logWarning("The executor's init-container config map key was not specified. Executors will" + " therefore not attempt to fetch remote or submitted dependencies.") } + val kubernetesClient = SparkKubernetesClientFactory.createKubernetesClient( + KUBERNETES_MASTER_INTERNAL_URL, + Some(sparkConf.get(KUBERNETES_NAMESPACE)), + APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX, + sparkConf, + Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH)), + Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH))) new KubernetesClusterSchedulerBackend( - sc.taskScheduler.asInstanceOf[TaskSchedulerImpl], sc, bootStrap) + sc.taskScheduler.asInstanceOf[TaskSchedulerImpl], sc, bootStrap, kubernetesClient) } override def initialize(scheduler: TaskScheduler, backend: SchedulerBackend): Unit = { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 257cee80fdea9..1852ed021d91a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -21,7 +21,7 @@ import java.util.concurrent.TimeUnit import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference} import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, EnvVarSourceBuilder, Pod, PodBuilder, QuantityBuilder} -import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} +import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import org.apache.commons.io.FilenameUtils import scala.collection.JavaConverters._ @@ -43,7 +43,8 @@ import 
org.apache.spark.util.{ThreadUtils, Utils} private[spark] class KubernetesClusterSchedulerBackend( scheduler: TaskSchedulerImpl, val sc: SparkContext, - executorInitContainerBootstrap: Option[SparkPodInitContainerBootstrap]) + executorInitContainerBootstrap: Option[SparkPodInitContainerBootstrap], + kubernetesClient: KubernetesClient) extends CoarseGrainedSchedulerBackend(scheduler, sc.env.rpcEnv) { import KubernetesClusterSchedulerBackend._ @@ -102,9 +103,6 @@ private[spark] class KubernetesClusterSchedulerBackend( private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("kubernetes-executor-requests")) - private val kubernetesClient = new DriverPodKubernetesClientProvider(conf, - Some(kubernetesNamespace)).get - private val driverPod = try { kubernetesClient.pods().inNamespace(kubernetesNamespace). withName(kubernetesDriverPodName).get() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala index ff6c710117318..00f09c64b53b7 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala @@ -37,7 +37,6 @@ import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.shuffle.kubernetes.KubernetesExternalShuffleClient -import org.apache.spark.scheduler.cluster.kubernetes.DriverPodKubernetesClientProvider class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val JARS_RESOURCE = SubmittedResourceIdAndSecret("jarsId", "jarsSecret") @@ -131,8 +130,6 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { @Mock private var initContainerComponentsProvider: DriverInitContainerComponentsProvider = _ @Mock - private var kubernetesClientProvider: SubmissionKubernetesClientProvider = _ - @Mock private var kubernetesClient: KubernetesClient = _ @Mock private var podOps: MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] = _ @@ -174,7 +171,6 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .thenReturn(INIT_CONTAINER_SECRET) when(initContainerConfigMapBuilder.build()) .thenReturn(INIT_CONTAINER_CONFIG_MAP) - when(kubernetesClientProvider.get).thenReturn(kubernetesClient) when(kubernetesClient.pods()).thenReturn(podOps) when(podOps.create(any())).thenAnswer(new Answer[Pod] { override def answer(invocation: InvocationOnMock): Pod = { @@ -302,37 +298,13 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { SPARK_JARS, SPARK_FILES, true, - kubernetesClientProvider, + kubernetesClient, initContainerComponentsProvider, credentialsMounterProvider, loggingPodStatusWatcher).run() verify(loggingPodStatusWatcher).awaitCompletion() } - test("Run kubernetes shuffle service.") { - expectationsForNoMountedCredentials() - expectationsForNoDependencyUploader() - - val shuffleService = new KubernetesExternalShuffleService( - SPARK_CONF, - new SecurityManager(SPARK_CONF), - new DriverPodKubernetesClientProvider(SPARK_CONF)) - - val shuffleClient = new KubernetesExternalShuffleClient( - SparkTransportConf.fromSparkConf(SPARK_CONF, "shuffle"), - new SecurityManager(SPARK_CONF), - false, - false) - 
- shuffleService.start() - shuffleClient.init("newapp") - - // verifies that we can connect to the shuffle service and send - // it a message. - shuffleClient.registerDriverWithShuffleService("localhost", 7337) - shuffleService.stop() - } - private def expectationsForNoDependencyUploader(): Unit = { when(initContainerComponentsProvider .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) @@ -409,7 +381,7 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { SPARK_JARS, SPARK_FILES, false, - kubernetesClientProvider, + kubernetesClient, initContainerComponentsProvider, credentialsMounterProvider, loggingPodStatusWatcher).run() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala index c1005a176408c..2e0a7ba5098b2 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala @@ -111,13 +111,17 @@ class DriverPodKubernetesCredentialsMounterSuite val baseSparkConf = new SparkConf() val resolvedSparkConf = credentialsMounter.setDriverPodKubernetesCredentialLocations(baseSparkConf) - assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE) === + assert(resolvedSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX") === expectedClientKeyFile) - assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE) === + assert(resolvedSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX") === expectedClientCertFile) - assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE) === + assert(resolvedSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX") === expectedCaCertFile) - assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN) === + assert(resolvedSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX") === expectedOAuthTokenFile) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesExternalShuffleServiceSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesExternalShuffleServiceSuite.scala new file mode 100644 index 0000000000000..0de1955884c8e --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesExternalShuffleServiceSuite.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit + +import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.KubernetesExternalShuffleService +import org.apache.spark.network.netty.SparkTransportConf +import org.apache.spark.network.shuffle.kubernetes.KubernetesExternalShuffleClient + +private[spark] class KubernetesExternalShuffleServiceSuite extends SparkFunSuite { + + private val SPARK_CONF = new SparkConf() + .set(KUBERNETES_SHUFFLE_USE_SERVICE_ACCOUNT_CREDENTIALS, false) + + test("Run kubernetes shuffle service.") { + val shuffleService = new KubernetesExternalShuffleService( + SPARK_CONF, + new SecurityManager(SPARK_CONF)) + + val shuffleClient = new KubernetesExternalShuffleClient( + SparkTransportConf.fromSparkConf(SPARK_CONF, "shuffle"), + new SecurityManager(SPARK_CONF), + false, + false) + + shuffleService.start() + shuffleClient.init("newapp") + + // verifies that we can connect to the shuffle service and send + // it a message. + shuffleClient.registerDriverWithShuffleService("localhost", 7337) + shuffleService.stop() + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala index 8693ff4e15372..c207e3c69cd3c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala @@ -22,26 +22,24 @@ import java.util.UUID import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.common.base.Charsets -import com.google.common.io.Files +import com.google.common.io.{BaseEncoding, Files} import okhttp3.RequestBody import okio.Okio -import org.mockito.Matchers.any -import org.mockito.Mockito -import org.mockito.invocation.InvocationOnMock -import org.mockito.stubbing.Answer +import org.mockito.{ArgumentCaptor, Mockito} import org.scalatest.BeforeAndAfter import org.scalatest.mock.MockitoSugar._ import retrofit2.{Call, Response} import org.apache.spark.{SparkFunSuite, SSLOptions} import org.apache.spark.deploy.kubernetes.CompressionUtils -import org.apache.spark.deploy.rest.kubernetes.{ResourceStagingServiceRetrofit, RetrofitClientFactory} +import org.apache.spark.deploy.rest.kubernetes.{ResourceStagingServiceRetrofit, RetrofitClientFactory, StagedResourcesOwner} import org.apache.spark.util.Utils private[spark] class SubmittedDependencyUploaderSuite extends SparkFunSuite with BeforeAndAfter { import SubmittedDependencyUploaderSuite.createTempFile private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + private val BASE_64 = BaseEncoding.base64() private val APP_ID = "app-id" private val LABELS = Map("label1" -> "label1value", "label2" 
-> "label2value") private val NAMESPACE = "namespace" @@ -61,18 +59,31 @@ private[spark] class SubmittedDependencyUploaderSuite extends SparkFunSuite with trustStore = Some(TRUSTSTORE_FILE), trustStorePassword = Some(TRUSTSTORE_PASSWORD), trustStoreType = Some(TRUSTSTORE_TYPE)) + private val CLIENT_KEY_FILE = createTempFile("pem") + private val CLIENT_CERT_FILE = createTempFile("pem") + private val OAUTH_TOKEN = "token" private var retrofitClientFactory: RetrofitClientFactory = _ private var retrofitClient: ResourceStagingServiceRetrofit = _ + private var resourcesOwnerCaptor: ArgumentCaptor[RequestBody] = _ + private var resourcesDataCaptor: ArgumentCaptor[RequestBody] = _ private var dependencyUploaderUnderTest: SubmittedDependencyUploader = _ before { + resourcesOwnerCaptor = ArgumentCaptor.forClass(classOf[RequestBody]) + resourcesDataCaptor = ArgumentCaptor.forClass(classOf[RequestBody]) retrofitClientFactory = mock[RetrofitClientFactory] retrofitClient = mock[ResourceStagingServiceRetrofit] Mockito.when( retrofitClientFactory.createRetrofitClient( STAGING_SERVER_URI, classOf[ResourceStagingServiceRetrofit], STAGING_SERVER_SSL_OPTIONS)) .thenReturn(retrofitClient) + val responseCall = mock[Call[SubmittedResourceIdAndSecret]] + Mockito.when(responseCall.execute()).thenReturn( + Response.success(SubmittedResourceIdAndSecret("resourceId", "resourceSecret"))) + Mockito.when(retrofitClient.uploadResources( + resourcesDataCaptor.capture(), resourcesOwnerCaptor.capture())) + .thenReturn(responseCall) dependencyUploaderUnderTest = new SubmittedDependencyUploaderImpl( APP_ID, LABELS, @@ -85,38 +96,24 @@ private[spark] class SubmittedDependencyUploaderSuite extends SparkFunSuite with } test("Uploading jars should contact the staging server with the appropriate parameters") { - val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( - SubmittedResourceIdAndSecret("resourceId", "resourceSecret")) - Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) - .thenAnswer(capturingArgumentsAnswer) dependencyUploaderUnderTest.uploadJars() - testUploadSendsCorrectFiles(LOCAL_JARS, capturingArgumentsAnswer) + testUploadSendsCorrectFiles(LOCAL_JARS) } test("Uploading files should contact the staging server with the appropriate parameters") { - val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( - SubmittedResourceIdAndSecret("resourceId", "resourceSecret")) - Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) - .thenAnswer(capturingArgumentsAnswer) dependencyUploaderUnderTest.uploadFiles() - testUploadSendsCorrectFiles(LOCAL_FILES, capturingArgumentsAnswer) + testUploadSendsCorrectFiles(LOCAL_FILES) } - private def testUploadSendsCorrectFiles( - expectedFiles: Seq[String], - capturingArgumentsAnswer: UploadDependenciesArgumentsCapturingAnswer) = { - val requestLabelsBytes = requestBodyBytes(capturingArgumentsAnswer.podLabelsArg) - val requestLabelsString = new String(requestLabelsBytes, Charsets.UTF_8) - val requestLabelsMap = OBJECT_MAPPER.readValue( - requestLabelsString, classOf[Map[String, String]]) - assert(requestLabelsMap === LABELS) - val requestNamespaceBytes = requestBodyBytes(capturingArgumentsAnswer.podNamespaceArg) - val requestNamespaceString = new String(requestNamespaceBytes, Charsets.UTF_8) - assert(requestNamespaceString === NAMESPACE) - + private def testUploadSendsCorrectFiles(expectedFiles: Seq[String]) = { + val resourceOwnerString = new String( + requestBodyBytes(resourcesOwnerCaptor.getValue), 
Charsets.UTF_8) + val resourceOwner = OBJECT_MAPPER.readValue(resourceOwnerString, classOf[StagedResourcesOwner]) + assert(resourceOwner.ownerLabels === LABELS) + assert(resourceOwner.ownerNamespace === NAMESPACE) val unpackedFilesDir = Utils.createTempDir(namePrefix = "test-unpacked-files") val compressedBytesInput = new ByteArrayInputStream( - requestBodyBytes(capturingArgumentsAnswer.podResourcesArg)) + requestBodyBytes(resourcesDataCaptor.getValue())) CompressionUtils.unpackTarStreamToDirectory(compressedBytesInput, unpackedFilesDir) val writtenFiles = unpackedFilesDir.listFiles assert(writtenFiles.size === expectedFiles.size) @@ -148,25 +145,6 @@ private[spark] class SubmittedDependencyUploaderSuite extends SparkFunSuite with } } -private class UploadDependenciesArgumentsCapturingAnswer(returnValue: SubmittedResourceIdAndSecret) - extends Answer[Call[SubmittedResourceIdAndSecret]] { - - var podLabelsArg: RequestBody = _ - var podNamespaceArg: RequestBody = _ - var podResourcesArg: RequestBody = _ - var kubernetesCredentialsArg: RequestBody = _ - - override def answer(invocationOnMock: InvocationOnMock): Call[SubmittedResourceIdAndSecret] = { - podLabelsArg = invocationOnMock.getArgumentAt(0, classOf[RequestBody]) - podNamespaceArg = invocationOnMock.getArgumentAt(1, classOf[RequestBody]) - podResourcesArg = invocationOnMock.getArgumentAt(2, classOf[RequestBody]) - kubernetesCredentialsArg = invocationOnMock.getArgumentAt(3, classOf[RequestBody]) - val responseCall = mock[Call[SubmittedResourceIdAndSecret]] - Mockito.when(responseCall.execute()).thenReturn(Response.success(returnValue)) - responseCall - } -} - private object SubmittedDependencyUploaderSuite { def createTempFile(extension: String): String = { val dir = Utils.createTempDir() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala index 0604e0d6494ae..0c0908da20d89 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala @@ -24,10 +24,11 @@ import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.common.io.ByteStreams import okhttp3.{RequestBody, ResponseBody} import org.scalatest.BeforeAndAfter +import org.scalatest.mock.MockitoSugar.mock import retrofit2.Call import org.apache.spark.{SparkFunSuite, SSLOptions} -import org.apache.spark.deploy.kubernetes.{KubernetesCredentials, SSLUtils} +import org.apache.spark.deploy.kubernetes.SSLUtils import org.apache.spark.util.Utils /** @@ -40,12 +41,21 @@ import org.apache.spark.util.Utils * receive streamed uploads and can stream downloads. 
*/ class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { + private var serviceImpl: ResourceStagingService = _ + private var stagedResourcesCleaner: StagedResourcesCleaner = _ + private var server: ResourceStagingServer = _ private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) private val serverPort = new ServerSocket(0).getLocalPort - private val serviceImpl = new ResourceStagingServiceImpl(Utils.createTempDir()) + private val sslOptionsProvider = new SettableReferenceSslOptionsProvider() - private val server = new ResourceStagingServer(serverPort, serviceImpl, sslOptionsProvider) + + before { + stagedResourcesCleaner = mock[StagedResourcesCleaner] + serviceImpl = new ResourceStagingServiceImpl( + new StagedResourcesStoreImpl(Utils.createTempDir()), stagedResourcesCleaner) + server = new ResourceStagingServer(serverPort, serviceImpl, sslOptionsProvider) + } after { server.stop() @@ -83,20 +93,17 @@ class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { val resourcesBytes = Array[Byte](1, 2, 3, 4) val labels = Map("label1" -> "label1Value", "label2" -> "label2value") val namespace = "namespace" - val labelsJson = OBJECT_MAPPER.writer().writeValueAsString(labels) + val resourcesOwner = StagedResourcesOwner( + ownerLabels = labels, + ownerNamespace = namespace, + ownerType = StagedResourcesOwnerType.Pod) + val resourcesOwnerJson = OBJECT_MAPPER.writeValueAsString(resourcesOwner) + val resourcesOwnerRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), resourcesOwnerJson) val resourcesRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), resourcesBytes) - val labelsRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), labelsJson) - val namespaceRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.TEXT_PLAIN), namespace) - val kubernetesCredentials = KubernetesCredentials(Some("token"), Some("ca-cert"), None, None) - val kubernetesCredentialsString = OBJECT_MAPPER.writer() - .writeValueAsString(kubernetesCredentials) - val kubernetesCredentialsBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), kubernetesCredentialsString) + okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), resourcesBytes) val uploadResponse = retrofitService.uploadResources( - labelsRequestBody, namespaceRequestBody, resourcesRequestBody, kubernetesCredentialsBody) + resourcesRequestBody, resourcesOwnerRequestBody) val resourceIdentifier = getTypedResponseResult(uploadResponse) checkResponseBodyBytesMatches( retrofitService.downloadResources( diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImplSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImplSuite.scala deleted file mode 100644 index 53396a3f27a1a..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImplSuite.scala +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.rest.kubernetes - -import java.io.{ByteArrayInputStream, File} -import java.nio.file.Paths - -import com.google.common.io.Files - -import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.kubernetes.KubernetesCredentials -import org.apache.spark.util.Utils - -/** - * Unit, scala-level tests for KubernetesSparkDependencyServiceImpl. The coverage here - * differs from that of KubernetesSparkDependencyServerSuite as here we invoke the - * implementation methods directly as opposed to over HTTP, as well as check the - * data written to the underlying disk. - */ -class ResourceStagingServiceImplSuite extends SparkFunSuite { - - private val dependencyRootDir = Utils.createTempDir() - private val serviceImpl = new ResourceStagingServiceImpl(dependencyRootDir) - private val resourceBytes = Array[Byte](1, 2, 3, 4) - private val kubernetesCredentials = KubernetesCredentials( - Some("token"), Some("caCert"), Some("key"), Some("cert")) - private val namespace = "namespace" - private val labels = Map("label1" -> "label1value", "label2" -> "label2value") - - test("Uploads should write data to the underlying disk") { - Utils.tryWithResource(new ByteArrayInputStream(resourceBytes)) { resourceStream => - serviceImpl.uploadResources(labels, namespace, resourceStream, kubernetesCredentials) - } - val resourceNamespaceDir = Paths.get(dependencyRootDir.getAbsolutePath, "namespace").toFile - assert(resourceNamespaceDir.isDirectory, s"Resource namespace dir was not created at" + - s" ${resourceNamespaceDir.getAbsolutePath} or is not a directory.") - val resourceDirs = resourceNamespaceDir.listFiles() - assert(resourceDirs.length === 1, s"Resource root directory did not have exactly one" + - s" subdirectory. Got: ${resourceDirs.map(_.getAbsolutePath).mkString(",")}") - val resourceTgz = new File(resourceDirs(0), "resources.data") - assert(resourceTgz.isFile, - s"Resources written to ${resourceTgz.getAbsolutePath} does not exist or is not a file.") - val resourceTgzBytes = Files.toByteArray(resourceTgz) - assert(resourceTgzBytes.toSeq === resourceBytes.toSeq, "Incorrect resource bytes were written.") - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleanerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleanerSuite.scala new file mode 100644 index 0000000000000..8b398a9891f34 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleanerSuite.scala @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes + +import java.util.concurrent.{ScheduledExecutorService, TimeUnit} + +import io.fabric8.kubernetes.api.model.{DoneableNamespace, DoneablePod, Namespace, NamespaceList, Pod, PodList, PodListBuilder} +import io.fabric8.kubernetes.client.{KubernetesClient, Watch, Watcher} +import io.fabric8.kubernetes.client.dsl.{FilterWatchListDeletable, MixedOperation, NonNamespaceOperation, PodResource, Resource} +import org.mockito.{ArgumentCaptor, Mock, MockitoAnnotations} +import org.mockito.Matchers.{eq => mockitoEq} +import org.mockito.Mockito.{never, verify, when} +import org.scalatest.BeforeAndAfter +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.util.Clock + +private[spark] class StagedResourcesCleanerSuite extends SparkFunSuite with BeforeAndAfter { + + private type PODS = MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] + private type PODSWITHLABELS = FilterWatchListDeletable[ + Pod, PodList, java.lang.Boolean, Watch, Watcher[Pod]] + private type PODSINNAMESPACE = NonNamespaceOperation[ + Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] + private type NAMESPACES = NonNamespaceOperation[ + Namespace, NamespaceList, DoneableNamespace, Resource[Namespace, DoneableNamespace]] + private type NAMESPACEWITHNAME = Resource[Namespace, DoneableNamespace] + + private val INITIAL_ACCESS_EXPIRATION_MS = 5000L + private val CURRENT_TIME = 10000L + private val RESOURCE_ID = "resource-id" + private val POD_NAMESPACE = "namespace" + private val POD_LABELS = Map("label1" -> "label1value", "label2" -> "label2value") + private val RESOURCES_OWNER = StagedResourcesOwner( + ownerNamespace = POD_NAMESPACE, + ownerLabels = POD_LABELS, + ownerType = StagedResourcesOwnerType.Pod) + + @Mock + private var stagedResourcesStore: StagedResourcesStore = _ + @Mock + private var kubernetesClient: KubernetesClient = _ + @Mock + private var clock: Clock = _ + @Mock + private var cleanerExecutorService: ScheduledExecutorService = _ + @Mock + private var podOperations: PODS = _ + @Mock + private var podsInNamespaceOperations: PODSINNAMESPACE = _ + @Mock + private var podsWithLabelsOperations: PODSWITHLABELS = _ + @Mock + private var namespaceOperations: NAMESPACES = _ + @Mock + private var namedNamespaceOperations: NAMESPACEWITHNAME = _ + private var cleanerUnderTest: StagedResourcesCleaner = _ + + before { + MockitoAnnotations.initMocks(this) + cleanerUnderTest = new StagedResourcesCleanerImpl( + stagedResourcesStore, + kubernetesClient, + cleanerExecutorService, + clock, + INITIAL_ACCESS_EXPIRATION_MS) + when(kubernetesClient.pods()).thenReturn(podOperations) + when(podOperations.withLabels(POD_LABELS.asJava)).thenReturn(podsWithLabelsOperations) + when(kubernetesClient.namespaces()).thenReturn(namespaceOperations) + } + + test("Clean the resource if it is never accessed for the expiration interval.") { + val 
cleanupRunnable = startCleanupAndGetCleanupRunnable() + cleanerUnderTest.registerResourceForCleaning(RESOURCE_ID, RESOURCES_OWNER) + when(clock.getTimeMillis()).thenReturn(CURRENT_TIME + INITIAL_ACCESS_EXPIRATION_MS) + cleanupRunnable.run() + verify(stagedResourcesStore).removeResources(RESOURCE_ID) + verify(kubernetesClient, never()).pods() + } + + test("Don't clean the resource if it is accessed in the expiration interval" + + " and there are owners available.") { + val cleanupRunnable = startCleanupAndGetCleanupRunnable() + cleanerUnderTest.registerResourceForCleaning(RESOURCE_ID, RESOURCES_OWNER) + cleanerUnderTest.markResourceAsUsed(RESOURCE_ID) + when(clock.getTimeMillis()).thenReturn(CURRENT_TIME + INITIAL_ACCESS_EXPIRATION_MS) + when(namespaceOperations.withName(POD_NAMESPACE)).thenReturn(namedNamespaceOperations) + when(namedNamespaceOperations.get()).thenReturn(new Namespace()) + when(podOperations.inNamespace(POD_NAMESPACE)).thenReturn(podsInNamespaceOperations) + when(podsInNamespaceOperations.withLabels(POD_LABELS.asJava)) + .thenReturn(podsWithLabelsOperations) + when(podsWithLabelsOperations.list()).thenReturn( + new PodListBuilder().addNewItemLike(new Pod()).endItem().build()) + cleanupRunnable.run() + verify(stagedResourcesStore, never()).removeResources(RESOURCE_ID) + } + + test("Clean the resource if no owners are available.") { + val cleanupRunnable = startCleanupAndGetCleanupRunnable() + cleanerUnderTest.registerResourceForCleaning(RESOURCE_ID, RESOURCES_OWNER) + cleanerUnderTest.markResourceAsUsed(RESOURCE_ID) + when(clock.getTimeMillis()).thenReturn(CURRENT_TIME + INITIAL_ACCESS_EXPIRATION_MS) + when(namespaceOperations.withName(POD_NAMESPACE)).thenReturn(namedNamespaceOperations) + when(namedNamespaceOperations.get()).thenReturn(new Namespace()) + when(podOperations.inNamespace(POD_NAMESPACE)).thenReturn(podsInNamespaceOperations) + when(podsInNamespaceOperations.withLabels(POD_LABELS.asJava)) + .thenReturn(podsWithLabelsOperations) + when(podsWithLabelsOperations.list()).thenReturn(new PodListBuilder().build()) + cleanupRunnable.run() + verify(stagedResourcesStore).removeResources(RESOURCE_ID) + } + + test("Clean up the resource if the namespace does not exist.") { + val cleanupRunnable = startCleanupAndGetCleanupRunnable() + cleanerUnderTest.registerResourceForCleaning(RESOURCE_ID, RESOURCES_OWNER) + cleanerUnderTest.markResourceAsUsed(RESOURCE_ID) + when(clock.getTimeMillis()).thenReturn(CURRENT_TIME + INITIAL_ACCESS_EXPIRATION_MS) + when(namespaceOperations.withName(POD_NAMESPACE)).thenReturn(namedNamespaceOperations) + when(namedNamespaceOperations.get()).thenReturn(null) + cleanupRunnable.run() + verify(stagedResourcesStore).removeResources(RESOURCE_ID) + } + + private def startCleanupAndGetCleanupRunnable(): Runnable = { + val captor = ArgumentCaptor.forClass(classOf[Runnable]) + cleanerUnderTest.start() + verify(cleanerExecutorService).scheduleAtFixedRate( + captor.capture(), + mockitoEq(30000L), + mockitoEq(30000L), + mockitoEq(TimeUnit.MILLISECONDS)) + captor.getValue + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStoreSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStoreSuite.scala new file mode 100644 index 0000000000000..6b5737ebf2e23 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStoreSuite.scala @@ -0,0 +1,86 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes + +import java.io.{ByteArrayInputStream, File} +import java.nio.file.Paths + +import com.google.common.io.Files +import org.scalatest.BeforeAndAfter + +import org.apache.spark.SparkFunSuite +import org.apache.spark.util.Utils + +private[spark] class StagedResourcesStoreSuite extends SparkFunSuite with BeforeAndAfter { + + private val resourceBytes = Array[Byte](1, 2, 3, 4) + private val namespace = "namespace" + private var dependencyRootDir: File = _ + private var stagedResourcesStore: StagedResourcesStore = _ + + before { + dependencyRootDir = Utils.createTempDir() + stagedResourcesStore = new StagedResourcesStoreImpl(dependencyRootDir) + } + + after { + dependencyRootDir.delete() + } + + test("Uploads should write data to the underlying disk") { + val resourceIdAndSecret = Utils.tryWithResource(new ByteArrayInputStream(resourceBytes)) { + resourceStream => + stagedResourcesStore.addResources(namespace, resourceStream) + } + val resourceNamespaceDir = Paths.get(dependencyRootDir.getAbsolutePath, "namespace").toFile + assert(resourceNamespaceDir.isDirectory, s"Resource namespace dir was not created at" + + s" ${resourceNamespaceDir.getAbsolutePath} or is not a directory.") + val resourceDirs = resourceNamespaceDir.listFiles() + assert(resourceDirs.length === 1, s"Resource root directory did not have exactly one" + + s" subdirectory. 
Got: ${resourceDirs.map(_.getAbsolutePath).mkString(",")}") + assert(resourceDirs(0).getName === resourceIdAndSecret.resourceId) + val resourceTgz = new File(resourceDirs(0), "resources.data") + assert(resourceTgz.isFile, + s"Resources written to ${resourceTgz.getAbsolutePath} does not exist or is not a file.") + val resourceTgzBytes = Files.toByteArray(resourceTgz) + assert(resourceTgzBytes.toSeq === resourceBytes.toSeq, "Incorrect resource bytes were written.") + } + + test("Uploading and then getting should return a stream with the written bytes.") { + val resourceIdAndSecret = Utils.tryWithResource(new ByteArrayInputStream(resourceBytes)) { + resourceStream => + stagedResourcesStore.addResources(namespace, resourceStream) + } + val resources = stagedResourcesStore.getResources(resourceIdAndSecret.resourceId) + assert(resources.map(_.resourcesFile) + .map(Files.toByteArray) + .exists(resourceBytes.sameElements(_))) + assert(resources.exists(_.resourceId == resourceIdAndSecret.resourceId)) + assert(resources.exists(_.resourceSecret == resourceIdAndSecret.resourceSecret)) + } + + test("Uploading and then deleting should result in the resource directory being deleted.") { + val resourceIdAndSecret = Utils.tryWithResource(new ByteArrayInputStream(resourceBytes)) { + resourceStream => + stagedResourcesStore.addResources(namespace, resourceStream) + } + stagedResourcesStore.removeResources(resourceIdAndSecret.resourceId) + val resourceNamespaceDir = Paths.get(dependencyRootDir.getAbsolutePath, "namespace").toFile + assert(resourceNamespaceDir.listFiles().isEmpty) + assert(stagedResourcesStore.getResources(resourceIdAndSecret.resourceId).isEmpty) + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 95775d262a69d..6a296d6112c97 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -169,11 +169,14 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { sparkConf.setJars(Seq( CONTAINER_LOCAL_MAIN_APP_RESOURCE, CONTAINER_LOCAL_HELPER_JAR_PATH)) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, + sparkConf.set( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", kubernetesTestComponents.clientConfig.getClientKeyFile) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_CERT_FILE, + sparkConf.set( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", kubernetesTestComponents.clientConfig.getClientCertFile) - sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, + sparkConf.set( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", kubernetesTestComponents.clientConfig.getCaCertFile) runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) } From c325691d1c788e12d3e09a9d728481f52223ca2b Mon Sep 17 00:00:00 2001 From: Timothy Chen Date: Sun, 4 Jun 2017 07:25:28 -0700 Subject: [PATCH 127/156] Copy yaml files when making distribution (#327) --- dev/make-distribution.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index 6f9dfa0e39072..bb5fa3da12209 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ 
-246,6 +246,7 @@ fi # Copy other things mkdir "$DISTDIR"/conf cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf +cp "$SPARK_HOME"/conf/*.yaml "$DISTDIR"/conf cp "$SPARK_HOME/README.md" "$DISTDIR" cp -r "$SPARK_HOME/bin" "$DISTDIR" cp -r "$SPARK_HOME/python" "$DISTDIR" From d835b6abd7277bcb76721124f769fba1a3ee332e Mon Sep 17 00:00:00 2001 From: Timothy Chen Date: Sun, 4 Jun 2017 19:54:56 -0700 Subject: [PATCH 128/156] Allow docker image pull policy to be configurable (#328) * Allow docker image pull policy to be configurable * Add flag documentation * Update running-on-kubernetes.md --- docs/running-on-kubernetes.md | 7 +++++++ .../deploy/kubernetes/SparkPodInitContainerBootstrap.scala | 3 ++- .../scala/org/apache/spark/deploy/kubernetes/config.scala | 6 ++++++ .../org/apache/spark/deploy/kubernetes/submit/Client.scala | 3 ++- .../submit/DriverInitContainerComponentsProvider.scala | 2 ++ .../cluster/kubernetes/KubernetesClusterManager.scala | 4 ++-- .../kubernetes/KubernetesClusterSchedulerBackend.scala | 3 ++- .../kubernetes/SparkPodInitContainerBootstrapSuite.scala | 3 +++ 8 files changed, 26 insertions(+), 5 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index e9002bdfe0502..a88b0d380fac0 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -661,6 +661,13 @@ from the other deployment modes. See the [configuration page](configuration.html Interval between reports of the current Spark job status in cluster mode. + + spark.kubernetes.docker.image.pullPolicy + IfNotPresent + + Docker image pull policy used when pulling Docker images with Kubernetes. + + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala index a4d0aeb23d01f..87462dbde17a5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala @@ -36,6 +36,7 @@ private[spark] trait SparkPodInitContainerBootstrap { private[spark] class SparkPodInitContainerBootstrapImpl( initContainerImage: String, + dockerImagePullPolicy: String, jarsDownloadPath: String, filesDownloadPath: String, downloadTimeoutMinutes: Long, @@ -60,7 +61,7 @@ private[spark] class SparkPodInitContainerBootstrapImpl( val initContainer = new ContainerBuilder() .withName(s"spark-init") .withImage(initContainerImage) - .withImagePullPolicy("IfNotPresent") + .withImagePullPolicy(dockerImagePullPolicy) .addNewVolumeMount() .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) .withMountPath(INIT_CONTAINER_PROPERTIES_FILE_DIR) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index dd99e0f7a5ae0..47c3c24fa88f7 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -47,6 +47,12 @@ package object config extends Logging { .stringConf .createWithDefault(s"spark-executor:$sparkVersion") + private[spark] val DOCKER_IMAGE_PULL_POLICY = + ConfigBuilder("spark.kubernetes.docker.image.pullPolicy") + 
.doc("Docker image pull policy when pulling any docker image in Kubernetes integration") + .stringConf + .createWithDefault("IfNotPresent") + private[spark] val APISERVER_AUTH_SUBMISSION_CONF_PREFIX = "spark.kubernetes.authenticate.submission" private[spark] val APISERVER_AUTH_DRIVER_CONF_PREFIX = diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index 85dac3df57b4c..1bebaf92501f4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -60,6 +60,7 @@ private[spark] class Client( private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) .getOrElse(kubernetesAppId) private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) + private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) private val memoryOverheadMb = sparkConf .get(KUBERNETES_DRIVER_MEMORY_OVERHEAD) @@ -99,7 +100,7 @@ private[spark] class Client( val driverContainer = new ContainerBuilder() .withName(DRIVER_CONTAINER_NAME) .withImage(driverDockerImage) - .withImagePullPolicy("IfNotPresent") + .withImagePullPolicy(dockerImagePullPolicy) .addToEnv(driverExtraClasspathEnv.toSeq: _*) .addNewEnv() .withName(ENV_DRIVER_MEMORY) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala index ccb349c5b2988..be9da2582cb47 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala @@ -104,6 +104,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( private val configMapName = s"$kubernetesAppId-init-config" private val configMapKey = s"$kubernetesAppId-init-config-key" private val initContainerImage = sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE) + private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) private val downloadTimeoutMinutes = sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) override def provideInitContainerConfigMapBuilder( @@ -196,6 +197,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( } new SparkPodInitContainerBootstrapImpl( initContainerImage, + dockerImagePullPolicy, jarsDownloadPath, filesDownloadPath, downloadTimeoutMinutes, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala index 6abce55cff209..2a0f6e78c2aea 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala @@ -46,7 +46,7 @@ private[spark] class KubernetesClusterManager 
extends ExternalClusterManager wit val maybeExecutorInitContainerSecretName = sparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET) val maybeExecutorInitContainerSecretMount = - sparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR) + sparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR) val executorInitContainerSecretVolumePlugin = for { initContainerSecretName <- maybeExecutorInitContainerSecretName initContainerSecretMountPath <- maybeExecutorInitContainerSecretMount @@ -65,6 +65,7 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit } yield { new SparkPodInitContainerBootstrapImpl( sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE), + sparkConf.get(DOCKER_IMAGE_PULL_POLICY), sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION), sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION), sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT), @@ -95,4 +96,3 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit scheduler.asInstanceOf[TaskSchedulerImpl].initialize(backend) } } - diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 1852ed021d91a..c3a6fe28a6255 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -77,6 +77,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private var shufflePodCache: Option[ShufflePodCache] = None private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) + private val dockerImagePullPolicy = conf.get(DOCKER_IMAGE_PULL_POLICY) private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) private val executorPort = conf.getInt("spark.executor.port", DEFAULT_STATIC_PORT) private val blockmanagerPort = conf @@ -354,7 +355,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .addNewContainer() .withName(s"executor") .withImage(executorDockerImage) - .withImagePullPolicy("IfNotPresent") + .withImagePullPolicy(dockerImagePullPolicy) .withNewResources() .addToRequests("memory", executorMemoryQuantity) .addToLimits("memory", executorMemoryLimitQuantity) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala index 3feba80f800c7..90d7b10df211c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala @@ -27,6 +27,7 @@ import org.apache.spark.deploy.kubernetes.constants._ class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAfter { private val OBJECT_MAPPER = new ObjectMapper() private val INIT_CONTAINER_IMAGE = "spark-init:latest" + private val DOCKER_IMAGE_PULL_POLICY = "IfNotPresent" private val JARS_DOWNLOAD_PATH = "/var/data/spark-jars" private val FILES_DOWNLOAD_PATH = "/var/data/spark-files" private val DOWNLOAD_TIMEOUT_MINUTES = 5 @@ -137,6 +138,7 @@ class SparkPodInitContainerBootstrapSuite 
extends SparkFunSuite with BeforeAndAf private def bootstrapPodWithoutSubmittedDependencies(): Pod = { val bootstrapUnderTest = new SparkPodInitContainerBootstrapImpl( INIT_CONTAINER_IMAGE, + DOCKER_IMAGE_PULL_POLICY, JARS_DOWNLOAD_PATH, FILES_DOWNLOAD_PATH, DOWNLOAD_TIMEOUT_MINUTES, @@ -150,6 +152,7 @@ class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAf private def bootstrapPodWithSubmittedDependencies(): Pod = { val bootstrapUnderTest = new SparkPodInitContainerBootstrapImpl( INIT_CONTAINER_IMAGE, + DOCKER_IMAGE_PULL_POLICY, JARS_DOWNLOAD_PATH, FILES_DOWNLOAD_PATH, DOWNLOAD_TIMEOUT_MINUTES, From 4751371f115a81ea2f965721f1a79f74b6feee8b Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 5 Jun 2017 10:08:57 -0700 Subject: [PATCH 129/156] POM update 0.2.0 (#329) --- assembly/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/java8-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml | 2 +- mesos/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/docker-minimal-bundle/pom.xml | 2 +- .../kubernetes/integration-tests-spark-jobs-helpers/pom.xml | 2 +- .../kubernetes/integration-tests-spark-jobs/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- 41 files changed, 41 insertions(+), 41 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index c1f2c5b29f7e8..a4f695e790ce3 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7d016120e44d7..58889a55cf651 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 718f609178e24..2daacc14d42b5 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index d543991cb6a94..e14b4748efca9 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 384ef55b6f8a9..24fd97315ef4e 100644 
--- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 3d8eb2703ed6b..e07e51c34ec93 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index cd53039ed9a47..0bf7005b32eeb 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 23510cb50bcb7..9cac063dc62e7 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../pom.xml diff --git a/examples/pom.xml b/examples/pom.xml index e3f7529ecbcec..0e91ae2a14dab 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 60fa11d8a5043..e4da21cb9b4be 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index a21ec3f3d7fcb..41cf53acdb38c 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 25e34698d831e..75c6f0596eae6 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 8058732e72e74..ac410bd46c403 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/external/java8-tests/pom.xml b/external/java8-tests/pom.xml index fc326931315a3..884660d7dffdf 100644 --- a/external/java8-tests/pom.xml +++ b/external/java8-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index b023bc49203b2..7d0bd87bdcb93 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 03cd565f93025..c2dafb03bcb8f 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 5e294e6acc006..d147aef12b9cc 100644 
--- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8-assembly/pom.xml b/external/kafka-0-8-assembly/pom.xml index 3654ede0e192a..e11c2d0937517 100644 --- a/external/kafka-0-8-assembly/pom.xml +++ b/external/kafka-0-8-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8/pom.xml b/external/kafka-0-8/pom.xml index f9d61028e906a..9955cd65f6475 100644 --- a/external/kafka-0-8/pom.xml +++ b/external/kafka-0-8/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 1ba318d8d39c2..284425d4f43f2 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index fcc4456ef4a13..6d7fa95aec967 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 2bef062c0e0b3..379526b682c85 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 78ca270157dcf..ec4cfbad99d01 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 1f0549f901f47..6a536039b3975 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../pom.xml diff --git a/mesos/pom.xml b/mesos/pom.xml index e97743cf1bbd9..ab3744863a5c6 100644 --- a/mesos/pom.xml +++ b/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 24718fd46a3ec..073b6482ce930 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 384b6af178d43..6d8fe24d4e185 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 9533076a211ee..7f9325fa5f185 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT pom Spark Project Parent POM http://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index a42c69a99573d..94ecb10dfa6e4 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT 
../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index aa429f73a5627..a227342f46771 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index c66b87ac0952d..51ca26c0134fa 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml index 581bf9453f2f2..206059bd8e5b1 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml index 9639811479ff5..555398aa3e6d9 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 02904c0e5fe21..bbf4b02cdaaf9 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 818f33868ef7a..2b5faf37ddd0b 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index d1b8982b2d464..f98deb0893af7 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 3dc6539e178c3..ece565e607315 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index ce3879131e736..2d12eb50905b6 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 715aa4bbf6373..f55bbc61df071 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml 
b/tools/pom.xml index d978d6ef6fdd4..eafb3f283c619 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../pom.xml diff --git a/yarn/pom.xml b/yarn/pom.xml index 424965419c10b..00812174cdf0c 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../pom.xml From 5470366ccf9a504dcd96ab83491c211cd6508e50 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 5 Jun 2017 18:28:16 -0700 Subject: [PATCH 130/156] Update tags (#332) * Update tags * update tags in conf directory --- conf/kubernetes-resource-staging-server.yaml | 2 +- conf/kubernetes-shuffle-service.yaml | 2 +- docs/running-on-kubernetes.md | 30 ++++++++++---------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/conf/kubernetes-resource-staging-server.yaml b/conf/kubernetes-resource-staging-server.yaml index 11f5d3a13b9e3..025b9b125d9e0 100644 --- a/conf/kubernetes-resource-staging-server.yaml +++ b/conf/kubernetes-resource-staging-server.yaml @@ -32,7 +32,7 @@ spec: name: spark-resource-staging-server-config containers: - name: spark-resource-staging-server - image: kubespark/spark-resource-staging-server:v2.1.0-kubernetes-0.1.0-alpha.3 + image: kubespark/spark-resource-staging-server:v2.1.0-kubernetes-0.2.0 resources: requests: cpu: 100m diff --git a/conf/kubernetes-shuffle-service.yaml b/conf/kubernetes-shuffle-service.yaml index c0cc310cf4755..55c170b01a4f5 100644 --- a/conf/kubernetes-shuffle-service.yaml +++ b/conf/kubernetes-shuffle-service.yaml @@ -38,7 +38,7 @@ spec: # This is an official image that is built # from the dockerfiles/shuffle directory # in the spark distribution. 
- image: spark-shuffle:latest + image: kubespark/spark-shuffle:v2.1.0-kubernetes-0.2.0 imagePullPolicy: IfNotPresent volumeMounts: - mountPath: '/tmp' diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index a88b0d380fac0..36b45526dfb44 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -36,15 +36,15 @@ If you wish to use pre-built docker images, you may use the images published in ComponentImage Spark Driver Image - kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 + kubespark/spark-driver:v2.1.0-kubernetes-0.2.0 Spark Executor Image - kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 + kubespark/spark-executor:v2.1.0-kubernetes-0.2.0 Spark Initialization Image - kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 + kubespark/spark-init:v2.1.0-kubernetes-0.2.0 @@ -76,9 +76,9 @@ are set up as described above: --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.2.0 \ local:///opt/spark/examples/jars/spark_examples_2.11-2.2.0.jar The Spark master, specified either via passing the `--master` command line argument to `spark-submit` or by setting @@ -125,9 +125,9 @@ and then you can compute the value of Pi as follows: --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.2.0 \ --conf spark.kubernetes.resourceStagingServer.uri=http://:31000 \ examples/jars/spark_examples_2.11-2.2.0.jar @@ -168,9 +168,9 @@ If our local proxy were listening on port 8001, we would have our submission loo --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.2.0 \ 
local:///opt/spark/examples/jars/spark_examples_2.11-2.2.0.jar Communication between Spark and Kubernetes clusters is performed using the fabric8 kubernetes-client library. @@ -284,9 +284,9 @@ communicate with the resource staging server over TLS. The trustStore can be set --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.2.0 \ --conf spark.kubernetes.resourceStagingServer.uri=https://:31000 \ --conf spark.ssl.kubernetes.resourceStagingServer.enabled=true \ --conf spark.ssl.kubernetes.resourceStagingServer.clientCertPem=/home/myuser/cert.pem \ From ca4309febafba48948ea2546801cfeae69fecd9a Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 5 Jun 2017 22:26:24 -0700 Subject: [PATCH 131/156] nicer readme (#333) --- README.md | 113 +--------------------------------- docs/running-on-kubernetes.md | 2 - 2 files changed, 2 insertions(+), 113 deletions(-) diff --git a/README.md b/README.md index 484fef67dc180..cf6b4fa80242b 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ This is a collaboratively maintained project working on [SPARK-18278](https://is ## Getting Started -- [Usage guide](docs/running-on-kubernetes.md) shows how to run the code +- [Usage guide](https://apache-spark-on-k8s.github.io/userdocs/) shows how to run the code - [Development docs](resource-managers/kubernetes/README.md) shows how to get set up for development - Code is primarily located in the [resource-managers/kubernetes](resource-managers/kubernetes) folder @@ -30,113 +30,4 @@ This is a collaborative effort by several folks from different companies who are - Intel - Palantir - Pepperdata -- Red Hat - --------------------- - -(original README below) - -# Apache Spark - -Spark is a fast and general cluster computing system for Big Data. It provides -high-level APIs in Scala, Java, Python, and R, and an optimized engine that -supports general computation graphs for data analysis. It also supports a -rich set of higher-level tools including Spark SQL for SQL and DataFrames, -MLlib for machine learning, GraphX for graph processing, -and Spark Streaming for stream processing. - - - - -## Online Documentation - -You can find the latest Spark documentation, including a programming -guide, on the [project web page](http://spark.apache.org/documentation.html). -This README file only contains basic setup instructions. - -## Building Spark - -Spark is built using [Apache Maven](http://maven.apache.org/). -To build Spark and its example programs, run: - - build/mvn -DskipTests clean package - -(You do not need to do this if you downloaded a pre-built package.) - -You can build Spark using more than one thread by using the -T option with Maven, see ["Parallel builds in Maven 3"](https://cwiki.apache.org/confluence/display/MAVEN/Parallel+builds+in+Maven+3). 
-More detailed documentation is available from the project site, at -["Building Spark"](http://spark.apache.org/docs/latest/building-spark.html). - -For general development tips, including info on developing Spark using an IDE, see -[http://spark.apache.org/developer-tools.html](the Useful Developer Tools page). - -## Interactive Scala Shell - -The easiest way to start using Spark is through the Scala shell: - - ./bin/spark-shell - -Try the following command, which should return 1000: - - scala> sc.parallelize(1 to 1000).count() - -## Interactive Python Shell - -Alternatively, if you prefer Python, you can use the Python shell: - - ./bin/pyspark - -And run the following command, which should also return 1000: - - >>> sc.parallelize(range(1000)).count() - -## Example Programs - -Spark also comes with several sample programs in the `examples` directory. -To run one of them, use `./bin/run-example [params]`. For example: - - ./bin/run-example SparkPi - -will run the Pi example locally. - -You can set the MASTER environment variable when running examples to submit -examples to a cluster. This can be a mesos:// or spark:// URL, -"yarn" to run on YARN, and "local" to run -locally with one thread, or "local[N]" to run locally with N threads. You -can also use an abbreviated class name if the class is in the `examples` -package. For instance: - - MASTER=spark://host:7077 ./bin/run-example SparkPi - -Many of the example programs print usage help if no params are given. - -## Running Tests - -Testing first requires [building Spark](#building-spark). Once Spark is built, tests -can be run using: - - ./dev/run-tests - -Please see the guidance on how to -[run tests for a module, or individual tests](http://spark.apache.org/developer-tools.html#individual-tests). - -## A Note About Hadoop Versions - -Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported -storage systems. Because the protocols have changed in different versions of -Hadoop, you must build Spark against the same version that your cluster runs. - -Please refer to the build documentation at -["Specifying the Hadoop Version"](http://spark.apache.org/docs/latest/building-spark.html#specifying-the-hadoop-version) -for detailed guidance on building for a particular distribution of Hadoop, including -building for particular Hive and Hive Thriftserver distributions. - -## Configuration - -Please refer to the [Configuration Guide](http://spark.apache.org/docs/latest/configuration.html) -in the online documentation for an overview on how to configure Spark. - -## Contributing - -Please review the [Contribution to Spark guide](http://spark.apache.org/contributing.html) -for information on how to get started contributing to the project. +- Red Hat \ No newline at end of file diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 36b45526dfb44..dc3cf738832ad 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -149,8 +149,6 @@ environment variable in your Dockerfiles. ### Accessing Kubernetes Clusters -For details about running on public cloud environments, such as Google Container Engine (GKE), refer to [running Spark in the cloud with Kubernetes](running-on-kubernetes-cloud.md). - Spark-submit also supports submission through the [local kubectl proxy](https://kubernetes.io/docs/user-guide/accessing-the-cluster/#using-kubectl-proxy). One can use the authenticating proxy to communicate with the api server directly without passing credentials to spark-submit. 
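As a brief illustration of the proxy-based submission described above (a minimal sketch for context, not taken from any patch in this series; the port number simply follows the documentation's own "listening on port 8001" example):

    # Start a local authenticating proxy to the Kubernetes API server.
    kubectl proxy --port=8001
    # spark-submit can then be pointed at the proxy endpoint (http://127.0.0.1:8001)
    # as its master, so cluster credentials never have to be passed to spark-submit itself.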
From 0dd146c168b121f4c73ef0ed62bf4be0b679233f Mon Sep 17 00:00:00 2001 From: dyhfighter <1294057873@qq.com> Date: Fri, 9 Jun 2017 00:41:15 +0800 Subject: [PATCH 132/156] Support specify CPU cores and Memory restricts for driver (#340) Signed-off-by: duyanghao <1294057873@qq.com> --- .../deploy/kubernetes/submit/Client.scala | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index 1bebaf92501f4..0544bf064844f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.kubernetes.submit import java.io.File import java.util.Collections -import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, OwnerReferenceBuilder, PodBuilder} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, OwnerReferenceBuilder, PodBuilder, QuantityBuilder} import io.fabric8.kubernetes.client.KubernetesClient import scala.collection.JavaConverters._ @@ -61,6 +61,11 @@ private[spark] class Client( .getOrElse(kubernetesAppId) private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) + + // CPU settings + private val driverCpuCores = sparkConf.getOption("spark.driver.cores").getOrElse("1") + + // Memory settings private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) private val memoryOverheadMb = sparkConf .get(KUBERNETES_DRIVER_MEMORY_OVERHEAD) @@ -97,6 +102,15 @@ private[spark] class Client( .withValue(classPath) .build() } + val driverCpuQuantity = new QuantityBuilder(false) + .withAmount(driverCpuCores) + .build() + val driverMemoryQuantity = new QuantityBuilder(false) + .withAmount(s"${driverMemoryMb}M") + .build() + val driverMemoryLimitQuantity = new QuantityBuilder(false) + .withAmount(s"${driverContainerMemoryWithOverhead}M") + .build() val driverContainer = new ContainerBuilder() .withName(DRIVER_CONTAINER_NAME) .withImage(driverDockerImage) @@ -114,6 +128,12 @@ private[spark] class Client( .withName(ENV_DRIVER_ARGS) .withValue(appArgs.mkString(" ")) .endEnv() + .withNewResources() + .addToRequests("cpu", driverCpuQuantity) + .addToLimits("cpu", driverCpuQuantity) + .addToRequests("memory", driverMemoryQuantity) + .addToLimits("memory", driverMemoryLimitQuantity) + .endResources() .build() val basePod = new PodBuilder() .withNewMetadata() From bcf57cf1d33dec415b2edf05a33ee9e2ff15cbe7 Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 8 Jun 2017 12:40:46 -0700 Subject: [PATCH 133/156] Generate the application ID label irrespective of app name. (#331) * Generate the application ID label irrespective of app name. * Add an integration test. 
* Fix scalastyle --- .../KubernetesExternalShuffleService.scala | 2 +- .../spark/deploy/kubernetes/config.scala | 7 ++ .../spark/deploy/kubernetes/constants.scala | 6 +- .../deploy/kubernetes/submit/Client.scala | 76 ++++++------ ...riverInitContainerComponentsProvider.scala | 111 +++++++++--------- .../SubmittedDependencyUploaderImpl.scala | 1 - .../KubernetesClusterSchedulerBackend.scala | 15 ++- .../kubernetes/submit/ClientV2Suite.scala | 18 +-- .../SubmittedDependencyUploaderSuite.scala | 15 ++- .../integrationtest/KubernetesSuite.scala | 7 ++ 10 files changed, 144 insertions(+), 114 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala index 01a8a9a6899fd..c61f4f1d44acf 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala @@ -91,7 +91,7 @@ private[spark] class KubernetesShuffleBlockHandler ( try { Some(kubernetesClient .pods() - .withLabels(Map(SPARK_ROLE_LABEL -> "driver").asJava) + .withLabels(Map(SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE).asJava) .watch(new Watcher[Pod] { override def eventReceived(action: Watcher.Action, p: Pod): Unit = { action match { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 47c3c24fa88f7..d1fd88fc880d1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -151,6 +151,13 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val KUBERNETES_EXECUTOR_POD_NAME_PREFIX = + ConfigBuilder("spark.kubernetes.executor.podNamePrefix") + .doc("Prefix to use in front of the executor pod names.") + .internal() + .stringConf + .createWithDefault("spark") + private[spark] val KUBERNETES_SHUFFLE_NAMESPACE = ConfigBuilder("spark.kubernetes.shuffle.namespace") .doc("Namespace of the shuffle service") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index e267c9ff7e1d1..9c46d7494b187 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -19,10 +19,12 @@ package org.apache.spark.deploy.kubernetes package object constants { // Labels private[spark] val SPARK_DRIVER_LABEL = "spark-driver" - private[spark] val SPARK_APP_ID_LABEL = "spark-app-id" - private[spark] val SPARK_APP_NAME_LABEL = "spark-app-name" + private[spark] val SPARK_APP_ID_LABEL = "spark-app-selector" private[spark] val SPARK_EXECUTOR_ID_LABEL = "spark-exec-id" private[spark] val SPARK_ROLE_LABEL = "spark-role" + private[spark] val SPARK_POD_DRIVER_ROLE = "driver" + private[spark] val SPARK_POD_EXECUTOR_ROLE = "executor" + private[spark] val SPARK_APP_NAME_ANNOTATION = "spark-app-name" // Credentials 
secrets private[spark] val DRIVER_CREDENTIALS_SECRETS_BASE_DIR = diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index 0544bf064844f..c2e616eadc1e0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -17,13 +17,13 @@ package org.apache.spark.deploy.kubernetes.submit import java.io.File -import java.util.Collections +import java.util.{Collections, UUID} import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, OwnerReferenceBuilder, PodBuilder, QuantityBuilder} import io.fabric8.kubernetes.client.KubernetesClient import scala.collection.JavaConverters._ -import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, SparkKubernetesClientFactory} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ @@ -43,22 +43,21 @@ import org.apache.spark.util.Utils * where different steps of submission should be factored out into separate classes. */ private[spark] class Client( - appName: String, - kubernetesAppId: String, - mainClass: String, - sparkConf: SparkConf, - appArgs: Array[String], - sparkJars: Seq[String], - sparkFiles: Seq[String], - waitForAppCompletion: Boolean, - kubernetesClient: KubernetesClient, - initContainerComponentsProvider: DriverInitContainerComponentsProvider, - kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider, - loggingPodStatusWatcher: LoggingPodStatusWatcher) - extends Logging { - + appName: String, + kubernetesResourceNamePrefix: String, + kubernetesAppId: String, + mainClass: String, + sparkConf: SparkConf, + appArgs: Array[String], + sparkJars: Seq[String], + sparkFiles: Seq[String], + waitForAppCompletion: Boolean, + kubernetesClient: KubernetesClient, + initContainerComponentsProvider: DriverInitContainerComponentsProvider, + kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider, + loggingPodStatusWatcher: LoggingPodStatusWatcher) extends Logging { private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) - .getOrElse(kubernetesAppId) + .getOrElse(s"$kubernetesResourceNamePrefix-driver") private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) @@ -86,15 +85,16 @@ private[spark] class Client( val parsedCustomLabels = ConfigurationUtils.parseKeyValuePairs( customLabels, KUBERNETES_DRIVER_LABELS.key, "labels") require(!parsedCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + - s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping operations.") - require(!parsedCustomLabels.contains(SPARK_APP_NAME_LABEL), s"Label with key" + - s" $SPARK_APP_NAME_LABEL is not allowed as it is reserved for Spark bookkeeping operations.") + s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping" + + s" operations.") + val parsedCustomAnnotations = ConfigurationUtils.parseKeyValuePairs( + customAnnotations, KUBERNETES_DRIVER_ANNOTATIONS.key, "annotations") + require(!parsedCustomAnnotations.contains(SPARK_APP_NAME_ANNOTATION), s"Annotation with key" + + s" 
$SPARK_APP_NAME_ANNOTATION is not allowed as it is reserved for Spark bookkeeping" + + s" operations.") val allLabels = parsedCustomLabels ++ Map( SPARK_APP_ID_LABEL -> kubernetesAppId, - SPARK_APP_NAME_LABEL -> appName, - SPARK_ROLE_LABEL -> "driver") - val parsedCustomAnnotations = ConfigurationUtils.parseKeyValuePairs( - customAnnotations, KUBERNETES_DRIVER_ANNOTATIONS.key, "annotations") + SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => new EnvVarBuilder() @@ -140,6 +140,7 @@ private[spark] class Client( .withName(kubernetesDriverPodName) .addToLabels(allLabels.asJava) .addToAnnotations(parsedCustomAnnotations.asJava) + .addToAnnotations(SPARK_APP_NAME_ANNOTATION, appName) .endMetadata() .withNewSpec() .withRestartPolicy("Never") @@ -186,6 +187,7 @@ private[spark] class Client( } resolvedSparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) resolvedSparkConf.set("spark.app.id", kubernetesAppId) + resolvedSparkConf.set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, kubernetesResourceNamePrefix) // We don't need this anymore since we just set the JVM options on the environment resolvedSparkConf.remove(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) val resolvedLocalClasspath = containerLocalizedFilesResolver @@ -234,11 +236,11 @@ private[spark] class Client( throw e } if (waitForAppCompletion) { - logInfo(s"Waiting for application $kubernetesAppId to finish...") + logInfo(s"Waiting for application $appName to finish...") loggingPodStatusWatcher.awaitCompletion() - logInfo(s"Application $kubernetesAppId finished.") + logInfo(s"Application $appName finished.") } else { - logInfo(s"Deployed Spark application $kubernetesAppId into Kubernetes.") + logInfo(s"Deployed Spark application $appName into Kubernetes.") } } } @@ -279,15 +281,21 @@ private[spark] object Client { val sparkFiles = sparkConf.getOption("spark.files") .map(_.split(",")) .getOrElse(Array.empty[String]) - val appName = sparkConf.getOption("spark.app.name") - .getOrElse("spark") - val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + val appName = sparkConf.getOption("spark.app.name").getOrElse("spark") + // The resource name prefix is derived from the application name, making it easy to connect the + // names of the Kubernetes resources from e.g. Kubectl or the Kubernetes dashboard to the + // application the user submitted. However, we can't use the application name in the label, as + // label values are considerably restrictive, e.g. must be no longer than 63 characters in + // length. So we generate a separate identifier for the app ID itself, and bookkeeping that + // requires finding "all pods for this application" should use the kubernetesAppId. 
+ val kubernetesResourceNamePrefix = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + val kubernetesAppId = s"spark-${UUID.randomUUID().toString.replaceAll("-", "")}" val namespace = sparkConf.get(KUBERNETES_NAMESPACE) val master = resolveK8sMaster(sparkConf.get("spark.master")) val sslOptionsProvider = new ResourceStagingServerSslOptionsProviderImpl(sparkConf) val initContainerComponentsProvider = new DriverInitContainerComponentsProviderImpl( sparkConf, - kubernetesAppId, + kubernetesResourceNamePrefix, namespace, sparkJars, sparkFiles, @@ -300,14 +308,16 @@ private[spark] object Client { None, None)) { kubernetesClient => val kubernetesCredentialsMounterProvider = - new DriverPodKubernetesCredentialsMounterProviderImpl(sparkConf, kubernetesAppId) + new DriverPodKubernetesCredentialsMounterProviderImpl( + sparkConf, kubernetesResourceNamePrefix) val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) val loggingInterval = Option(sparkConf.get(REPORT_INTERVAL)) .filter( _ => waitForAppCompletion) val loggingPodStatusWatcher = new LoggingPodStatusWatcherImpl( - kubernetesAppId, loggingInterval) + kubernetesResourceNamePrefix, loggingInterval) new Client( appName, + kubernetesResourceNamePrefix, kubernetesAppId, mainClass, sparkConf, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala index be9da2582cb47..cfc61e193dcff 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala @@ -16,8 +16,6 @@ */ package org.apache.spark.deploy.kubernetes.submit -import java.io.File - import org.apache.spark.{SparkConf, SSLOptions} import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, OptionRequirements, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} import org.apache.spark.deploy.kubernetes.config._ @@ -46,12 +44,12 @@ private[spark] trait DriverInitContainerComponentsProvider { } private[spark] class DriverInitContainerComponentsProviderImpl( - sparkConf: SparkConf, - kubernetesAppId: String, - namespace: String, - sparkJars: Seq[String], - sparkFiles: Seq[String], - resourceStagingServerExternalSslOptions: SSLOptions) + sparkConf: SparkConf, + kubernetesResourceNamePrefix: String, + namespace: String, + sparkJars: Seq[String], + sparkFiles: Seq[String], + resourceStagingServerExternalSslOptions: SSLOptions) extends DriverInitContainerComponentsProvider { private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) @@ -99,10 +97,10 @@ private[spark] class DriverInitContainerComponentsProviderImpl( private val jarsDownloadPath = sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) private val filesDownloadPath = sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION) private val maybeSecretName = maybeResourceStagingServerUri.map { _ => - s"$kubernetesAppId-init-secret" + s"$kubernetesResourceNamePrefix-init-secret" } - private val configMapName = s"$kubernetesAppId-init-config" - private val configMapKey = s"$kubernetesAppId-init-config-key" + private val configMapName = s"$kubernetesResourceNamePrefix-init-config" + private val configMapKey = 
s"$kubernetesResourceNamePrefix-init-config-key" private val initContainerImage = sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE) private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) private val downloadTimeoutMinutes = sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) @@ -116,29 +114,29 @@ private[spark] class DriverInitContainerComponentsProviderImpl( filesResourceId <- maybeSubmittedResourceIds.map(_.filesResourceId) } yield { new SubmittedDependencyInitContainerConfigPluginImpl( - // Configure the init-container with the internal URI over the external URI. - maybeResourceStagingServerInternalUri.getOrElse(stagingServerUri), - jarsResourceId, - filesResourceId, - INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, - INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, - resourceStagingServerInternalSslEnabled, - maybeResourceStagingServerInternalTrustStore, - maybeResourceStagingServerInternalClientCert, - maybeResourceStagingServerInternalTrustStorePassword, - maybeResourceStagingServerInternalTrustStoreType, - INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) + // Configure the init-container with the internal URI over the external URI. + maybeResourceStagingServerInternalUri.getOrElse(stagingServerUri), + jarsResourceId, + filesResourceId, + INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, + INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, + INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, + INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, + resourceStagingServerInternalSslEnabled, + maybeResourceStagingServerInternalTrustStore, + maybeResourceStagingServerInternalClientCert, + maybeResourceStagingServerInternalTrustStorePassword, + maybeResourceStagingServerInternalTrustStoreType, + INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) } new SparkInitContainerConfigMapBuilderImpl( - sparkJars, - sparkFiles, - jarsDownloadPath, - filesDownloadPath, - configMapName, - configMapKey, - submittedDependencyConfigPlugin) + sparkJars, + sparkFiles, + jarsDownloadPath, + filesDownloadPath, + configMapName, + configMapKey, + submittedDependencyConfigPlugin) } override def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver = { @@ -158,14 +156,13 @@ private[spark] class DriverInitContainerComponentsProviderImpl( driverPodLabels: Map[String, String]): Option[SubmittedDependencyUploader] = { maybeResourceStagingServerUri.map { stagingServerUri => new SubmittedDependencyUploaderImpl( - kubernetesAppId, - driverPodLabels, - namespace, - stagingServerUri, - sparkJars, - sparkFiles, - resourceStagingServerExternalSslOptions, - RetrofitClientFactoryImpl) + driverPodLabels, + namespace, + stagingServerUri, + sparkJars, + sparkFiles, + resourceStagingServerExternalSslOptions, + RetrofitClientFactoryImpl) } } @@ -178,15 +175,15 @@ private[spark] class DriverInitContainerComponentsProviderImpl( filesResourceSecret <- maybeSubmittedResourceSecrets.map(_.filesResourceSecret) } yield { new SubmittedDependencySecretBuilderImpl( - secretName, - jarsResourceSecret, - filesResourceSecret, - INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, - INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, - maybeResourceStagingServerInternalTrustStore, - maybeResourceStagingServerInternalClientCert) + secretName, + jarsResourceSecret, + filesResourceSecret, + INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, + 
INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, + INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, + INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, + maybeResourceStagingServerInternalTrustStore, + maybeResourceStagingServerInternalClientCert) } } @@ -196,13 +193,13 @@ private[spark] class DriverInitContainerComponentsProviderImpl( secret, INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) } new SparkPodInitContainerBootstrapImpl( - initContainerImage, - dockerImagePullPolicy, - jarsDownloadPath, - filesDownloadPath, - downloadTimeoutMinutes, - configMapName, - configMapKey, - resourceStagingServerSecretPlugin) + initContainerImage, + dockerImagePullPolicy, + jarsDownloadPath, + filesDownloadPath, + downloadTimeoutMinutes, + configMapName, + configMapKey, + resourceStagingServerSecretPlugin) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala index a891cf3904d2d..83d7a28f5ca10 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala @@ -50,7 +50,6 @@ private[spark] trait SubmittedDependencyUploader { * Resource Staging Service. */ private[spark] class SubmittedDependencyUploaderImpl( - kubernetesAppId: String, podLabels: Map[String, String], podNamespace: String, stagingServerUri: String, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index c3a6fe28a6255..6ab6480d848a2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -65,7 +65,8 @@ private[spark] class KubernetesClusterSchedulerBackend( "executor labels") require( !executorLabels.contains(SPARK_APP_ID_LABEL), - s"Custom executor labels cannot contain $SPARK_APP_ID_LABEL as it is reserved for Spark.") + s"Custom executor labels cannot contain $SPARK_APP_ID_LABEL as it is" + + s" reserved for Spark.") require( !executorLabels.contains(SPARK_EXECUTOR_ID_LABEL), s"Custom executor labels cannot contain $SPARK_EXECUTOR_ID_LABEL as it is reserved for" + @@ -87,6 +88,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .get(KUBERNETES_DRIVER_POD_NAME) .getOrElse( throw new SparkException("Must specify the driver pod name")) + private val executorPodNamePrefix = conf.get(KUBERNETES_EXECUTOR_POD_NAME_PREFIX) private val executorMemoryMb = conf.get(org.apache.spark.internal.config.EXECUTOR_MEMORY) private val executorMemoryString = conf.get( @@ -225,8 +227,11 @@ private[spark] class KubernetesClusterSchedulerBackend( override def start(): Unit = { super.start() - executorWatchResource.set(kubernetesClient.pods().withLabel(SPARK_APP_ID_LABEL, applicationId()) - .watch(new ExecutorPodsWatcher())) + executorWatchResource.set( + kubernetesClient + .pods() + .withLabel(SPARK_APP_ID_LABEL, applicationId()) + .watch(new ExecutorPodsWatcher())) 
allocator.scheduleWithFixedDelay( allocatorRunnable, 0, podAllocationInterval, TimeUnit.SECONDS) @@ -280,7 +285,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private def allocateNewExecutorPod(): (String, Pod) = { val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString - val name = s"${applicationId()}-exec-$executorId" + val name = s"$executorPodNamePrefix-exec-$executorId" // hostname must be no longer than 63 characters, so take the last 63 characters of the pod // name as the hostname. This preserves uniqueness since the end of name contains @@ -289,7 +294,7 @@ private[spark] class KubernetesClusterSchedulerBackend( val resolvedExecutorLabels = Map( SPARK_EXECUTOR_ID_LABEL -> executorId, SPARK_APP_ID_LABEL -> applicationId(), - SPARK_ROLE_LABEL -> "executor") ++ + SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE) ++ executorLabels val executorMemoryQuantity = new QuantityBuilder(false) .withAmount(s"${executorMemoryMb}M") diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala index 00f09c64b53b7..193f36a7423b2 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala @@ -45,14 +45,14 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val BOOTSTRAPPED_POD_ANNOTATION = "bootstrapped" private val TRUE = "true" private val APP_NAME = "spark-test" - private val APP_ID = "spark-app-id" + private val APP_RESOURCE_PREFIX = "spark-prefix" + private val APP_ID = "spark-id" private val CUSTOM_LABEL_KEY = "customLabel" private val CUSTOM_LABEL_VALUE = "customLabelValue" private val ALL_EXPECTED_LABELS = Map( CUSTOM_LABEL_KEY -> CUSTOM_LABEL_VALUE, SPARK_APP_ID_LABEL -> APP_ID, - SPARK_APP_NAME_LABEL -> APP_NAME, - SPARK_ROLE_LABEL -> "driver") + SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) private val CUSTOM_ANNOTATION_KEY = "customAnnotation" private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" private val INIT_CONTAINER_SECRET_NAME = "init-container-secret" @@ -183,7 +183,7 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .build() } }) - when(podOps.withName(APP_ID)).thenReturn(namedPodResource) + when(podOps.withName(s"$APP_RESOURCE_PREFIX-driver")).thenReturn(namedPodResource) when(namedPodResource.watch(loggingPodStatusWatcher)).thenReturn(watch) when(containerLocalizedFilesResolver.resolveSubmittedAndRemoteSparkJars()) .thenReturn(RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS) @@ -291,6 +291,7 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { expectationsForNoDependencyUploader() new Client( APP_NAME, + APP_RESOURCE_PREFIX, APP_ID, MAIN_CLASS, SPARK_CONF, @@ -334,7 +335,7 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { owners.head.getController && owners.head.getKind == DRIVER_POD_KIND && owners.head.getUid == DRIVER_POD_UID && - owners.head.getName == APP_ID && + owners.head.getName == s"$APP_RESOURCE_PREFIX-driver" && owners.head.getApiVersion == DRIVER_POD_API_VERSION }) } @@ -354,14 +355,15 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .toMap ++ Map( "spark.app.id" -> APP_ID, - KUBERNETES_DRIVER_POD_NAME.key -> APP_ID, + KUBERNETES_DRIVER_POD_NAME.key -> s"$APP_RESOURCE_PREFIX-driver", + 
KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> APP_RESOURCE_PREFIX, EXECUTOR_INIT_CONF_KEY -> TRUE, CUSTOM_JAVA_OPTION_KEY -> CUSTOM_JAVA_OPTION_VALUE, "spark.jars" -> RESOLVED_SPARK_JARS.mkString(","), "spark.files" -> RESOLVED_SPARK_FILES.mkString(",")) runAndVerifyPodMatchesPredicate { p => Option(p) - .filter(_.getMetadata.getName == APP_ID) + .filter(_.getMetadata.getName == s"$APP_RESOURCE_PREFIX-driver") .filter(podHasCorrectAnnotations) .filter(_.getMetadata.getLabels.asScala == ALL_EXPECTED_LABELS) .filter(containerHasCorrectBasicContainerConfiguration) @@ -374,6 +376,7 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private def runAndVerifyPodMatchesPredicate(pred: (Pod => Boolean)): Unit = { new Client( APP_NAME, + APP_RESOURCE_PREFIX, APP_ID, MAIN_CLASS, SPARK_CONF, @@ -442,6 +445,7 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private def podHasCorrectAnnotations(pod: Pod): Boolean = { val expectedAnnotations = Map( CUSTOM_ANNOTATION_KEY -> CUSTOM_ANNOTATION_VALUE, + SPARK_APP_NAME_ANNOTATION -> APP_NAME, BOOTSTRAPPED_POD_ANNOTATION -> TRUE) pod.getMetadata.getAnnotations.asScala == expectedAnnotations } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala index c207e3c69cd3c..96fa92c254297 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala @@ -85,14 +85,13 @@ private[spark] class SubmittedDependencyUploaderSuite extends SparkFunSuite with resourcesDataCaptor.capture(), resourcesOwnerCaptor.capture())) .thenReturn(responseCall) dependencyUploaderUnderTest = new SubmittedDependencyUploaderImpl( - APP_ID, - LABELS, - NAMESPACE, - STAGING_SERVER_URI, - JARS, - FILES, - STAGING_SERVER_SSL_OPTIONS, - retrofitClientFactory) + LABELS, + NAMESPACE, + STAGING_SERVER_URI, + JARS, + FILES, + STAGING_SERVER_SSL_OPTIONS, + retrofitClientFactory) } test("Uploading jars should contact the staging server with the appropriate parameters") { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 6a296d6112c97..e377f285eb9a6 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -195,6 +195,13 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { Array(testExistenceFile.getName, TEST_EXISTENCE_FILE_CONTENTS)) } + test("Use a very long application name.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + sparkConf.setJars(Seq(CONTAINER_LOCAL_HELPER_JAR_PATH)).setAppName("long" * 40) + runSparkPiAndVerifyCompletion(CONTAINER_LOCAL_MAIN_APP_RESOURCE) + } + private def launchStagingServer( resourceStagingServerSslOptions: SSLOptions, keyAndCertPem: Option[KeyAndCertPem]): Unit = { assume(testBackend.name == 
MINIKUBE_TEST_BACKEND) From 78baf9bd8ed5b515712533289aaffbfddd8e856b Mon Sep 17 00:00:00 2001 From: Johannes Scheuermann Date: Fri, 9 Jun 2017 00:33:40 +0200 Subject: [PATCH 134/156] Create base-image and minimize layer count (#324) * Create base-image and minimize layer count * Create running-on-kubernetes.md --- docs/running-on-kubernetes.md | 6 +++- .../src/main/docker/driver/Dockerfile | 17 ++------- .../src/main/docker/executor/Dockerfile | 17 ++------- .../src/main/docker/init-container/Dockerfile | 16 +-------- .../docker/resource-staging-server/Dockerfile | 16 +-------- .../main/docker/shuffle-service/Dockerfile | 17 ++------- .../src/main/docker/spark-base/Dockerfile | 35 +++++++++++++++++++ .../docker/SparkDockerImageBuilder.scala | 2 ++ 8 files changed, 50 insertions(+), 76 deletions(-) create mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index dc3cf738832ad..c10630fc5c5c6 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -49,7 +49,7 @@ If you wish to use pre-built docker images, you may use the images published in You may also build these docker images from sources, or customize them as required. Spark distributions include the -Docker files for the driver, executor, and init-container at `dockerfiles/driver/Dockerfile`, +Docker files for the base-image, driver, executor, and init-container at `dockerfiles/spark-base/Dockerfile`, `dockerfiles/driver/Dockerfile`, `dockerfiles/executor/Dockerfile`, and `dockerfiles/init-container/Dockerfile` respectively. Use these Docker files to build the Docker images, and then tag them with the registry that the images should be sent to. Finally, push the images to the registry. @@ -57,12 +57,16 @@ to the registry. For example, if the registry host is `registry-host` and the registry is listening on port 5000: cd $SPARK_HOME + docker build -t registry-host:5000/spark-base:latest -f dockerfiles/spark-base/Dockerfile . docker build -t registry-host:5000/spark-driver:latest -f dockerfiles/driver/Dockerfile . docker build -t registry-host:5000/spark-executor:latest -f dockerfiles/executor/Dockerfile . docker build -t registry-host:5000/spark-init:latest -f dockerfiles/init-container/Dockerfile . + docker push registry-host:5000/spark-base:latest docker push registry-host:5000/spark-driver:latest docker push registry-host:5000/spark-executor:latest docker push registry-host:5000/spark-init:latest + +Note that `spark-base` is the base image for the other images. It must be built before the other images; the other images can then be built in any order. ## Submitting Applications to Kubernetes diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index fa651ff43aaa0..6bbff8ef64a0f 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -15,26 +15,13 @@ # limitations under the License. # -FROM openjdk:8-alpine +FROM spark-base # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: # docker build -t spark-driver:latest -f dockerfiles/driver/Dockerfile .
-RUN apk upgrade --update -RUN apk add --update bash tini -RUN mkdir -p /opt/spark -RUN touch /opt/spark/RELEASE - -ADD jars /opt/spark/jars -ADD examples /opt/spark/examples -ADD bin /opt/spark/bin -ADD sbin /opt/spark/sbin -ADD conf /opt/spark/conf - -ENV SPARK_HOME /opt/spark - -WORKDIR /opt/spark +COPY examples /opt/spark/examples CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index fbad43b6255b9..9c9efb23d7e95 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -15,26 +15,13 @@ # limitations under the License. # -FROM openjdk:8-alpine +FROM spark-base # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: # docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . -RUN apk upgrade --update -RUN apk add --update bash tini -RUN mkdir -p /opt/spark -RUN touch /opt/spark/RELEASE - -ADD jars /opt/spark/jars -ADD examples /opt/spark/examples -ADD bin /opt/spark/bin -ADD sbin /opt/spark/sbin -ADD conf /opt/spark/conf - -ENV SPARK_HOME /opt/spark - -WORKDIR /opt/spark +COPY examples /opt/spark/examples # TODO support spark.executor.extraClassPath CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile index 40557a7465a8a..6bff06da12840 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile @@ -15,24 +15,10 @@ # limitations under the License. # -FROM openjdk:8-alpine +FROM spark-base # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: # docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . -RUN apk upgrade --update -RUN apk add --update bash tini -RUN mkdir -p /opt/spark -RUN touch /opt/spark/RELEASE - -ADD jars /opt/spark/jars -ADD bin /opt/spark/bin -ADD sbin /opt/spark/sbin -ADD conf /opt/spark/conf - -ENV SPARK_HOME /opt/spark - -WORKDIR /opt/spark - ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.KubernetesSparkDependencyDownloadInitContainer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile index c8b13c44207bc..c9a92fa1c5b62 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile @@ -15,24 +15,10 @@ # limitations under the License. 
# -FROM openjdk:8-alpine +FROM spark-base # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: # docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . -RUN apk upgrade --update -RUN apk add --update bash tini -RUN mkdir -p /opt/spark -RUN touch /opt/spark/RELEASE - -ADD jars /opt/spark/jars -ADD bin /opt/spark/bin -ADD sbin /opt/spark/sbin -ADD conf /opt/spark/conf - -ENV SPARK_HOME /opt/spark - -WORKDIR /opt/spark - ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.ResourceStagingServer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile index 1f64376b89aae..7f4e2aa51b67d 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile @@ -15,25 +15,12 @@ # limitations under the License. # -FROM openjdk:8-alpine +FROM spark-base # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: # docker build -t spark-shuffle:latest -f dockerfiles/shuffle/Dockerfile . -RUN apk upgrade --update -RUN apk add --update bash tini -RUN mkdir -p /opt/spark -RUN touch /opt/spark/RELEASE - -ADD jars /opt/spark/jars -ADD examples /opt/spark/examples -ADD bin /opt/spark/bin -ADD sbin /opt/spark/sbin -ADD conf /opt/spark/conf - -ENV SPARK_HOME /opt/spark - -WORKDIR /opt/spark +COPY examples /opt/spark/examples ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.kubernetes.KubernetesExternalShuffleService", "1" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile new file mode 100644 index 0000000000000..b0925e3bb0416 --- /dev/null +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM openjdk:8-alpine + +# If this docker file is being used in the context of building your images from a Spark distribution, the docker build +# command should be invoked from the top level directory of the Spark distribution. E.g.: +# docker build -t spark-base:latest -f dockerfiles/spark-base/Dockerfile . 
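+# Folding the upgrade, package installation, and directory setup below into a single RUN instruction
+# produces one image layer instead of four; the driver, executor, and other images built FROM this base
+# then only add the instructions specific to their own role.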
+ +RUN apk upgrade --no-cache && \ + apk add --no-cache bash tini && \ + mkdir -p /opt/spark && \ + touch /opt/spark/RELEASE + +COPY jars /opt/spark/jars +COPY bin /opt/spark/bin +COPY sbin /opt/spark/sbin +COPY conf /opt/spark/conf + +ENV SPARK_HOME /opt/spark + +WORKDIR /opt/spark diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala index 3ff72829f88a7..4db19478f44bc 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala @@ -28,6 +28,7 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, private val DOCKER_BUILD_PATH = Paths.get("target", "docker") // Dockerfile paths must be relative to the build path. + private val BASE_DOCKER_FILE = "dockerfiles/spark-base/Dockerfile" private val DRIVER_DOCKER_FILE = "dockerfiles/driver/Dockerfile" private val EXECUTOR_DOCKER_FILE = "dockerfiles/executor/Dockerfile" private val SHUFFLE_SERVICE_DOCKER_FILE = "dockerfiles/shuffle-service/Dockerfile" @@ -60,6 +61,7 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, def buildSparkDockerImages(): Unit = { Eventually.eventually(TIMEOUT, INTERVAL) { dockerClient.ping() } + buildImage("spark-base", BASE_DOCKER_FILE) buildImage("spark-driver", DRIVER_DOCKER_FILE) buildImage("spark-executor", EXECUTOR_DOCKER_FILE) buildImage("spark-shuffle", SHUFFLE_SERVICE_DOCKER_FILE) From 2f80b1d3e7754afe588fd16243907c68fbf614d0 Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Thu, 8 Jun 2017 19:07:14 -0500 Subject: [PATCH 135/156] Added log4j config for k8s unit tests. (#314) --- .../core/src/test/resources/log4j.properties | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 resource-managers/kubernetes/core/src/test/resources/log4j.properties diff --git a/resource-managers/kubernetes/core/src/test/resources/log4j.properties b/resource-managers/kubernetes/core/src/test/resources/log4j.properties new file mode 100644 index 0000000000000..ad95fadb7c0c0 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/resources/log4j.properties @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Set everything to be logged to the file target/unit-tests.log +log4j.rootCategory=INFO, file +log4j.appender.file=org.apache.log4j.FileAppender +log4j.appender.file.append=true +log4j.appender.file.file=target/unit-tests.log +log4j.appender.file.layout=org.apache.log4j.PatternLayout +log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n + +# Ignore messages below warning level from a few verbose libraries. +log4j.logger.com.sun.jersey=WARN +log4j.logger.org.apache.hadoop=WARN +log4j.logger.org.eclipse.jetty=WARN +log4j.logger.org.mortbay=WARN +log4j.logger.org.spark_project.jetty=WARN From d4ec1360547693e71c9f1fb42b5691b7d7bfc362 Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Tue, 13 Jun 2017 18:08:45 -0700 Subject: [PATCH 136/156] Use node affinity to launch executors on preferred nodes benefitting from data locality (#316) * Use node affinity to launch executors on data local nodes * Fix comment style * Use JSON object mapper * Address review comments * Fix a style issue * Clean up and add a TODO * Fix style issue * Address review comments --- .../spark/deploy/kubernetes/constants.scala | 1 + .../KubernetesClusterSchedulerBackend.scala | 103 ++++++++++++++++-- 2 files changed, 96 insertions(+), 8 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 9c46d7494b187..f2f1136e54fe4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -90,6 +90,7 @@ package object constants { private[spark] val INIT_CONTAINER_SECRET_VOLUME_NAME = "spark-init-secret" // Miscellaneous + private[spark] val ANNOTATION_EXECUTOR_NODE_AFFINITY = "scheduler.alpha.kubernetes.io/affinity" private[spark] val DRIVER_CONTAINER_NAME = "spark-kubernetes-driver" private[spark] val KUBERNETES_MASTER_INTERNAL_URL = "https://kubernetes.default.svc" private[spark] val MEMORY_OVERHEAD_FACTOR = 0.10 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 6ab6480d848a2..85ce5f01200b2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -17,9 +17,12 @@ package org.apache.spark.scheduler.cluster.kubernetes import java.io.Closeable +import java.net.InetAddress import java.util.concurrent.TimeUnit import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference} +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, EnvVarSourceBuilder, Pod, PodBuilder, QuantityBuilder} import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action @@ -177,16 +180,18 @@ private[spark] class KubernetesClusterSchedulerBackend( 
.newDaemonSingleThreadScheduledExecutor("kubernetes-pod-allocator") private val allocatorRunnable: Runnable = new Runnable { + override def run(): Unit = { if (totalRegisteredExecutors.get() < runningExecutorPods.size) { logDebug("Waiting for pending executors before scaling") } else if (totalExpectedExecutors.get() <= runningExecutorPods.size) { logDebug("Maximum allowed executor limit reached. Not scaling up further.") } else { + val nodeToLocalTaskCount = getNodesWithLocalTaskCounts RUNNING_EXECUTOR_PODS_LOCK.synchronized { for (i <- 0 until math.min( totalExpectedExecutors.get - runningExecutorPods.size, podAllocationSize)) { - runningExecutorPods += allocateNewExecutorPod() + runningExecutorPods += allocateNewExecutorPod(nodeToLocalTaskCount) logInfo( s"Requesting a new executor, total executors is now ${runningExecutorPods.size}") } @@ -195,6 +200,8 @@ private[spark] class KubernetesClusterSchedulerBackend( } } + private val objectMapper = new ObjectMapper().registerModule(DefaultScalaModule) + private def getShuffleClient(): KubernetesExternalShuffleClient = { new KubernetesExternalShuffleClient( SparkTransportConf.fromSparkConf(conf, "shuffle"), @@ -283,7 +290,70 @@ private[spark] class KubernetesClusterSchedulerBackend( } } - private def allocateNewExecutorPod(): (String, Pod) = { + /** + * @return A map of K8s cluster nodes to the number of tasks that could benefit from data + * locality if an executor launches on the cluster node. + */ + private def getNodesWithLocalTaskCounts() : Map[String, Int] = { + val executorPodsWithIPs = EXECUTOR_PODS_BY_IPS_LOCK.synchronized { + executorPodsByIPs.values.toList // toList makes a defensive copy. + } + val nodeToLocalTaskCount = mutable.Map[String, Int]() ++ + KubernetesClusterSchedulerBackend.this.synchronized { + hostToLocalTaskCount + } + for (pod <- executorPodsWithIPs) { + // Remove cluster nodes that are running our executors already. + // TODO: This prefers spreading out executors across nodes. In case users want + // consolidating executors on fewer nodes, introduce a flag. See the spark.deploy.spreadOut + // flag that Spark standalone has: https://spark.apache.org/docs/latest/spark-standalone.html + nodeToLocalTaskCount.remove(pod.getSpec.getNodeName).nonEmpty || + nodeToLocalTaskCount.remove(pod.getStatus.getHostIP).nonEmpty || + nodeToLocalTaskCount.remove( + InetAddress.getByName(pod.getStatus.getHostIP).getCanonicalHostName).nonEmpty + } + nodeToLocalTaskCount.toMap[String, Int] + } + + private def addNodeAffinityAnnotationIfUseful(basePodBuilder: PodBuilder, + nodeToTaskCount: Map[String, Int]): PodBuilder = { + def scaleToRange(value: Int, baseMin: Double, baseMax: Double, + rangeMin: Double, rangeMax: Double): Int = + (((rangeMax - rangeMin) * (value - baseMin) / (baseMax - baseMin)) + rangeMin).toInt + + if (nodeToTaskCount.nonEmpty) { + val taskTotal = nodeToTaskCount.foldLeft(0)(_ + _._2) + // Normalize to node affinity weights in 1 to 100 range. 
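+ // As a hypothetical illustration: with pending local task counts of Map("node1" -> 2, "node2" -> 5, "node3" -> 13),
+ // taskTotal is 20 and the scaled weights come out as 6, 21 and 63 respectively, so nodes holding more
+ // pending local tasks receive a proportionally stronger scheduling preference.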
+ val nodeToWeight = nodeToTaskCount.map{ + case (node, taskCount) => + (node, scaleToRange(taskCount, 1, taskTotal, rangeMin = 1, rangeMax = 100))} + val weightToNodes = nodeToWeight.groupBy(_._2).mapValues(_.keys) + // @see https://kubernetes.io/docs/concepts/configuration/assign-pod-node + val nodeAffinityJson = objectMapper.writeValueAsString(SchedulerAffinity(NodeAffinity( + preferredDuringSchedulingIgnoredDuringExecution = + for ((weight, nodes) <- weightToNodes) yield + WeightedPreference(weight, + Preference(Array(MatchExpression("kubernetes.io/hostname", "In", nodes)))) + ))) + // TODO: Use non-annotation syntax when we switch to K8s version 1.6. + logDebug(s"Adding nodeAffinity as annotation $nodeAffinityJson") + basePodBuilder.editMetadata() + .addToAnnotations(ANNOTATION_EXECUTOR_NODE_AFFINITY, nodeAffinityJson) + .endMetadata() + } else { + basePodBuilder + } + } + + /** + * Allocates a new executor pod + * + * @param nodeToLocalTaskCount A map of K8s cluster nodes to the number of tasks that could + * benefit from data locality if an executor launches on the cluster + * node. + * @return A tuple of the new executor name and the Pod data structure. + */ + private def allocateNewExecutorPod(nodeToLocalTaskCount: Map[String, Int]): (String, Pod) = { val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString val name = s"$executorPodNamePrefix-exec-$executorId" @@ -393,14 +463,19 @@ private[spark] class KubernetesClusterSchedulerBackend( .endSpec() } }.getOrElse(basePodBuilder) - val resolvedExecutorPod = executorInitContainerBootstrap.map { bootstrap => - bootstrap.bootstrapInitContainerAndVolumes( - "executor", - withMaybeShuffleConfigPodBuilder) - }.getOrElse(withMaybeShuffleConfigPodBuilder) + + val executorInitContainerPodBuilder = executorInitContainerBootstrap.map { + bootstrap => + bootstrap.bootstrapInitContainerAndVolumes( + "executor", + withMaybeShuffleConfigPodBuilder) + }.getOrElse(withMaybeShuffleConfigPodBuilder) + + val resolvedExecutorPodBuilder = addNodeAffinityAnnotationIfUseful( + executorInitContainerPodBuilder, nodeToLocalTaskCount) try { - (executorId, kubernetesClient.pods.create(resolvedExecutorPod.build())) + (executorId, kubernetesClient.pods.create(resolvedExecutorPodBuilder.build())) } catch { case throwable: Throwable => logError("Failed to allocate executor pod.", throwable) @@ -521,3 +596,15 @@ private object KubernetesClusterSchedulerBackend { private val DEFAULT_STATIC_PORT = 10000 private val EXECUTOR_ID_COUNTER = new AtomicLong(0L) } + +/** + * These case classes model K8s node affinity syntax for + * preferredDuringSchedulingIgnoredDuringExecution. + * @see https://kubernetes.io/docs/concepts/configuration/assign-pod-node + */ +case class SchedulerAffinity(nodeAffinity: NodeAffinity) +case class NodeAffinity(preferredDuringSchedulingIgnoredDuringExecution: + Iterable[WeightedPreference]) +case class WeightedPreference(weight: Int, preference: Preference) +case class Preference(matchExpressions: Array[MatchExpression]) +case class MatchExpression(key: String, operator: String, values: Iterable[String]) From d6a311192cbd59314f59368991bd9ee1ff65b3e9 Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 13 Jun 2017 21:52:19 -0700 Subject: [PATCH 137/156] Fix sbt build. (#344) * Fix sbt build. - Remove extraneous Feign dependency that we no longer use in submission v2. - Exclude Jackson from various modules to ensure every Jackson module is forced to 2.6.5. - Fix a linter error only caught by sbt. 
- Add Kubernetes modules to various parts of the SBT infrastructure * Actually remove feign * Actually exclude Jackson from kubernetes client. --- dev/deps/spark-deps-hadoop-2.2 | 20 +++++++ dev/deps/spark-deps-hadoop-2.3 | 21 +++++++- dev/deps/spark-deps-hadoop-2.4 | 21 +++++++- dev/deps/spark-deps-hadoop-2.6 | 21 +++++++- dev/deps/spark-deps-hadoop-2.7 | 21 +++++++- dev/sparktestsupport/modules.py | 8 +++ dev/test-dependencies.sh | 2 +- pom.xml | 54 +++++++++++-------- resource-managers/kubernetes/core/pom.xml | 42 ++++++++------- ...riverPodKubernetesCredentialsMounter.scala | 2 +- 10 files changed, 165 insertions(+), 47 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.2 b/dev/deps/spark-deps-hadoop-2.2 index 89bfcef4d9466..97c8a38cf6143 100644 --- a/dev/deps/spark-deps-hadoop-2.2 +++ b/dev/deps/spark-deps-hadoop-2.2 @@ -8,9 +8,12 @@ aopalliance-1.0.jar aopalliance-repackaged-2.4.0-b34.jar apache-log4j-extras-1.2.17.jar arpack_combined_all-0.1.jar +automaton-1.11-8.jar avro-1.7.7.jar avro-ipc-1.7.7.jar avro-mapred-1.7.7-hadoop2.jar +bcpkix-jdk15on-1.54.jar +bcprov-jdk15on-1.54.jar bonecp-0.8.0.RELEASE.jar breeze-macros_2.11-0.12.jar breeze_2.11-0.12.jar @@ -40,6 +43,8 @@ commons-math3-3.4.1.jar commons-net-2.2.jar commons-pool-1.5.4.jar compress-lzf-1.0.3.jar +converter-jackson-2.2.0.jar +converter-scalars-2.2.0.jar core-1.1.2.jar curator-client-2.4.0.jar curator-framework-2.4.0.jar @@ -49,6 +54,7 @@ datanucleus-core-3.2.10.jar datanucleus-rdbms-3.2.9.jar derby-10.12.1.1.jar eigenbase-properties-1.1.5.jar +generex-1.0.1.jar guava-14.0.1.jar guice-3.0.jar guice-servlet-3.0.jar @@ -77,7 +83,11 @@ jackson-annotations-2.6.5.jar jackson-core-2.6.5.jar jackson-core-asl-1.9.13.jar jackson-databind-2.6.5.jar +jackson-dataformat-yaml-2.6.5.jar +jackson-jaxrs-base-2.6.5.jar +jackson-jaxrs-json-provider-2.6.5.jar jackson-mapper-asl-1.9.13.jar +jackson-module-jaxb-annotations-2.6.5.jar jackson-module-paranamer-2.6.5.jar jackson-module-scala_2.11-2.6.5.jar janino-3.0.0.jar @@ -96,6 +106,7 @@ jersey-container-servlet-2.22.2.jar jersey-container-servlet-core-2.22.2.jar jersey-guava-2.22.2.jar jersey-media-jaxb-2.22.2.jar +jersey-media-multipart-2.22.2.jar jersey-server-2.22.2.jar jets3t-0.7.1.jar jetty-util-6.1.26.jar @@ -111,20 +122,26 @@ jta-1.1.jar jtransforms-2.4.0.jar jul-to-slf4j-1.7.16.jar kryo-shaded-3.0.3.jar +kubernetes-client-2.2.13.jar +kubernetes-model-1.0.67.jar leveldbjni-all-1.8.jar libfb303-0.9.2.jar libthrift-0.9.2.jar log4j-1.2.17.jar +logging-interceptor-3.6.0.jar lz4-1.3.0.jar mesos-1.0.0-shaded-protobuf.jar metrics-core-3.1.2.jar metrics-graphite-3.1.2.jar metrics-json-3.1.2.jar metrics-jvm-3.1.2.jar +mimepull-1.9.6.jar minlog-1.3.0.jar netty-3.8.0.Final.jar netty-all-4.0.42.Final.jar objenesis-2.1.jar +okhttp-3.6.0.jar +okio-1.11.0.jar opencsv-2.3.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar @@ -141,6 +158,7 @@ pmml-schema-1.2.15.jar protobuf-java-2.5.0.jar py4j-0.10.4.jar pyrolite-4.13.jar +retrofit-2.2.0.jar scala-compiler-2.11.8.jar scala-library-2.11.8.jar scala-parser-combinators_2.11-1.0.4.jar @@ -150,6 +168,7 @@ scalap-2.11.8.jar shapeless_2.11-2.0.0.jar slf4j-api-1.7.16.jar slf4j-log4j12-1.7.16.jar +snakeyaml-1.15.jar snappy-0.2.jar snappy-java-1.1.2.6.jar spire-macros_2.11-0.7.4.jar @@ -163,4 +182,5 @@ validation-api-1.1.0.Final.jar xbean-asm5-shaded-4.4.jar xmlenc-0.52.jar xz-1.0.jar +zjsonpatch-0.3.0.jar zookeeper-3.4.5.jar diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3 index 8df3858825e13..21ca9259ed3ff 100644 --- 
a/dev/deps/spark-deps-hadoop-2.3 +++ b/dev/deps/spark-deps-hadoop-2.3 @@ -9,11 +9,13 @@ aopalliance-1.0.jar aopalliance-repackaged-2.4.0-b34.jar apache-log4j-extras-1.2.17.jar arpack_combined_all-0.1.jar +automaton-1.11-8.jar avro-1.7.7.jar avro-ipc-1.7.7.jar avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar -bcprov-jdk15on-1.51.jar +bcpkix-jdk15on-1.54.jar +bcprov-jdk15on-1.54.jar bonecp-0.8.0.RELEASE.jar breeze-macros_2.11-0.12.jar breeze_2.11-0.12.jar @@ -42,6 +44,8 @@ commons-math3-3.4.1.jar commons-net-2.2.jar commons-pool-1.5.4.jar compress-lzf-1.0.3.jar +converter-jackson-2.2.0.jar +converter-scalars-2.2.0.jar core-1.1.2.jar curator-client-2.4.0.jar curator-framework-2.4.0.jar @@ -51,6 +55,7 @@ datanucleus-core-3.2.10.jar datanucleus-rdbms-3.2.9.jar derby-10.12.1.1.jar eigenbase-properties-1.1.5.jar +generex-1.0.1.jar guava-14.0.1.jar guice-3.0.jar guice-servlet-3.0.jar @@ -79,7 +84,11 @@ jackson-annotations-2.6.5.jar jackson-core-2.6.5.jar jackson-core-asl-1.9.13.jar jackson-databind-2.6.5.jar +jackson-dataformat-yaml-2.6.5.jar +jackson-jaxrs-base-2.6.5.jar +jackson-jaxrs-json-provider-2.6.5.jar jackson-mapper-asl-1.9.13.jar +jackson-module-jaxb-annotations-2.6.5.jar jackson-module-paranamer-2.6.5.jar jackson-module-scala_2.11-2.6.5.jar janino-3.0.0.jar @@ -100,6 +109,7 @@ jersey-container-servlet-2.22.2.jar jersey-container-servlet-core-2.22.2.jar jersey-guava-2.22.2.jar jersey-media-jaxb-2.22.2.jar +jersey-media-multipart-2.22.2.jar jersey-server-2.22.2.jar jets3t-0.9.3.jar jetty-6.1.26.jar @@ -116,10 +126,13 @@ jta-1.1.jar jtransforms-2.4.0.jar jul-to-slf4j-1.7.16.jar kryo-shaded-3.0.3.jar +kubernetes-client-2.2.13.jar +kubernetes-model-1.0.67.jar leveldbjni-all-1.8.jar libfb303-0.9.2.jar libthrift-0.9.2.jar log4j-1.2.17.jar +logging-interceptor-3.6.0.jar lz4-1.3.0.jar mail-1.4.7.jar mesos-1.0.0-shaded-protobuf.jar @@ -127,11 +140,14 @@ metrics-core-3.1.2.jar metrics-graphite-3.1.2.jar metrics-json-3.1.2.jar metrics-jvm-3.1.2.jar +mimepull-1.9.6.jar minlog-1.3.0.jar mx4j-3.0.2.jar netty-3.8.0.Final.jar netty-all-4.0.42.Final.jar objenesis-2.1.jar +okhttp-3.6.0.jar +okio-1.11.0.jar opencsv-2.3.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar @@ -148,6 +164,7 @@ pmml-schema-1.2.15.jar protobuf-java-2.5.0.jar py4j-0.10.4.jar pyrolite-4.13.jar +retrofit-2.2.0.jar scala-compiler-2.11.8.jar scala-library-2.11.8.jar scala-parser-combinators_2.11-1.0.4.jar @@ -157,6 +174,7 @@ scalap-2.11.8.jar shapeless_2.11-2.0.0.jar slf4j-api-1.7.16.jar slf4j-log4j12-1.7.16.jar +snakeyaml-1.15.jar snappy-0.2.jar snappy-java-1.1.2.6.jar spire-macros_2.11-0.7.4.jar @@ -171,4 +189,5 @@ validation-api-1.1.0.Final.jar xbean-asm5-shaded-4.4.jar xmlenc-0.52.jar xz-1.0.jar +zjsonpatch-0.3.0.jar zookeeper-3.4.5.jar diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4 index 71e7fb6dd243d..f71a3cd06216c 100644 --- a/dev/deps/spark-deps-hadoop-2.4 +++ b/dev/deps/spark-deps-hadoop-2.4 @@ -9,11 +9,13 @@ aopalliance-1.0.jar aopalliance-repackaged-2.4.0-b34.jar apache-log4j-extras-1.2.17.jar arpack_combined_all-0.1.jar +automaton-1.11-8.jar avro-1.7.7.jar avro-ipc-1.7.7.jar avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar -bcprov-jdk15on-1.51.jar +bcpkix-jdk15on-1.54.jar +bcprov-jdk15on-1.54.jar bonecp-0.8.0.RELEASE.jar breeze-macros_2.11-0.12.jar breeze_2.11-0.12.jar @@ -42,6 +44,8 @@ commons-math3-3.4.1.jar commons-net-2.2.jar commons-pool-1.5.4.jar compress-lzf-1.0.3.jar +converter-jackson-2.2.0.jar +converter-scalars-2.2.0.jar core-1.1.2.jar curator-client-2.4.0.jar curator-framework-2.4.0.jar 
@@ -51,6 +55,7 @@ datanucleus-core-3.2.10.jar datanucleus-rdbms-3.2.9.jar derby-10.12.1.1.jar eigenbase-properties-1.1.5.jar +generex-1.0.1.jar guava-14.0.1.jar guice-3.0.jar guice-servlet-3.0.jar @@ -79,7 +84,11 @@ jackson-annotations-2.6.5.jar jackson-core-2.6.5.jar jackson-core-asl-1.9.13.jar jackson-databind-2.6.5.jar +jackson-dataformat-yaml-2.6.5.jar +jackson-jaxrs-base-2.6.5.jar +jackson-jaxrs-json-provider-2.6.5.jar jackson-mapper-asl-1.9.13.jar +jackson-module-jaxb-annotations-2.6.5.jar jackson-module-paranamer-2.6.5.jar jackson-module-scala_2.11-2.6.5.jar janino-3.0.0.jar @@ -100,6 +109,7 @@ jersey-container-servlet-2.22.2.jar jersey-container-servlet-core-2.22.2.jar jersey-guava-2.22.2.jar jersey-media-jaxb-2.22.2.jar +jersey-media-multipart-2.22.2.jar jersey-server-2.22.2.jar jets3t-0.9.3.jar jetty-6.1.26.jar @@ -116,10 +126,13 @@ jta-1.1.jar jtransforms-2.4.0.jar jul-to-slf4j-1.7.16.jar kryo-shaded-3.0.3.jar +kubernetes-client-2.2.13.jar +kubernetes-model-1.0.67.jar leveldbjni-all-1.8.jar libfb303-0.9.2.jar libthrift-0.9.2.jar log4j-1.2.17.jar +logging-interceptor-3.6.0.jar lz4-1.3.0.jar mail-1.4.7.jar mesos-1.0.0-shaded-protobuf.jar @@ -127,11 +140,14 @@ metrics-core-3.1.2.jar metrics-graphite-3.1.2.jar metrics-json-3.1.2.jar metrics-jvm-3.1.2.jar +mimepull-1.9.6.jar minlog-1.3.0.jar mx4j-3.0.2.jar netty-3.8.0.Final.jar netty-all-4.0.42.Final.jar objenesis-2.1.jar +okhttp-3.6.0.jar +okio-1.11.0.jar opencsv-2.3.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar @@ -148,6 +164,7 @@ pmml-schema-1.2.15.jar protobuf-java-2.5.0.jar py4j-0.10.4.jar pyrolite-4.13.jar +retrofit-2.2.0.jar scala-compiler-2.11.8.jar scala-library-2.11.8.jar scala-parser-combinators_2.11-1.0.4.jar @@ -157,6 +174,7 @@ scalap-2.11.8.jar shapeless_2.11-2.0.0.jar slf4j-api-1.7.16.jar slf4j-log4j12-1.7.16.jar +snakeyaml-1.15.jar snappy-0.2.jar snappy-java-1.1.2.6.jar spire-macros_2.11-0.7.4.jar @@ -171,4 +189,5 @@ validation-api-1.1.0.Final.jar xbean-asm5-shaded-4.4.jar xmlenc-0.52.jar xz-1.0.jar +zjsonpatch-0.3.0.jar zookeeper-3.4.5.jar diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6 index ba31391495f54..211946d583879 100644 --- a/dev/deps/spark-deps-hadoop-2.6 +++ b/dev/deps/spark-deps-hadoop-2.6 @@ -13,11 +13,13 @@ apacheds-kerberos-codec-2.0.0-M15.jar api-asn1-api-1.0.0-M20.jar api-util-1.0.0-M20.jar arpack_combined_all-0.1.jar +automaton-1.11-8.jar avro-1.7.7.jar avro-ipc-1.7.7.jar avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar -bcprov-jdk15on-1.51.jar +bcpkix-jdk15on-1.54.jar +bcprov-jdk15on-1.54.jar bonecp-0.8.0.RELEASE.jar breeze-macros_2.11-0.12.jar breeze_2.11-0.12.jar @@ -46,6 +48,8 @@ commons-math3-3.4.1.jar commons-net-2.2.jar commons-pool-1.5.4.jar compress-lzf-1.0.3.jar +converter-jackson-2.2.0.jar +converter-scalars-2.2.0.jar core-1.1.2.jar curator-client-2.6.0.jar curator-framework-2.6.0.jar @@ -55,6 +59,7 @@ datanucleus-core-3.2.10.jar datanucleus-rdbms-3.2.9.jar derby-10.12.1.1.jar eigenbase-properties-1.1.5.jar +generex-1.0.1.jar gson-2.2.4.jar guava-14.0.1.jar guice-3.0.jar @@ -85,8 +90,12 @@ jackson-annotations-2.6.5.jar jackson-core-2.6.5.jar jackson-core-asl-1.9.13.jar jackson-databind-2.6.5.jar +jackson-dataformat-yaml-2.6.5.jar jackson-jaxrs-1.9.13.jar +jackson-jaxrs-base-2.6.5.jar +jackson-jaxrs-json-provider-2.6.5.jar jackson-mapper-asl-1.9.13.jar +jackson-module-jaxb-annotations-2.6.5.jar jackson-module-paranamer-2.6.5.jar jackson-module-scala_2.11-2.6.5.jar jackson-xc-1.9.13.jar @@ -108,6 +117,7 @@ jersey-container-servlet-2.22.2.jar 
jersey-container-servlet-core-2.22.2.jar jersey-guava-2.22.2.jar jersey-media-jaxb-2.22.2.jar +jersey-media-multipart-2.22.2.jar jersey-server-2.22.2.jar jets3t-0.9.3.jar jetty-6.1.26.jar @@ -124,10 +134,13 @@ jta-1.1.jar jtransforms-2.4.0.jar jul-to-slf4j-1.7.16.jar kryo-shaded-3.0.3.jar +kubernetes-client-2.2.13.jar +kubernetes-model-1.0.67.jar leveldbjni-all-1.8.jar libfb303-0.9.2.jar libthrift-0.9.2.jar log4j-1.2.17.jar +logging-interceptor-3.6.0.jar lz4-1.3.0.jar mail-1.4.7.jar mesos-1.0.0-shaded-protobuf.jar @@ -135,11 +148,14 @@ metrics-core-3.1.2.jar metrics-graphite-3.1.2.jar metrics-json-3.1.2.jar metrics-jvm-3.1.2.jar +mimepull-1.9.6.jar minlog-1.3.0.jar mx4j-3.0.2.jar netty-3.8.0.Final.jar netty-all-4.0.42.Final.jar objenesis-2.1.jar +okhttp-3.6.0.jar +okio-1.11.0.jar opencsv-2.3.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar @@ -156,6 +172,7 @@ pmml-schema-1.2.15.jar protobuf-java-2.5.0.jar py4j-0.10.4.jar pyrolite-4.13.jar +retrofit-2.2.0.jar scala-compiler-2.11.8.jar scala-library-2.11.8.jar scala-parser-combinators_2.11-1.0.4.jar @@ -165,6 +182,7 @@ scalap-2.11.8.jar shapeless_2.11-2.0.0.jar slf4j-api-1.7.16.jar slf4j-log4j12-1.7.16.jar +snakeyaml-1.15.jar snappy-0.2.jar snappy-java-1.1.2.6.jar spire-macros_2.11-0.7.4.jar @@ -180,4 +198,5 @@ xbean-asm5-shaded-4.4.jar xercesImpl-2.9.1.jar xmlenc-0.52.jar xz-1.0.jar +zjsonpatch-0.3.0.jar zookeeper-3.4.6.jar diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index b129e5a99e2ff..d0a472d3d3805 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -13,11 +13,13 @@ apacheds-kerberos-codec-2.0.0-M15.jar api-asn1-api-1.0.0-M20.jar api-util-1.0.0-M20.jar arpack_combined_all-0.1.jar +automaton-1.11-8.jar avro-1.7.7.jar avro-ipc-1.7.7.jar avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar -bcprov-jdk15on-1.51.jar +bcpkix-jdk15on-1.54.jar +bcprov-jdk15on-1.54.jar bonecp-0.8.0.RELEASE.jar breeze-macros_2.11-0.12.jar breeze_2.11-0.12.jar @@ -46,6 +48,8 @@ commons-math3-3.4.1.jar commons-net-2.2.jar commons-pool-1.5.4.jar compress-lzf-1.0.3.jar +converter-jackson-2.2.0.jar +converter-scalars-2.2.0.jar core-1.1.2.jar curator-client-2.6.0.jar curator-framework-2.6.0.jar @@ -55,6 +59,7 @@ datanucleus-core-3.2.10.jar datanucleus-rdbms-3.2.9.jar derby-10.12.1.1.jar eigenbase-properties-1.1.5.jar +generex-1.0.1.jar gson-2.2.4.jar guava-14.0.1.jar guice-3.0.jar @@ -85,8 +90,12 @@ jackson-annotations-2.6.5.jar jackson-core-2.6.5.jar jackson-core-asl-1.9.13.jar jackson-databind-2.6.5.jar +jackson-dataformat-yaml-2.6.5.jar jackson-jaxrs-1.9.13.jar +jackson-jaxrs-base-2.6.5.jar +jackson-jaxrs-json-provider-2.6.5.jar jackson-mapper-asl-1.9.13.jar +jackson-module-jaxb-annotations-2.6.5.jar jackson-module-paranamer-2.6.5.jar jackson-module-scala_2.11-2.6.5.jar jackson-xc-1.9.13.jar @@ -108,6 +117,7 @@ jersey-container-servlet-2.22.2.jar jersey-container-servlet-core-2.22.2.jar jersey-guava-2.22.2.jar jersey-media-jaxb-2.22.2.jar +jersey-media-multipart-2.22.2.jar jersey-server-2.22.2.jar jets3t-0.9.3.jar jetty-6.1.26.jar @@ -125,10 +135,13 @@ jta-1.1.jar jtransforms-2.4.0.jar jul-to-slf4j-1.7.16.jar kryo-shaded-3.0.3.jar +kubernetes-client-2.2.13.jar +kubernetes-model-1.0.67.jar leveldbjni-all-1.8.jar libfb303-0.9.2.jar libthrift-0.9.2.jar log4j-1.2.17.jar +logging-interceptor-3.6.0.jar lz4-1.3.0.jar mail-1.4.7.jar mesos-1.0.0-shaded-protobuf.jar @@ -136,11 +149,14 @@ metrics-core-3.1.2.jar metrics-graphite-3.1.2.jar metrics-json-3.1.2.jar metrics-jvm-3.1.2.jar +mimepull-1.9.6.jar minlog-1.3.0.jar 
mx4j-3.0.2.jar netty-3.8.0.Final.jar netty-all-4.0.42.Final.jar objenesis-2.1.jar +okhttp-3.6.0.jar +okio-1.11.0.jar opencsv-2.3.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar @@ -157,6 +173,7 @@ pmml-schema-1.2.15.jar protobuf-java-2.5.0.jar py4j-0.10.4.jar pyrolite-4.13.jar +retrofit-2.2.0.jar scala-compiler-2.11.8.jar scala-library-2.11.8.jar scala-parser-combinators_2.11-1.0.4.jar @@ -166,6 +183,7 @@ scalap-2.11.8.jar shapeless_2.11-2.0.0.jar slf4j-api-1.7.16.jar slf4j-log4j12-1.7.16.jar +snakeyaml-1.15.jar snappy-0.2.jar snappy-java-1.1.2.6.jar spire-macros_2.11-0.7.4.jar @@ -181,4 +199,5 @@ xbean-asm5-shaded-4.4.jar xercesImpl-2.9.1.jar xmlenc-0.52.jar xz-1.0.jar +zjsonpatch-0.3.0.jar zookeeper-3.4.6.jar diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index b34ab51f3b996..0cc9d71d962ce 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -490,6 +490,14 @@ def __hash__(self): sbt_test_goals=["mesos/test"] ) +kubernetes = Module( + name="kubernetes", + dependencies=[], + source_file_regexes=["resource-managers/kubernetes/core"], + build_profile_flags=["-Pkubernetes"], + sbt_test_goals=["kubernetes/test"] +) + # The root module is a dummy module which is used to run all of the tests. # No other modules should directly depend on this module. root = Module( diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh index 4014f42e1983c..b1555957ecc50 100755 --- a/dev/test-dependencies.sh +++ b/dev/test-dependencies.sh @@ -29,7 +29,7 @@ export LC_ALL=C # TODO: This would be much nicer to do in SBT, once SBT supports Maven-style resolution. # NOTE: These should match those in the release publishing script -HADOOP2_MODULE_PROFILES="-Phive-thriftserver -Pmesos -Pyarn -Phive" +HADOOP2_MODULE_PROFILES="-Phive-thriftserver -Pmesos -Pyarn -Phive -Pkubernetes" MVN="build/mvn" HADOOP_PROFILES=( hadoop-2.2 diff --git a/pom.xml b/pom.xml index 7f9325fa5f185..7035938515a58 100644 --- a/pom.xml +++ b/pom.xml @@ -136,7 +136,6 @@ 10.12.1.1 1.8.1 1.6.0 - 8.18.0 2.2.0 1.54 9.2.16.v20160414 @@ -308,35 +307,43 @@ ${chill.version} - - com.netflix.feign - feign-core - ${feign.version} - - - com.netflix.feign - feign-okhttp - ${feign.version} - - - com.netflix.feign - feign-jackson - ${feign.version} - - - com.netflix.feign - feign-jaxrs - ${feign.version} - com.squareup.retrofit2 retrofit ${retrofit.version} + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.core + jackson-annotations + + com.squareup.retrofit2 converter-jackson ${retrofit.version} + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.core + jackson-annotations + + com.squareup.retrofit2 @@ -649,6 +656,11 @@ jackson-annotations ${fasterxml.jackson.version} + + com.fasterxml.jackson.dataformat + jackson-dataformat-yaml + ${fasterxml.jackson.version} + diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index a227342f46771..c90a824b1b8b1 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -51,14 +51,30 @@ io.fabric8 kubernetes-client ${kubernetes.client.version} + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.dataformat + jackson-dataformat-yaml + + + - com.netflix.feign - feign-core - - - 
com.netflix.feign - feign-okhttp + com.fasterxml.jackson.dataformat + jackson-dataformat-yaml + ${fasterxml.jackson.version} org.glassfish.jersey.containers @@ -68,10 +84,6 @@ org.glassfish.jersey.media jersey-media-multipart - - com.netflix.feign - feign-jackson - com.squareup.retrofit2 retrofit @@ -85,16 +97,6 @@ converter-scalars - - com.netflix.feign - feign-jaxrs - - - javax.ws.rs - jsr311-api - - - com.fasterxml.jackson.jaxrs jackson-jaxrs-json-provider diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala index b13800f389605..25e7c3b3ebd89 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala @@ -18,12 +18,12 @@ package org.apache.spark.deploy.kubernetes.submit import io.fabric8.kubernetes.api.model.{PodBuilder, Secret, SecretBuilder} import scala.collection.JavaConverters._ +import scala.language.implicitConversions import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.KubernetesCredentials import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.internal.config.OptionalConfigEntry private[spark] trait DriverPodKubernetesCredentialsMounter { From fdd50f19cceca13f821be5c3676ea5f7f7ab2b9c Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 14 Jun 2017 14:03:09 -0700 Subject: [PATCH 138/156] New API for custom labels and annotations. (#346) * New API for custom labels and annotations. This APi allows for these labels and annotations to have = and , characters, which is hard to accomplish in the old scheme. * Compare correct values in requirements * Use helper method * Address comments. * Fix scalastyle * Use variable * Remove unused import --- docs/running-on-kubernetes.md | 48 +++++++++++++++++++ .../kubernetes/ConfigurationUtils.scala | 31 +++++++++++- .../spark/deploy/kubernetes/config.scala | 5 ++ .../deploy/kubernetes/submit/Client.scala | 37 ++++++++------ .../KubernetesClusterSchedulerBackend.scala | 19 ++++---- .../kubernetes/submit/ClientV2Suite.scala | 13 ++++- 6 files changed, 127 insertions(+), 26 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index c10630fc5c5c6..52d847b4420cf 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -524,10 +524,52 @@ from the other deployment modes. See the [configuration page](configuration.html (typically 6-10%). + + spark.kubernetes.driver.label.[labelKey] + (none) + + Adds a label to the driver pod, with key labelKey and the value as the configuration's value. For + example, setting spark.kubernetes.driver.label.identifier to myIdentifier will result in + the driver pod having a label with key identifier and value myIdentifier. Multiple labels + can be added by setting multiple configurations with this prefix. + + + + spark.kubernetes.driver.annotation.[annotationKey] + (none) + + Adds an annotation to the driver pod, with key annotationKey and the value as the configuration's + value. 
For example, setting spark.kubernetes.driver.annotation.identifier to myIdentifier + will result in the driver pod having an annotation with key identifier and value + myIdentifier. Multiple annotations can be added by setting multiple configurations with this prefix. + + + + spark.kubernetes.executor.label.[labelKey] + (none) + + Adds a label to all executor pods, with key labelKey and the value as the configuration's value. For + example, setting spark.kubernetes.executor.label.identifier to myIdentifier will result in + the executor pods having a label with key identifier and value myIdentifier. Multiple + labels can be added by setting multiple configurations with this prefix. + + + + spark.kubernetes.executor.annotation.[annotationKey] + (none) + + Adds an annotation to the executor pods, with key annotationKey and the value as the configuration's + value. For example, setting spark.kubernetes.executor.annotation.identifier to myIdentifier + will result in the executor pods having an annotation with key identifier and value + myIdentifier. Multiple annotations can be added by setting multiple configurations with this prefix. + + spark.kubernetes.driver.labels (none) + Deprecated. Use spark.kubernetes.driver.label. instead which supports = + and , characters in label values. Custom labels that will be added to the driver pod. This should be a comma-separated list of label key-value pairs, where each label is in the format key=value. Note that Spark also adds its own labels to the driver pod for bookkeeping purposes. @@ -537,6 +579,8 @@ from the other deployment modes. See the [configuration page](configuration.html spark.kubernetes.driver.annotations (none) + Deprecated. Use spark.kubernetes.driver.annotation. instead which supports + = and , characters in annotation values. Custom annotations that will be added to the driver pod. This should be a comma-separated list of label key-value pairs, where each annotation is in the format key=value. @@ -545,6 +589,8 @@ from the other deployment modes. See the [configuration page](configuration.html spark.kubernetes.executor.labels (none) + Deprecated. Use spark.kubernetes.executor.label. instead which supports + = and , characters in label values. Custom labels that will be added to the executor pods. This should be a comma-separated list of label key-value pairs, where each label is in the format key=value. Note that Spark also adds its own labels to the executor pods for bookkeeping purposes. @@ -554,6 +600,8 @@ from the other deployment modes. See the [configuration page](configuration.html spark.kubernetes.executor.annotations (none) + Deprecated. Use spark.kubernetes.executor.annotation. instead which supports + = and , characters in annotation values. Custom annotations that will be added to the executor pods. This should be a comma-separated list of annotation key-value pairs, where each annotation is in the format key=value. 
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala index f3bd598556019..f461da4809b4d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala @@ -17,9 +17,11 @@ package org.apache.spark.deploy.kubernetes -import org.apache.spark.SparkException +import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.OptionalConfigEntry -object ConfigurationUtils { +object ConfigurationUtils extends Logging { def parseKeyValuePairs( maybeKeyValues: Option[String], configKey: String, @@ -38,4 +40,29 @@ object ConfigurationUtils { }).toMap }).getOrElse(Map.empty[String, String]) } + + def combinePrefixedKeyValuePairsWithDeprecatedConf( + sparkConf: SparkConf, + prefix: String, + deprecatedConf: OptionalConfigEntry[String], + configType: String): Map[String, String] = { + val deprecatedKeyValuePairsString = sparkConf.get(deprecatedConf) + deprecatedKeyValuePairsString.foreach { _ => + logWarning(s"Configuration with key ${deprecatedConf.key} is deprecated. Use" + + s" configurations with prefix $prefix instead.") + } + val fromDeprecated = parseKeyValuePairs( + deprecatedKeyValuePairsString, + deprecatedConf.key, + configType) + val fromPrefix = sparkConf.getAllWithPrefix(prefix) + val combined = fromDeprecated.toSeq ++ fromPrefix + combined.groupBy(_._1).foreach { + case (key, values) => + require(values.size == 1, + s"Cannot have multiple values for a given $configType key, got key $key with" + + s" values $values") + } + combined.toMap + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index d1fd88fc880d1..70ea19e44ef8c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -113,6 +113,11 @@ package object config extends Logging { .bytesConf(ByteUnit.MiB) .createOptional + private[spark] val KUBERNETES_DRIVER_LABEL_PREFIX = "spark.kubernetes.driver.label." + private[spark] val KUBERNETES_DRIVER_ANNOTATION_PREFIX = "spark.kubernetes.driver.annotation." + private[spark] val KUBERNETES_EXECUTOR_LABEL_PREFIX = "spark.kubernetes.executor.label." + private[spark] val KUBERNETES_EXECUTOR_ANNOTATION_PREFIX = "spark.kubernetes.executor.annotation." + private[spark] val KUBERNETES_DRIVER_LABELS = ConfigBuilder("spark.kubernetes.driver.labels") .doc("Custom labels that will be added to the driver pod. 
This should be a comma-separated" + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index c2e616eadc1e0..a9699d8c34b4e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -29,6 +29,7 @@ import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.rest.kubernetes.ResourceStagingServerSslOptionsProviderImpl import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.launcher.SparkLauncher import org.apache.spark.util.Utils @@ -82,17 +83,25 @@ private[spark] class Client( def run(): Unit = { validateNoDuplicateFileNames(sparkJars) validateNoDuplicateFileNames(sparkFiles) - val parsedCustomLabels = ConfigurationUtils.parseKeyValuePairs( - customLabels, KUBERNETES_DRIVER_LABELS.key, "labels") - require(!parsedCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + - s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping" + - s" operations.") - val parsedCustomAnnotations = ConfigurationUtils.parseKeyValuePairs( - customAnnotations, KUBERNETES_DRIVER_ANNOTATIONS.key, "annotations") - require(!parsedCustomAnnotations.contains(SPARK_APP_NAME_ANNOTATION), s"Annotation with key" + - s" $SPARK_APP_NAME_ANNOTATION is not allowed as it is reserved for Spark bookkeeping" + - s" operations.") - val allLabels = parsedCustomLabels ++ Map( + + val driverCustomLabels = ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( + sparkConf, + KUBERNETES_DRIVER_LABEL_PREFIX, + KUBERNETES_DRIVER_LABELS, + "label") + require(!driverCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + + s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping" + + s" operations.") + + val driverCustomAnnotations = ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( + sparkConf, + KUBERNETES_DRIVER_ANNOTATION_PREFIX, + KUBERNETES_DRIVER_ANNOTATIONS, + "annotation") + require(!driverCustomAnnotations.contains(SPARK_APP_NAME_ANNOTATION), + s"Annotation with key $SPARK_APP_NAME_ANNOTATION is not allowed as it is reserved for" + + s" Spark bookkeeping operations.") + val allDriverLabels = driverCustomLabels ++ Map( SPARK_APP_ID_LABEL -> kubernetesAppId, SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) @@ -138,8 +147,8 @@ private[spark] class Client( val basePod = new PodBuilder() .withNewMetadata() .withName(kubernetesDriverPodName) - .addToLabels(allLabels.asJava) - .addToAnnotations(parsedCustomAnnotations.asJava) + .addToLabels(allDriverLabels.asJava) + .addToAnnotations(driverCustomAnnotations.toMap.asJava) .addToAnnotations(SPARK_APP_NAME_ANNOTATION, appName) .endMetadata() .withNewSpec() @@ -148,7 +157,7 @@ private[spark] class Client( .endSpec() val maybeSubmittedDependencyUploader = initContainerComponentsProvider - .provideInitContainerSubmittedDependencyUploader(allLabels) + .provideInitContainerSubmittedDependencyUploader(allDriverLabels) val maybeSubmittedResourceIdentifiers = maybeSubmittedDependencyUploader.map { uploader => SubmittedResources(uploader.uploadJars(), uploader.uploadFiles()) } diff --git 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 85ce5f01200b2..4165eb8cbd067 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -62,10 +62,11 @@ private[spark] class KubernetesClusterSchedulerBackend( org.apache.spark.internal.config.EXECUTOR_CLASS_PATH) private val executorJarsDownloadDir = conf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) - private val executorLabels = ConfigurationUtils.parseKeyValuePairs( - conf.get(KUBERNETES_EXECUTOR_LABELS), - KUBERNETES_EXECUTOR_LABELS.key, - "executor labels") + private val executorLabels = ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( + conf, + KUBERNETES_EXECUTOR_LABEL_PREFIX, + KUBERNETES_EXECUTOR_LABELS, + "executor label") require( !executorLabels.contains(SPARK_APP_ID_LABEL), s"Custom executor labels cannot contain $SPARK_APP_ID_LABEL as it is" + @@ -74,11 +75,13 @@ private[spark] class KubernetesClusterSchedulerBackend( !executorLabels.contains(SPARK_EXECUTOR_ID_LABEL), s"Custom executor labels cannot contain $SPARK_EXECUTOR_ID_LABEL as it is reserved for" + s" Spark.") - private val executorAnnotations = ConfigurationUtils.parseKeyValuePairs( - conf.get(KUBERNETES_EXECUTOR_ANNOTATIONS), - KUBERNETES_EXECUTOR_ANNOTATIONS.key, - "executor annotations") + private val executorAnnotations = + ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( + conf, + KUBERNETES_EXECUTOR_ANNOTATION_PREFIX, + KUBERNETES_EXECUTOR_ANNOTATIONS, + "executor annotation") private var shufflePodCache: Option[ShufflePodCache] = None private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val dockerImagePullPolicy = conf.get(DOCKER_IMAGE_PULL_POLICY) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala index 193f36a7423b2..3945bef5bcfb8 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala @@ -49,12 +49,17 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val APP_ID = "spark-id" private val CUSTOM_LABEL_KEY = "customLabel" private val CUSTOM_LABEL_VALUE = "customLabelValue" + private val DEPRECATED_CUSTOM_LABEL_KEY = "deprecatedCustomLabel" + private val DEPRECATED_CUSTOM_LABEL_VALUE = "deprecatedCustomLabelValue" private val ALL_EXPECTED_LABELS = Map( CUSTOM_LABEL_KEY -> CUSTOM_LABEL_VALUE, + DEPRECATED_CUSTOM_LABEL_KEY -> DEPRECATED_CUSTOM_LABEL_VALUE, SPARK_APP_ID_LABEL -> APP_ID, SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) private val CUSTOM_ANNOTATION_KEY = "customAnnotation" private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" + private val DEPRECATED_CUSTOM_ANNOTATION_KEY = "deprecatedCustomAnnotation" + private val DEPRECATED_CUSTOM_ANNOTATION_VALUE = "deprecatedCustomAnnotationValue" private val INIT_CONTAINER_SECRET_NAME = "init-container-secret" 
private val INIT_CONTAINER_SECRET_DATA = Map("secret-key" -> "secret-data") private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" @@ -94,8 +99,11 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .set(DRIVER_DOCKER_IMAGE, CUSTOM_DRIVER_IMAGE) .set(org.apache.spark.internal.config.DRIVER_MEMORY, DRIVER_MEMORY_MB.toLong) .set(KUBERNETES_DRIVER_MEMORY_OVERHEAD, DRIVER_MEMORY_OVERHEAD_MB.toLong) - .set(KUBERNETES_DRIVER_LABELS, s"$CUSTOM_LABEL_KEY=$CUSTOM_LABEL_VALUE") - .set(KUBERNETES_DRIVER_ANNOTATIONS, s"$CUSTOM_ANNOTATION_KEY=$CUSTOM_ANNOTATION_VALUE") + .set(KUBERNETES_DRIVER_LABELS, s"$DEPRECATED_CUSTOM_LABEL_KEY=$DEPRECATED_CUSTOM_LABEL_VALUE") + .set(KUBERNETES_DRIVER_ANNOTATIONS, + s"$DEPRECATED_CUSTOM_ANNOTATION_KEY=$DEPRECATED_CUSTOM_ANNOTATION_VALUE") + .set(s"$KUBERNETES_DRIVER_LABEL_PREFIX$CUSTOM_LABEL_KEY", CUSTOM_LABEL_VALUE) + .set(s"$KUBERNETES_DRIVER_ANNOTATION_PREFIX$CUSTOM_ANNOTATION_KEY", CUSTOM_ANNOTATION_VALUE) .set(org.apache.spark.internal.config.DRIVER_CLASS_PATH, DRIVER_EXTRA_CLASSPATH) .set(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS, DRIVER_JAVA_OPTIONS) private val EXECUTOR_INIT_CONF_KEY = "executor-init-conf" @@ -444,6 +452,7 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private def podHasCorrectAnnotations(pod: Pod): Boolean = { val expectedAnnotations = Map( + DEPRECATED_CUSTOM_ANNOTATION_KEY -> DEPRECATED_CUSTOM_ANNOTATION_VALUE, CUSTOM_ANNOTATION_KEY -> CUSTOM_ANNOTATION_VALUE, SPARK_APP_NAME_ANNOTATION -> APP_NAME, BOOTSTRAPPED_POD_ANNOTATION -> TRUE) From a6291c67ef1dcc3abac72b8813be21fdef27d8a4 Mon Sep 17 00:00:00 2001 From: Hong Zhiguo Date: Thu, 22 Jun 2017 16:57:12 +0800 Subject: [PATCH 139/156] Allow spark driver find shuffle pods in specified namespace (#357) The conf property spark.kubernetes.shuffle.namespace is used to specify the namespace of shuffle pods. In normal cases, only one "shuffle daemonset" is deployed and shared by all spark pods. The spark driver should be able to list and watch shuffle pods in the namespace specified by the user. Note: by default, the spark driver pod doesn't have authority to list and watch shuffle pods in another namespace. Some action is needed to grant it that authority; for example, the ABAC policy below works. ``` {"apiVersion": "abac.authorization.kubernetes.io/v1beta1", "kind": "Policy", "spec": {"group": "system:serviceaccounts", "namespace": "SHUFFLE_NAMESPACE", "resource": "pods", "readonly": true}} ``` --- .../spark/scheduler/cluster/kubernetes/ShufflePodCache.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala index 53b4e745ce7c7..15e02664589eb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala @@ -37,7 +37,8 @@ private[spark] class ShufflePodCache ( def start(): Unit = { // seed the initial cache.
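+ // The shuffle service pods may run in a namespace other than the driver's (set via
+ // spark.kubernetes.shuffle.namespace), so the initial listing and the watch below are both scoped to dsNamespace.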
- val pods = client.pods().withLabels(dsLabels.asJava).list() + val pods = client.pods() + .inNamespace(dsNamespace).withLabels(dsLabels.asJava).list() pods.getItems.asScala.foreach { pod => if (Readiness.isReady(pod)) { @@ -50,6 +51,7 @@ private[spark] class ShufflePodCache ( watcher = client .pods() + .inNamespace(dsNamespace) .withLabels(dsLabels.asJava) .watch(new Watcher[Pod] { override def eventReceived(action: Watcher.Action, p: Pod): Unit = { From 08fe9446a5f2a53de5ea7ec299d06ce5085d921c Mon Sep 17 00:00:00 2001 From: Chun Chen Date: Fri, 23 Jun 2017 14:03:24 +0800 Subject: [PATCH 140/156] Bypass init-containers when possible (#348) --- .../deploy/kubernetes/submit/Client.scala | 41 ++++++++++--------- ...riverInitContainerComponentsProvider.scala | 29 +++++++++---- .../submit/InitContainerBundle.scala | 26 ++++++++++++ .../submit/KubernetesFileUtils.scala | 4 ++ .../kubernetes/submit/ClientV2Suite.scala | 24 ++++------- 5 files changed, 80 insertions(+), 44 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerBundle.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index a9699d8c34b4e..ac3a51e74f838 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -156,31 +156,33 @@ private[spark] class Client( .addToContainers(driverContainer) .endSpec() - val maybeSubmittedDependencyUploader = initContainerComponentsProvider - .provideInitContainerSubmittedDependencyUploader(allDriverLabels) - val maybeSubmittedResourceIdentifiers = maybeSubmittedDependencyUploader.map { uploader => + val maybeSubmittedResourceIdentifiers = initContainerComponentsProvider + .provideInitContainerSubmittedDependencyUploader(allDriverLabels) + .map { uploader => SubmittedResources(uploader.uploadJars(), uploader.uploadFiles()) } - val maybeSecretBuilder = initContainerComponentsProvider - .provideSubmittedDependenciesSecretBuilder( - maybeSubmittedResourceIdentifiers.map(_.secrets())) - val maybeSubmittedDependenciesSecret = maybeSecretBuilder.map(_.build()) - val initContainerConfigMap = initContainerComponentsProvider - .provideInitContainerConfigMapBuilder(maybeSubmittedResourceIdentifiers.map(_.ids())) - .build() - val podWithInitContainer = initContainerComponentsProvider - .provideInitContainerBootstrap() - .bootstrapInitContainerAndVolumes(driverContainer.getName, basePod) + val maybeSubmittedDependenciesSecret = initContainerComponentsProvider + .provideSubmittedDependenciesSecretBuilder( + maybeSubmittedResourceIdentifiers.map(_.secrets())) + .map(_.build()) val containerLocalizedFilesResolver = initContainerComponentsProvider - .provideContainerLocalizedFilesResolver() + .provideContainerLocalizedFilesResolver() val resolvedSparkJars = containerLocalizedFilesResolver.resolveSubmittedSparkJars() val resolvedSparkFiles = containerLocalizedFilesResolver.resolveSubmittedSparkFiles() - val executorInitContainerConfiguration = initContainerComponentsProvider - .provideExecutorInitContainerConfiguration() - val sparkConfWithExecutorInit = executorInitContainerConfiguration - .configureSparkConfForExecutorInitContainer(sparkConf) + val initContainerBundler = 
initContainerComponentsProvider + .provideInitContainerBundle(maybeSubmittedResourceIdentifiers.map(_.ids()), + resolvedSparkJars ++ resolvedSparkFiles) + + val podWithInitContainer = initContainerBundler.map( + _.sparkPodInitContainerBootstrap + .bootstrapInitContainerAndVolumes(driverContainer.getName, basePod)) + .getOrElse(basePod) + val sparkConfWithExecutorInit = initContainerBundler.map( + _.executorInitContainerConfiguration + .configureSparkConfForExecutorInitContainer(sparkConf)) + .getOrElse(sparkConf) val credentialsMounter = kubernetesCredentialsMounterProvider .getDriverPodKubernetesCredentialsMounter() val credentialsSecret = credentialsMounter.createCredentialsSecret() @@ -224,7 +226,8 @@ private[spark] class Client( .watch(loggingPodStatusWatcher)) { _ => val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) try { - val driverOwnedResources = Seq(initContainerConfigMap) ++ + val driverOwnedResources = initContainerBundler.map( + _.sparkInitContainerConfigMap).toSeq ++ maybeSubmittedDependenciesSecret.toSeq ++ credentialsSecret.toSeq val driverPodOwnerReference = new OwnerReferenceBuilder() diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala index cfc61e193dcff..cc1837cce6736 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.deploy.kubernetes.submit +import io.fabric8.kubernetes.api.model.ConfigMap + import org.apache.spark.{SparkConf, SSLOptions} import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, OptionRequirements, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} import org.apache.spark.deploy.kubernetes.config._ @@ -30,17 +32,15 @@ import org.apache.spark.util.Utils */ private[spark] trait DriverInitContainerComponentsProvider { - def provideInitContainerConfigMapBuilder( - maybeSubmittedResourceIds: Option[SubmittedResourceIds]) - : SparkInitContainerConfigMapBuilder def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver - def provideExecutorInitContainerConfiguration(): ExecutorInitContainerConfiguration def provideInitContainerSubmittedDependencyUploader( driverPodLabels: Map[String, String]): Option[SubmittedDependencyUploader] def provideSubmittedDependenciesSecretBuilder( maybeSubmittedResourceSecrets: Option[SubmittedResourceSecrets]) : Option[SubmittedDependencySecretBuilder] def provideInitContainerBootstrap(): SparkPodInitContainerBootstrap + def provideInitContainerBundle(maybeSubmittedResourceIds: Option[SubmittedResourceIds], + uris: Iterable[String]): Option[InitContainerBundle] } private[spark] class DriverInitContainerComponentsProviderImpl( @@ -105,9 +105,8 @@ private[spark] class DriverInitContainerComponentsProviderImpl( private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) private val downloadTimeoutMinutes = sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) - override def provideInitContainerConfigMapBuilder( - maybeSubmittedResourceIds: Option[SubmittedResourceIds]) - : SparkInitContainerConfigMapBuilder = { + private def 
provideInitContainerConfigMap( + maybeSubmittedResourceIds: Option[SubmittedResourceIds]): ConfigMap = { val submittedDependencyConfigPlugin = for { stagingServerUri <- maybeResourceStagingServerUri jarsResourceId <- maybeSubmittedResourceIds.map(_.jarsResourceId) @@ -136,7 +135,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( filesDownloadPath, configMapName, configMapKey, - submittedDependencyConfigPlugin) + submittedDependencyConfigPlugin).build() } override def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver = { @@ -144,7 +143,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( sparkJars, sparkFiles, jarsDownloadPath, filesDownloadPath) } - override def provideExecutorInitContainerConfiguration(): ExecutorInitContainerConfiguration = { + private def provideExecutorInitContainerConfiguration(): ExecutorInitContainerConfiguration = { new ExecutorInitContainerConfigurationImpl( maybeSecretName, INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH, @@ -202,4 +201,16 @@ private[spark] class DriverInitContainerComponentsProviderImpl( configMapKey, resourceStagingServerSecretPlugin) } + + override def provideInitContainerBundle( + maybeSubmittedResourceIds: Option[SubmittedResourceIds], + uris: Iterable[String]): Option[InitContainerBundle] = { + val containerLocalizedFilesResolver = provideContainerLocalizedFilesResolver() + // Bypass init-containers if `spark.jars` and `spark.files` is empty or only has `local://` URIs + if (KubernetesFileUtils.getNonContainerLocalFiles(uris).nonEmpty) { + Some(InitContainerBundle(provideInitContainerConfigMap(maybeSubmittedResourceIds), + provideInitContainerBootstrap(), + provideExecutorInitContainerConfiguration())) + } else None + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerBundle.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerBundle.scala new file mode 100644 index 0000000000000..ba44f794d5811 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerBundle.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit + +import io.fabric8.kubernetes.api.model.ConfigMap + +import org.apache.spark.deploy.kubernetes.{SparkPodInitContainerBootstrap} + +case class InitContainerBundle( + sparkInitContainerConfigMap: ConfigMap, + sparkPodInitContainerBootstrap: SparkPodInitContainerBootstrap, + executorInitContainerConfiguration: ExecutorInitContainerConfiguration) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala index 1b0af3fa9fb01..d688bf29808fb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala @@ -33,6 +33,10 @@ private[spark] object KubernetesFileUtils { filterUriStringsByScheme(uris, _ == "local") } + def getNonContainerLocalFiles(uris: Iterable[String]): Iterable[String] = { + filterUriStringsByScheme(uris, _ != "local") + } + def getOnlySubmitterLocalFiles(uris: Iterable[String]): Iterable[String] = { filterUriStringsByScheme(uris, _ == "file") } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala index 3945bef5bcfb8..8992a56e20c80 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala @@ -123,8 +123,6 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val CREDENTIALS_SET_CONF = "spark.kubernetes.driverCredentials.provided" private val CREDENTIALS_SET_ANNOTATION = "credentials-set" - @Mock - private var initContainerConfigMapBuilder: SparkInitContainerConfigMapBuilder = _ @Mock private var containerLocalizedFilesResolver: ContainerLocalizedFilesResolver = _ @Mock @@ -173,12 +171,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { }) when(initContainerComponentsProvider.provideContainerLocalizedFilesResolver()) .thenReturn(containerLocalizedFilesResolver) - when(initContainerComponentsProvider.provideExecutorInitContainerConfiguration()) - .thenReturn(executorInitContainerConfiguration) when(submittedDependenciesSecretBuilder.build()) .thenReturn(INIT_CONTAINER_SECRET) - when(initContainerConfigMapBuilder.build()) - .thenReturn(INIT_CONTAINER_CONFIG_MAP) when(kubernetesClient.pods()).thenReturn(podOps) when(podOps.create(any())).thenAnswer(new Answer[Pod] { override def answer(invocation: InvocationOnMock): Pod = { @@ -214,9 +208,10 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { when(initContainerComponentsProvider .provideSubmittedDependenciesSecretBuilder(Some(SUBMITTED_RESOURCES.secrets()))) .thenReturn(Some(submittedDependenciesSecretBuilder)) - when(initContainerComponentsProvider - .provideInitContainerConfigMapBuilder(Some(SUBMITTED_RESOURCES.ids()))) - .thenReturn(initContainerConfigMapBuilder) + when(initContainerComponentsProvider.provideInitContainerBundle(Some(SUBMITTED_RESOURCES.ids()), + RESOLVED_SPARK_JARS ++ RESOLVED_SPARK_FILES)) + .thenReturn(Some(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, + 
initContainerBootstrap, executorInitContainerConfiguration))) runAndVerifyDriverPodHasCorrectProperties() val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) @@ -232,8 +227,6 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { verifyConfigMapWasCreated(createdResources) verify(submittedDependencyUploader).uploadJars() verify(submittedDependencyUploader).uploadFiles() - verify(initContainerComponentsProvider) - .provideInitContainerConfigMapBuilder(Some(SUBMITTED_RESOURCES.ids())) verify(initContainerComponentsProvider) .provideSubmittedDependenciesSecretBuilder(Some(SUBMITTED_RESOURCES.secrets())) } @@ -250,8 +243,6 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { verifyConfigMapWasCreated(createdResources) verify(submittedDependencyUploader, times(0)).uploadJars() verify(submittedDependencyUploader, times(0)).uploadFiles() - verify(initContainerComponentsProvider) - .provideInitContainerConfigMapBuilder(None) verify(initContainerComponentsProvider) .provideSubmittedDependenciesSecretBuilder(None) } @@ -321,9 +312,10 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { when(initContainerComponentsProvider .provideSubmittedDependenciesSecretBuilder(None)) .thenReturn(None) - when(initContainerComponentsProvider - .provideInitContainerConfigMapBuilder(None)) - .thenReturn(initContainerConfigMapBuilder) + when(initContainerComponentsProvider.provideInitContainerBundle(None, RESOLVED_SPARK_JARS ++ + RESOLVED_SPARK_FILES)) + .thenReturn(Some(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, + initContainerBootstrap, executorInitContainerConfiguration))) } private def expectationsForNoMountedCredentials(): Unit = { From 8b3248fcca89e9958a65c7519df18288c097c74e Mon Sep 17 00:00:00 2001 From: sandflee Date: Fri, 23 Jun 2017 15:48:51 +0800 Subject: [PATCH 141/156] Config for hard cpu limit on pods; default unlimited (#356) --- docs/running-on-kubernetes.md | 14 ++++++++++++++ .../apache/spark/deploy/kubernetes/config.scala | 12 ++++++++++++ .../spark/deploy/kubernetes/submit/Client.scala | 17 ++++++++++++++++- .../KubernetesClusterSchedulerBackend.scala | 17 ++++++++++++++++- 4 files changed, 58 insertions(+), 2 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 52d847b4420cf..3a50860f826c5 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -718,6 +718,20 @@ from the other deployment modes. See the [configuration page](configuration.html Docker image pull policy used when pulling Docker images with Kubernetes. 
+ + spark.kubernetes.driver.limit.cores + (none) + + Specify the hard cpu limit for the driver pod + + + + spark.kubernetes.executor.limit.cores + (none) + + Specify the hard cpu limit for a single executor pod + + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 70ea19e44ef8c..e1c1ab9d459fc 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -485,6 +485,18 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val KUBERNETES_DRIVER_LIMIT_CORES = + ConfigBuilder("spark.kubernetes.driver.limit.cores") + .doc("Specify the hard cpu limit for the driver pod") + .stringConf + .createOptional + + private[spark] val KUBERNETES_EXECUTOR_LIMIT_CORES = + ConfigBuilder("spark.kubernetes.executor.limit.cores") + .doc("Specify the hard cpu limit for a single executor pod") + .stringConf + .createOptional + private[spark] def resolveK8sMaster(rawMasterString: String): String = { if (!rawMasterString.startsWith("k8s://")) { throw new IllegalArgumentException("Master URL should start with k8s:// in Kubernetes mode.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index ac3a51e74f838..8220127eac449 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -64,6 +64,7 @@ private[spark] class Client( // CPU settings private val driverCpuCores = sparkConf.getOption("spark.driver.cores").getOrElse("1") + private val driverLimitCores = sparkConf.getOption(KUBERNETES_DRIVER_LIMIT_CORES.key) // Memory settings private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) @@ -139,7 +140,6 @@ private[spark] class Client( .endEnv() .withNewResources() .addToRequests("cpu", driverCpuQuantity) - .addToLimits("cpu", driverCpuQuantity) .addToRequests("memory", driverMemoryQuantity) .addToLimits("memory", driverMemoryLimitQuantity) .endResources() @@ -156,6 +156,21 @@ private[spark] class Client( .addToContainers(driverContainer) .endSpec() + driverLimitCores.map { + limitCores => + val driverCpuLimitQuantity = new QuantityBuilder(false) + .withAmount(limitCores) + .build() + basePod + .editSpec() + .editFirstContainer() + .editResources + .addToLimits("cpu", driverCpuLimitQuantity) + .endResources() + .endContainer() + .endSpec() + } + val maybeSubmittedResourceIdentifiers = initContainerComponentsProvider .provideInitContainerSubmittedDependencyUploader(allDriverLabels) .map { uploader => diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 4165eb8cbd067..31cf929b94e8b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -108,6 +108,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private val executorMemoryWithOverhead = executorMemoryMb + memoryOverheadMb private val executorCores = conf.getOption("spark.executor.cores").getOrElse("1") + private val executorLimitCores = conf.getOption(KUBERNETES_EXECUTOR_LIMIT_CORES.key) private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("kubernetes-executor-requests")) @@ -438,7 +439,6 @@ private[spark] class KubernetesClusterSchedulerBackend( .addToRequests("memory", executorMemoryQuantity) .addToLimits("memory", executorMemoryLimitQuantity) .addToRequests("cpu", executorCpuQuantity) - .addToLimits("cpu", executorCpuQuantity) .endResources() .addAllToEnv(requiredEnv.asJava) .addToEnv(executorExtraClasspathEnv.toSeq: _*) @@ -446,6 +446,21 @@ private[spark] class KubernetesClusterSchedulerBackend( .endContainer() .endSpec() + executorLimitCores.map { + limitCores => + val executorCpuLimitQuantity = new QuantityBuilder(false) + .withAmount(limitCores) + .build() + basePodBuilder + .editSpec() + .editFirstContainer() + .editResources + .addToLimits("cpu", executorCpuLimitQuantity) + .endResources() + .endContainer() + .endSpec() + } + val withMaybeShuffleConfigPodBuilder = shuffleServiceConfig .map { config => config.shuffleDirs.foldLeft(basePodBuilder) { (builder, dir) => From 6f6cfd62529d4928d9c72e436fd5bc0da6e65db9 Mon Sep 17 00:00:00 2001 From: Yinan Li Date: Thu, 29 Jun 2017 12:14:42 -0700 Subject: [PATCH 142/156] Allow number of executor cores to have fractional values (#361) This commit tries to solve issue #359 by allowing the `spark.executor.cores` configuration key to take fractional values, e.g., 0.5 or 1.5. The value is used to specify the cpu request when creating the executor pods, which is allowed to be fractional by Kubernetes. When the value is passed to the executor process through the environment variable `SPARK_EXECUTOR_CORES`, the value is rounded up to the closest integer as required by the `CoarseGrainedExecutorBackend`. 
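For illustration only (this sketch is not part of the patch's diff, and the variable names are illustrative), the following shows how a fractional value such as 1.5 would flow through under the change described above:

```
// Sketch: a fractional spark.executor.cores value keeps its precision in the
// pod's CPU request, while the executor backend sees the rounded-up integer.
val executorCores = 1.5                        // e.g. --conf spark.executor.cores=1.5
val cpuRequestAmount = executorCores.toString  // "1.5" -> CPU request quantity on the pod
val envExecutorCores =
  math.ceil(executorCores).toInt.toString      // "2"  -> exported as SPARK_EXECUTOR_CORES
```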
Signed-off-by: Yinan Li --- .../kubernetes/KubernetesClusterSchedulerBackend.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 31cf929b94e8b..d880cee315c0d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -107,7 +107,7 @@ private[spark] class KubernetesClusterSchedulerBackend( MEMORY_OVERHEAD_MIN)) private val executorMemoryWithOverhead = executorMemoryMb + memoryOverheadMb - private val executorCores = conf.getOption("spark.executor.cores").getOrElse("1") + private val executorCores = conf.getDouble("spark.executor.cores", 1d) private val executorLimitCores = conf.getOption(KUBERNETES_EXECUTOR_LIMIT_CORES.key) private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( @@ -377,7 +377,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .withAmount(s"${executorMemoryWithOverhead}M") .build() val executorCpuQuantity = new QuantityBuilder(false) - .withAmount(executorCores) + .withAmount(executorCores.toString) .build() val executorExtraClasspathEnv = executorExtraClasspath.map { cp => new EnvVarBuilder() @@ -388,7 +388,8 @@ private[spark] class KubernetesClusterSchedulerBackend( val requiredEnv = Seq( (ENV_EXECUTOR_PORT, executorPort.toString), (ENV_DRIVER_URL, driverUrl), - (ENV_EXECUTOR_CORES, executorCores), + // Executor backend expects integral value for executor cores, so round it up to an int. + (ENV_EXECUTOR_CORES, math.ceil(executorCores).toInt.toString), (ENV_EXECUTOR_MEMORY, executorMemoryString), (ENV_APPLICATION_ID, applicationId()), (ENV_EXECUTOR_ID, executorId), From befcf0a30651d0335bb57c242a824e43748db33f Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Mon, 3 Jul 2017 12:25:59 -0700 Subject: [PATCH 143/156] Python Bindings for launching PySpark Jobs from the JVM (#364) * Adding PySpark Submit functionality. Launching Python from JVM * Addressing scala idioms related to PR351 * Removing extends Logging which was necessary for LogInfo * Refactored code to leverage the ContainerLocalizedFileResolver * Modified Unit tests so that they would pass * Modified Unit Test input to pass Unit Tests * Setup working environment for integration tests for PySpark * Comment out Python thread logic until Jenkins has python in Python * Modifying PythonExec to pass on Jenkins * Modifying python exec * Added unit tests to ClientV2 and refactored to include pyspark submission resources * Modified unit test check * Scalastyle * PR 348 file conflicts * Refactored unit tests and styles * further scala styling and logic * Modified unit tests to be more specific towards Class in question * Removed space delimiting for methods * Submission client redesign to use a step-based builder pattern. This change overhauls the underlying architecture of the submission client, but it is intended to entirely preserve existing behavior of Spark applications. Therefore users will find this to be an invisible change. The philosophy behind this design is to reconsider the breakdown of the submission process.
It operates off the abstraction of "submission steps", which are transformation functions that take the previous state of the driver and return the new state of the driver. The driver's state includes its Spark configurations and the Kubernetes resources that will be used to deploy it. Such a refactor moves away from a features-first API design, which considers different containers to serve a set of features. The previous design, for example, had a container files resolver API object that returned different resolutions of the dependencies added by the user. However, it was up to the main Client to know how to intelligently invoke all of those APIs. Therefore the API surface area of the file resolver became untenably large and it was not intuitive of how it was to be used or extended. This design changes the encapsulation layout; every module is now responsible for changing the driver specification directly. An orchestrator builds the correct chain of steps and hands it to the client, which then calls it verbatim. The main client then makes any final modifications that put the different pieces of the driver together, particularly to attach the driver container itself to the pod and to apply the Spark configuration as command-line arguments. * Don't add the init-container step if all URIs are local. * Python arguments patch + tests + docs * Revert "Python arguments patch + tests + docs" This reverts commit 4533df2a03e2a8922988b0bd01691ad1f26e5d03. * Revert "Don't add the init-container step if all URIs are local." This reverts commit e103225d9ff54ca17692279cc6a7999f9b8c3265. * Revert "Submission client redesign to use a step-based builder pattern." This reverts commit 5499f6ddf9b42c0526f1dc053317afb38dc71294. * style changes * space for styling --- README.md | 1 + .../org/apache/spark/deploy/SparkSubmit.scala | 14 +- docs/running-on-kubernetes.md | 26 ++++ .../spark/deploy/kubernetes/constants.scala | 2 + .../deploy/kubernetes/submit/Client.scala | 77 ++++++---- .../ContainerLocalizedFilesResolver.scala | 39 +++-- ...riverInitContainerComponentsProvider.scala | 25 ++-- .../DriverPodKubernetesFileMounter.scala | 55 +++++++ .../submit/PythonSubmissionResources.scala | 75 ++++++++++ .../kubernetes/submit/ClientV2Suite.scala | 139 +++++++++++++++--- ...ContainerLocalizedFilesResolverSuite.scala | 24 +++ .../PythonSubmissionResourcesSuite.scala | 109 ++++++++++++++ .../src/main/docker/driver-py/Dockerfile | 48 ++++++ .../src/main/docker/executor-py/Dockerfile | 46 ++++++ .../src/main/docker/init-container/Dockerfile | 2 +- .../docker/resource-staging-server/Dockerfile | 3 +- .../main/docker/shuffle-service/Dockerfile | 2 +- .../kubernetes/integration-tests/pom.xml | 102 +++++++++++++ .../integration-tests/src/test/python/pi.py | 46 ++++++ .../integrationtest/KubernetesSuite.scala | 40 ++++- .../docker/SparkDockerImageBuilder.scala | 29 +++- 21 files changed, 831 insertions(+), 73 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala create mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-py/Dockerfile create mode 100644 
resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor-py/Dockerfile create mode 100755 resource-managers/kubernetes/integration-tests/src/test/python/pi.py diff --git a/README.md b/README.md index cf6b4fa80242b..cb747225a11d4 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ We've been asked by an Apache Spark Committer to work outside of the Apache infr This is a collaborative effort by several folks from different companies who are interested in seeing this feature be successful. Companies active in this project include (alphabetically): +- Bloomberg - Google - Haiwen - Hyperpilot diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 59ccf3af24ce7..9256a9ddd9960 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -335,8 +335,8 @@ object SparkSubmit { (clusterManager, deployMode) match { case (KUBERNETES, CLIENT) => printErrorAndExit("Client mode is currently not supported for Kubernetes.") - case (KUBERNETES, CLUSTER) if args.isPython || args.isR => - printErrorAndExit("Kubernetes does not currently support python or R applications.") + case (KUBERNETES, CLUSTER) if args.isR => + printErrorAndExit("Kubernetes does not currently support R applications.") case (STANDALONE, CLUSTER) if args.isPython => printErrorAndExit("Cluster deploy mode is currently not supported for python " + "applications on standalone clusters.") @@ -620,8 +620,14 @@ object SparkSubmit { if (isKubernetesCluster) { childMainClass = "org.apache.spark.deploy.kubernetes.submit.Client" - childArgs += args.primaryResource - childArgs += args.mainClass + if (args.isPython) { + childArgs += args.primaryResource + childArgs += "org.apache.spark.deploy.PythonRunner" + childArgs += args.pyFiles + } else { + childArgs += args.primaryResource + childArgs += args.mainClass + } childArgs ++= args.childArgs } diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 3a50860f826c5..2b4e9a6f96af1 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -180,6 +180,32 @@ The above mechanism using `kubectl proxy` can be used when we have authenticatio kubernetes-client library does not support. Authentication using X509 Client Certs and OAuth tokens is currently supported. +### Running PySpark + +Running PySpark on Kubernetes leverages the same spark-submit logic used when launching on Yarn and Mesos. +Python files can be distributed by including them with the `--py-files` option. + +Below is an example submission: + + +``` + bin/spark-submit \ + --deploy-mode cluster \ + --master k8s://http://127.0.0.1:8001 \ + --kubernetes-namespace default \ + --conf spark.executor.memory=500m \ + --conf spark.driver.memory=1G \ + --conf spark.driver.cores=1 \ + --conf spark.executor.cores=1 \ + --conf spark.executor.instances=1 \ + --conf spark.app.name=spark-pi \ + --conf spark.kubernetes.driver.docker.image=spark-driver-py:latest \ + --conf spark.kubernetes.executor.docker.image=spark-executor-py:latest \ + --conf spark.kubernetes.initcontainer.docker.image=spark-init:latest \ + --py-files local:///opt/spark/examples/src/main/python/sort.py \ + local:///opt/spark/examples/src/main/python/pi.py 100 +``` + ## Dynamic Executor Scaling Spark on Kubernetes supports Dynamic Allocation with cluster mode.
This mode requires running diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index f2f1136e54fe4..92f051b2ac298 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -67,6 +67,8 @@ package object constants { private[spark] val ENV_DRIVER_ARGS = "SPARK_DRIVER_ARGS" private[spark] val ENV_DRIVER_JAVA_OPTS = "SPARK_DRIVER_JAVA_OPTS" private[spark] val ENV_MOUNTED_FILES_DIR = "SPARK_MOUNTED_FILES_DIR" + private[spark] val ENV_PYSPARK_FILES = "PYSPARK_FILES" + private[spark] val ENV_PYSPARK_PRIMARY = "PYSPARK_PRIMARY" // Bootstrapping dependencies with the init-container private[spark] val INIT_CONTAINER_ANNOTATION = "pod.beta.kubernetes.io/init-containers" diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index 8220127eac449..781ecbd6c5416 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -47,11 +47,11 @@ private[spark] class Client( appName: String, kubernetesResourceNamePrefix: String, kubernetesAppId: String, + mainAppResource: String, + pythonResource: Option[PythonSubmissionResourcesImpl], mainClass: String, sparkConf: SparkConf, appArgs: Array[String], - sparkJars: Seq[String], - sparkFiles: Seq[String], waitForAppCompletion: Boolean, kubernetesClient: KubernetesClient, initContainerComponentsProvider: DriverInitContainerComponentsProvider, @@ -82,9 +82,7 @@ private[spark] class Client( org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) def run(): Unit = { - validateNoDuplicateFileNames(sparkJars) - validateNoDuplicateFileNames(sparkFiles) - + val arguments = (pythonResource map {p => p.arguments}).getOrElse(appArgs) val driverCustomLabels = ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( sparkConf, KUBERNETES_DRIVER_LABEL_PREFIX, @@ -136,7 +134,7 @@ private[spark] class Client( .endEnv() .addNewEnv() .withName(ENV_DRIVER_ARGS) - .withValue(appArgs.mkString(" ")) + .withValue(arguments.mkString(" ")) .endEnv() .withNewResources() .addToRequests("cpu", driverCpuQuantity) @@ -182,10 +180,13 @@ private[spark] class Client( .map(_.build()) val containerLocalizedFilesResolver = initContainerComponentsProvider - .provideContainerLocalizedFilesResolver() + .provideContainerLocalizedFilesResolver(mainAppResource) val resolvedSparkJars = containerLocalizedFilesResolver.resolveSubmittedSparkJars() val resolvedSparkFiles = containerLocalizedFilesResolver.resolveSubmittedSparkFiles() - + val resolvedPySparkFiles = containerLocalizedFilesResolver.resolveSubmittedPySparkFiles() + val resolvedPrimaryPySparkResource = pythonResource.map { + p => p.primaryPySparkResource(containerLocalizedFilesResolver) + }.getOrElse("") val initContainerBundler = initContainerComponentsProvider .provideInitContainerBundle(maybeSubmittedResourceIdentifiers.map(_.ids()), resolvedSparkJars ++ resolvedSparkFiles) @@ -221,7 +222,7 @@ private[spark] class Client( val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { case 
(confKey, confValue) => s"-D$confKey=$confValue" }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") - val resolvedDriverPod = podWithInitContainerAndMountedCreds.editSpec() + val resolvedDriverPodBuilder = podWithInitContainerAndMountedCreds.editSpec() .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainer.getName)) .addNewEnv() .withName(ENV_MOUNTED_CLASSPATH) @@ -233,7 +234,15 @@ private[spark] class Client( .endEnv() .endContainer() .endSpec() - .build() + val driverPodFileMounter = initContainerComponentsProvider.provideDriverPodFileMounter() + val resolvedDriverPod = pythonResource.map { + p => p.driverPodWithPySparkEnvs( + driverPodFileMounter, + resolvedPrimaryPySparkResource, + resolvedPySparkFiles.mkString(","), + driverContainer.getName, + resolvedDriverPodBuilder + )}.getOrElse(resolvedDriverPodBuilder.build()) Utils.tryWithResource( kubernetesClient .pods() @@ -271,17 +280,6 @@ private[spark] class Client( } } } - - private def validateNoDuplicateFileNames(allFiles: Seq[String]): Unit = { - val fileNamesToUris = allFiles.map { file => - (new File(Utils.resolveURI(file).getPath).getName, file) - } - fileNamesToUris.groupBy(_._1).foreach { - case (fileName, urisWithFileName) => - require(urisWithFileName.size == 1, "Cannot add multiple files with the same name, but" + - s" file name $fileName is shared by all of these URIs: $urisWithFileName") - } - } } private[spark] object Client { @@ -292,22 +290,34 @@ private[spark] object Client { val appArgs = args.drop(2) run(sparkConf, mainAppResource, mainClass, appArgs) } - def run( sparkConf: SparkConf, mainAppResource: String, mainClass: String, appArgs: Array[String]): Unit = { + val isPython = mainAppResource.endsWith(".py") + val pythonResource: Option[PythonSubmissionResourcesImpl] = + if (isPython) { + Option(new PythonSubmissionResourcesImpl(mainAppResource, appArgs)) + } else None + // Since you might need jars for SQL UDFs in PySpark + def sparkJarFilter(): Seq[String] = + pythonResource.map {p => p.sparkJars}.getOrElse( + Option(mainAppResource) + .filterNot(_ == SparkLauncher.NO_RESOURCE) + .toSeq) val sparkJars = sparkConf.getOption("spark.jars") .map(_.split(",")) - .getOrElse(Array.empty[String]) ++ - Option(mainAppResource) - .filterNot(_ == SparkLauncher.NO_RESOURCE) - .toSeq + .getOrElse(Array.empty[String]) ++ sparkJarFilter() val launchTime = System.currentTimeMillis val sparkFiles = sparkConf.getOption("spark.files") .map(_.split(",")) .getOrElse(Array.empty[String]) + val pySparkFilesOption = pythonResource.map {p => p.pySparkFiles} + validateNoDuplicateFileNames(sparkJars) + validateNoDuplicateFileNames(sparkFiles) + pySparkFilesOption.foreach {b => validateNoDuplicateFileNames(b)} + val pySparkFiles = pySparkFilesOption.getOrElse(Array.empty[String]) val appName = sparkConf.getOption("spark.app.name").getOrElse("spark") // The resource name prefix is derived from the application name, making it easy to connect the // names of the Kubernetes resources from e.g. 
Kubectl or the Kubernetes dashboard to the @@ -326,6 +336,7 @@ private[spark] object Client { namespace, sparkJars, sparkFiles, + pySparkFiles, sslOptionsProvider.getSslOptions) Utils.tryWithResource(SparkKubernetesClientFactory.createKubernetesClient( master, @@ -346,11 +357,11 @@ private[spark] object Client { appName, kubernetesResourceNamePrefix, kubernetesAppId, + mainAppResource, + pythonResource, mainClass, sparkConf, appArgs, - sparkJars, - sparkFiles, waitForAppCompletion, kubernetesClient, initContainerComponentsProvider, @@ -358,4 +369,14 @@ private[spark] object Client { loggingPodStatusWatcher).run() } } + private def validateNoDuplicateFileNames(allFiles: Seq[String]): Unit = { + val fileNamesToUris = allFiles.map { file => + (new File(Utils.resolveURI(file).getPath).getName, file) + } + fileNamesToUris.groupBy(_._1).foreach { + case (fileName, urisWithFileName) => + require(urisWithFileName.size == 1, "Cannot add multiple files with the same name, but" + + s" file name $fileName is shared by all of these URIs: $urisWithFileName") + } + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala index c635484c4c124..c31aa5f306bea 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala @@ -24,14 +24,19 @@ private[spark] trait ContainerLocalizedFilesResolver { def resolveSubmittedAndRemoteSparkJars(): Seq[String] def resolveSubmittedSparkJars(): Seq[String] def resolveSubmittedSparkFiles(): Seq[String] + def resolveSubmittedPySparkFiles(): Seq[String] + def resolvePrimaryResourceFile(): String } private[spark] class ContainerLocalizedFilesResolverImpl( sparkJars: Seq[String], sparkFiles: Seq[String], + pySparkFiles: Seq[String], + primaryPyFile: String, jarsDownloadPath: String, filesDownloadPath: String) extends ContainerLocalizedFilesResolver { + override def resolveSubmittedAndRemoteSparkJars(): Seq[String] = { sparkJars.map { jar => val jarUri = Utils.resolveURI(jar) @@ -53,16 +58,30 @@ private[spark] class ContainerLocalizedFilesResolverImpl( resolveSubmittedFiles(sparkFiles, filesDownloadPath) } - private def resolveSubmittedFiles(files: Seq[String], downloadPath: String): Seq[String] = { - files.map { file => - val fileUri = Utils.resolveURI(file) - Option(fileUri.getScheme).getOrElse("file") match { - case "file" => - val fileName = new File(fileUri.getPath).getName - s"$downloadPath/$fileName" - case _ => - file - } + override def resolveSubmittedPySparkFiles(): Seq[String] = { + def filterMainResource(x: String) = x match { + case `primaryPyFile` => None + case _ => Some(resolveFile(x, filesDownloadPath)) + } + pySparkFiles.flatMap(x => filterMainResource(x)) + } + + override def resolvePrimaryResourceFile(): String = { + Option(primaryPyFile).map(p => resolveFile(p, filesDownloadPath)).getOrElse("") + } + + private def resolveFile(file: String, downloadPath: String) = { + val fileUri = Utils.resolveURI(file) + Option(fileUri.getScheme).getOrElse("file") match { + case "file" => + val fileName = new File(fileUri.getPath).getName + s"$downloadPath/$fileName" + case _ => + file } } + + private def resolveSubmittedFiles(files: Seq[String], 
downloadPath: String): Seq[String] = { + files.map { file => resolveFile(file, downloadPath) } + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala index cc1837cce6736..6e185d2c069f6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala @@ -32,13 +32,15 @@ import org.apache.spark.util.Utils */ private[spark] trait DriverInitContainerComponentsProvider { - def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver + def provideContainerLocalizedFilesResolver( + mainAppResource: String): ContainerLocalizedFilesResolver def provideInitContainerSubmittedDependencyUploader( driverPodLabels: Map[String, String]): Option[SubmittedDependencyUploader] def provideSubmittedDependenciesSecretBuilder( maybeSubmittedResourceSecrets: Option[SubmittedResourceSecrets]) : Option[SubmittedDependencySecretBuilder] def provideInitContainerBootstrap(): SparkPodInitContainerBootstrap + def provideDriverPodFileMounter(): DriverPodKubernetesFileMounter def provideInitContainerBundle(maybeSubmittedResourceIds: Option[SubmittedResourceIds], uris: Iterable[String]): Option[InitContainerBundle] } @@ -49,6 +51,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( namespace: String, sparkJars: Seq[String], sparkFiles: Seq[String], + pySparkFiles: Seq[String], resourceStagingServerExternalSslOptions: SSLOptions) extends DriverInitContainerComponentsProvider { @@ -104,6 +107,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( private val initContainerImage = sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE) private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) private val downloadTimeoutMinutes = sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) + private val pySparkSubmitted = KubernetesFileUtils.getOnlySubmitterLocalFiles(pySparkFiles) private def provideInitContainerConfigMap( maybeSubmittedResourceIds: Option[SubmittedResourceIds]): ConfigMap = { @@ -130,7 +134,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( } new SparkInitContainerConfigMapBuilderImpl( sparkJars, - sparkFiles, + sparkFiles ++ pySparkSubmitted, jarsDownloadPath, filesDownloadPath, configMapName, @@ -138,9 +142,10 @@ private[spark] class DriverInitContainerComponentsProviderImpl( submittedDependencyConfigPlugin).build() } - override def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver = { + override def provideContainerLocalizedFilesResolver(mainAppResource: String) + : ContainerLocalizedFilesResolver = { new ContainerLocalizedFilesResolverImpl( - sparkJars, sparkFiles, jarsDownloadPath, filesDownloadPath) + sparkJars, sparkFiles, pySparkFiles, mainAppResource, jarsDownloadPath, filesDownloadPath) } private def provideExecutorInitContainerConfiguration(): ExecutorInitContainerConfiguration = { @@ -159,7 +164,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( namespace, stagingServerUri, sparkJars, - sparkFiles, + sparkFiles ++ pySparkSubmitted, resourceStagingServerExternalSslOptions, RetrofitClientFactoryImpl) } @@ -201,13 +206,15 @@ private[spark] class 
DriverInitContainerComponentsProviderImpl( configMapKey, resourceStagingServerSecretPlugin) } - + override def provideDriverPodFileMounter(): DriverPodKubernetesFileMounter = { + new DriverPodKubernetesFileMounterImpl() + } override def provideInitContainerBundle( maybeSubmittedResourceIds: Option[SubmittedResourceIds], uris: Iterable[String]): Option[InitContainerBundle] = { - val containerLocalizedFilesResolver = provideContainerLocalizedFilesResolver() - // Bypass init-containers if `spark.jars` and `spark.files` is empty or only has `local://` URIs - if (KubernetesFileUtils.getNonContainerLocalFiles(uris).nonEmpty) { + // Bypass init-containers if `spark.jars`, `spark.files` and '--py-files' + // are empty or only have `local://` URIs + if ((KubernetesFileUtils.getNonContainerLocalFiles(uris) ++ pySparkSubmitted).nonEmpty) { Some(InitContainerBundle(provideInitContainerConfigMap(maybeSubmittedResourceIds), provideInitContainerBootstrap(), provideExecutorInitContainerConfiguration())) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala new file mode 100644 index 0000000000000..cc0ef0eedb457 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit + +import io.fabric8.kubernetes.api.model.{Container, PodBuilder} + +import org.apache.spark.deploy.kubernetes.constants._ + /** + * Trait that is responsible for providing full file-paths dynamically after + * the filesDownloadPath has been defined. The file-names are then stored in the + * environment variables in the driver-pod.
+ */ +private[spark] trait DriverPodKubernetesFileMounter { + def addPySparkFiles(primaryFile: String, pySparkFiles: String, + mainContainerName: String, originalPodSpec: PodBuilder) : PodBuilder +} + +private[spark] class DriverPodKubernetesFileMounterImpl() + extends DriverPodKubernetesFileMounter { + override def addPySparkFiles( + primaryFile: String, + pySparkFiles: String, + mainContainerName: String, + originalPodSpec: PodBuilder): PodBuilder = { + + originalPodSpec + .editSpec() + .editMatchingContainer(new ContainerNameEqualityPredicate(mainContainerName)) + .addNewEnv() + .withName(ENV_PYSPARK_PRIMARY) + .withValue(primaryFile) + .endEnv() + .addNewEnv() + .withName(ENV_PYSPARK_FILES) + .withValue(pySparkFiles) + .endEnv() + .endContainer() + .endSpec() + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala new file mode 100644 index 0000000000000..c61e930a2b97f --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit + +import io.fabric8.kubernetes.api.model.{Pod, PodBuilder} + +private[spark] trait PythonSubmissionResources { + def sparkJars: Seq[String] + def pySparkFiles: Array[String] + def arguments: Array[String] + def primaryPySparkResource(containerLocalizedFilesResolver: ContainerLocalizedFilesResolver) + : String + def driverPodWithPySparkEnvs( + driverPodFileMounter: DriverPodKubernetesFileMounter, + resolvedPrimaryPySparkResource: String, + resolvedPySparkFiles: String, + driverContainerName: String, + driverPodBuilder: PodBuilder): Pod +} + +private[spark] class PythonSubmissionResourcesImpl( + private val mainAppResource: String, + private val appArgs: Array[String] ) extends PythonSubmissionResources { + + private val pyFiles: Array[String] = { + Option(appArgs(0)).map(a => mainAppResource +: a.split(",")) + .getOrElse(Array(mainAppResource)) + } + + override def sparkJars: Seq[String] = Seq.empty[String] + + override def pySparkFiles: Array[String] = pyFiles + + override def arguments: Array[String] = { + pyFiles.toList match { + case Nil => appArgs + case a :: b => a match { + case _ if a == mainAppResource && b == Nil => appArgs + case _ => appArgs.drop(1) + } + } + } + override def primaryPySparkResource( + containerLocalizedFilesResolver: ContainerLocalizedFilesResolver) : String = + containerLocalizedFilesResolver.resolvePrimaryResourceFile() + + override def driverPodWithPySparkEnvs( + driverPodFileMounter: DriverPodKubernetesFileMounter, + resolvedPrimaryPySparkResource: String, + resolvedPySparkFiles: String, + driverContainerName: String, + driverPodBuilder: PodBuilder) : Pod = { + driverPodFileMounter + .addPySparkFiles( + resolvedPrimaryPySparkResource, + resolvedPySparkFiles, + driverContainerName, + driverPodBuilder) + .build() + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala index 8992a56e20c80..a58a37691f4eb 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.kubernetes.submit import java.io.File -import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, DoneablePod, HasMetadata, Pod, PodBuilder, PodList, Secret, SecretBuilder} +import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.client.{KubernetesClient, Watch} import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource} import org.hamcrest.{BaseMatcher, Description} @@ -27,10 +27,10 @@ import org.mockito.Matchers.{any, anyVararg, argThat, eq => mockitoEq} import org.mockito.Mockito.{times, verify, when} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer -import org.scalatest.BeforeAndAfter +import org.scalatest.{BeforeAndAfter, Matchers} + import scala.collection.JavaConverters._ import scala.collection.mutable - import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.kubernetes.{KubernetesExternalShuffleService, KubernetesShuffleBlockHandler, SparkPodInitContainerBootstrap} import org.apache.spark.deploy.kubernetes.config._ @@ -63,6 +63,7 @@ class ClientV2Suite extends 
SparkFunSuite with BeforeAndAfter { private val INIT_CONTAINER_SECRET_NAME = "init-container-secret" private val INIT_CONTAINER_SECRET_DATA = Map("secret-key" -> "secret-data") private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" + private val PYSPARK_APP_ARGS = Array(null, "500") private val APP_ARGS = Array("3", "20") private val SPARK_JARS = Seq( "hdfs://localhost:9000/app/jars/jar1.jar", "file:///app/jars/jar2.jar") @@ -72,6 +73,20 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { "/var/data/spark-jars/jar1.jar", "/var/data/spark-jars/jar2.jar") private val SPARK_FILES = Seq( "hdfs://localhost:9000/app/files/file1.txt", "file:///app/files/file2.txt") + private val PYSPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.py", + "file:///app/files/file2.py", + "local:///app/files/file3.py", + "http://app/files/file4.py", + "file:///app/files/file5.py") + private val RESOLVED_PYSPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.py", + "/var/spark-data/spark-files/file2.py", + "local:///app/files/file3.py", + "http://app/files/file4.py") + private val PYSPARK_PRIMARY_FILE = "file:///app/files/file5.py" + private val RESOLVED_PYSPARK_PRIMARY_FILE = "/var/spark-data/spark-file/file5.py" + private val RESOLVED_SPARK_FILES = Seq( "hdfs://localhost:9000/app/files/file1.txt", "file:///var/data/spark-files/file2.txt") private val INIT_CONTAINER_SECRET = new SecretBuilder() @@ -138,7 +153,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { @Mock private var kubernetesClient: KubernetesClient = _ @Mock - private var podOps: MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] = _ + private var podOps: MixedOperation[ + Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] = _ private type ResourceListOps = NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable[ HasMetadata, java.lang.Boolean] @Mock @@ -146,6 +162,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { @Mock private var credentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider = _ @Mock + private var fileMounter: DriverPodKubernetesFileMounter = _ + @Mock private var credentialsMounter: DriverPodKubernetesCredentialsMounter = _ @Mock private var loggingPodStatusWatcher: LoggingPodStatusWatcher = _ @@ -169,8 +187,10 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .endMetadata() } }) - when(initContainerComponentsProvider.provideContainerLocalizedFilesResolver()) - .thenReturn(containerLocalizedFilesResolver) + when(initContainerComponentsProvider.provideContainerLocalizedFilesResolver( + any[String])).thenReturn(containerLocalizedFilesResolver) + when(initContainerComponentsProvider.provideDriverPodFileMounter()) + .thenReturn(fileMounter) when(submittedDependenciesSecretBuilder.build()) .thenReturn(INIT_CONTAINER_SECRET) when(kubernetesClient.pods()).thenReturn(podOps) @@ -178,14 +198,30 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { override def answer(invocation: InvocationOnMock): Pod = { new PodBuilder(invocation.getArgumentAt(0, classOf[Pod])) .editMetadata() - .withUid(DRIVER_POD_UID) + .withUid(DRIVER_POD_UID) .endMetadata() - .withKind(DRIVER_POD_KIND) + .withKind(DRIVER_POD_KIND) .withApiVersion(DRIVER_POD_API_VERSION) .build() } }) when(podOps.withName(s"$APP_RESOURCE_PREFIX-driver")).thenReturn(namedPodResource) + when(fileMounter.addPySparkFiles( + mockitoEq(RESOLVED_PYSPARK_PRIMARY_FILE), + mockitoEq(RESOLVED_PYSPARK_FILES.mkString(",")), + any[String], + 
any())).thenAnswer( new Answer[PodBuilder] { + override def answer(invocation: InvocationOnMock) : PodBuilder = { + invocation.getArgumentAt(3, classOf[PodBuilder]) + .editMetadata() + .withUid(DRIVER_POD_UID) + .withName(s"$APP_RESOURCE_PREFIX-driver") + .addToLabels("pyspark-test", "true") + .endMetadata() + .withKind(DRIVER_POD_KIND) + .withApiVersion(DRIVER_POD_API_VERSION) + } + }) when(namedPodResource.watch(loggingPodStatusWatcher)).thenReturn(watch) when(containerLocalizedFilesResolver.resolveSubmittedAndRemoteSparkJars()) .thenReturn(RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS) @@ -193,6 +229,10 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .thenReturn(RESOLVED_SPARK_JARS) when(containerLocalizedFilesResolver.resolveSubmittedSparkFiles()) .thenReturn(RESOLVED_SPARK_FILES) + when(containerLocalizedFilesResolver.resolvePrimaryResourceFile()) + .thenReturn(RESOLVED_PYSPARK_PRIMARY_FILE) + when(containerLocalizedFilesResolver.resolveSubmittedPySparkFiles()) + .thenReturn(RESOLVED_PYSPARK_FILES) when(executorInitContainerConfiguration.configureSparkConfForExecutorInitContainer(SPARK_CONF)) .thenReturn(SPARK_CONF_WITH_EXECUTOR_INIT_CONF) when(kubernetesClient.resourceList(anyVararg[HasMetadata]())).thenReturn(resourceListOps) @@ -208,9 +248,10 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { when(initContainerComponentsProvider .provideSubmittedDependenciesSecretBuilder(Some(SUBMITTED_RESOURCES.secrets()))) .thenReturn(Some(submittedDependenciesSecretBuilder)) - when(initContainerComponentsProvider.provideInitContainerBundle(Some(SUBMITTED_RESOURCES.ids()), - RESOLVED_SPARK_JARS ++ RESOLVED_SPARK_FILES)) - .thenReturn(Some(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, + when(initContainerComponentsProvider.provideInitContainerBundle(mockitoEq( + Option(SUBMITTED_RESOURCES.ids())), + mockitoEq(RESOLVED_SPARK_JARS ++ RESOLVED_SPARK_FILES))) + .thenReturn(Option(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, initContainerBootstrap, executorInitContainerConfiguration))) runAndVerifyDriverPodHasCorrectProperties() val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) @@ -292,11 +333,11 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { APP_NAME, APP_RESOURCE_PREFIX, APP_ID, + "", + None, MAIN_CLASS, SPARK_CONF, APP_ARGS, - SPARK_JARS, - SPARK_FILES, true, kubernetesClient, initContainerComponentsProvider, @@ -305,6 +346,20 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { verify(loggingPodStatusWatcher).awaitCompletion() } + test("Mounting environmental variables correctly onto Driver Pod for PySpark Jobs") { + expectationsForNoMountedCredentials() + expectationsForNoDependencyUploader() + expectationsForNoSparkJarsOrFiles() + runAndVerifyDriverPodHasCorrectPySparkProperties() + } + + private def expectationsForNoSparkJarsOrFiles(): Unit = { + when(containerLocalizedFilesResolver.resolveSubmittedSparkFiles()) + .thenReturn(Nil) + when(containerLocalizedFilesResolver.resolveSubmittedSparkJars()) + .thenReturn(Nil) + } + private def expectationsForNoDependencyUploader(): Unit = { when(initContainerComponentsProvider .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) @@ -312,8 +367,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { when(initContainerComponentsProvider .provideSubmittedDependenciesSecretBuilder(None)) .thenReturn(None) - when(initContainerComponentsProvider.provideInitContainerBundle(None, RESOLVED_SPARK_JARS ++ - RESOLVED_SPARK_FILES)) + 
when(initContainerComponentsProvider.provideInitContainerBundle(mockitoEq(None), + mockitoEq(RESOLVED_SPARK_JARS ++ RESOLVED_SPARK_FILES))) .thenReturn(Some(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, initContainerBootstrap, executorInitContainerConfiguration))) } @@ -373,16 +428,28 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { } } + private def runAndVerifyDriverPodHasCorrectPySparkProperties(): Unit = { + when(initContainerComponentsProvider.provideContainerLocalizedFilesResolver( + mockitoEq(PYSPARK_PRIMARY_FILE))).thenReturn(containerLocalizedFilesResolver) + when(initContainerComponentsProvider.provideInitContainerBundle( + any[Option[SubmittedResourceIds]], any[Iterable[String]])) + .thenReturn(Some(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, + initContainerBootstrap, executorInitContainerConfiguration))) + runAndVerifyPySparkPodMatchesPredicate { p => + Option(p).exists(pod => containerHasCorrectPySparkEnvs(pod)) + } + } + private def runAndVerifyPodMatchesPredicate(pred: (Pod => Boolean)): Unit = { new Client( APP_NAME, APP_RESOURCE_PREFIX, APP_ID, + "", + None, MAIN_CLASS, SPARK_CONF, APP_ARGS, - SPARK_JARS, - SPARK_FILES, false, kubernetesClient, initContainerComponentsProvider, @@ -434,6 +501,15 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { expectedBasicEnvs.toSet.subsetOf(envs.toSet) } + private def containerHasCorrectPySparkEnvs(pod: Pod): Boolean = { + val driverPodLabels = + pod.getMetadata.getLabels.asScala.map(env => (env._1.toString, env._2.toString)) + val expectedBasicLabels = Map( + "pyspark-test" -> "true", + "spark-role" -> "driver") + expectedBasicLabels.toSet.subsetOf(driverPodLabels.toSet) + } + private def containerHasCorrectBasicContainerConfiguration(pod: Pod): Boolean = { val containers = pod.getSpec.getContainers.asScala containers.size == 1 && @@ -450,4 +526,33 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { BOOTSTRAPPED_POD_ANNOTATION -> TRUE) pod.getMetadata.getAnnotations.asScala == expectedAnnotations } + + private def runAndVerifyPySparkPodMatchesPredicate(pred: (Pod => Boolean)): Unit = { + new Client( + APP_NAME, + APP_RESOURCE_PREFIX, + APP_ID, + PYSPARK_PRIMARY_FILE, + Option(new PythonSubmissionResourcesImpl(PYSPARK_PRIMARY_FILE, PYSPARK_APP_ARGS)), + MAIN_CLASS, + SPARK_CONF, + PYSPARK_APP_ARGS, + false, + kubernetesClient, + initContainerComponentsProvider, + credentialsMounterProvider, + loggingPodStatusWatcher).run() + val podMatcher = new BaseMatcher[Pod] { + override def matches(o: scala.Any): Boolean = { + o match { + case p: Pod => pred(p) + case _ => false + } + } + override def describeTo(description: Description): Unit = {} + } + verify(podOps).create(argThat(podMatcher)) + } } + + diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala index ca5cd1fff9b74..7e51abcd7b8e0 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala @@ -29,11 +29,20 @@ class ContainerLocalizedFilesResolverSuite extends SparkFunSuite { "file:///app/files/file2.txt", "local:///app/files/file3.txt", "http://app/files/file4.txt") + private val 
PYSPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.py", + "file:///app/files/file2.py", + "local:///app/files/file3.py", + "http://app/files/file4.py", + "file:///app/files/file5.py") private val JARS_DOWNLOAD_PATH = "/var/data/spark-jars" private val FILES_DOWNLOAD_PATH = "/var/data/spark-files" + private val PYSPARK_PRIMARY_FILE = "file:///app/files/file5.py" private val localizedFilesResolver = new ContainerLocalizedFilesResolverImpl( SPARK_JARS, SPARK_FILES, + PYSPARK_FILES, + PYSPARK_PRIMARY_FILE, JARS_DOWNLOAD_PATH, FILES_DOWNLOAD_PATH) @@ -66,4 +75,19 @@ class ContainerLocalizedFilesResolverSuite extends SparkFunSuite { "http://app/files/file4.txt") assert(resolvedFiles === expectedResolvedFiles) } + test("Submitted PySpark files should resolve to the download path.") { + val resolvedPySparkFiles = localizedFilesResolver.resolveSubmittedPySparkFiles() + val expectedPySparkFiles = Seq( + "hdfs://localhost:9000/app/files/file1.py", + s"$FILES_DOWNLOAD_PATH/file2.py", + "local:///app/files/file3.py", + "http://app/files/file4.py") + assert(resolvedPySparkFiles === expectedPySparkFiles) + } + test("Submitted PySpark Primary resource should resolve to the download path.") { + val resolvedPySparkPrimary = + localizedFilesResolver.resolvePrimaryResourceFile() + val expectedPySparkPrimary = s"$FILES_DOWNLOAD_PATH/file5.py" + assert(resolvedPySparkPrimary === expectedPySparkPrimary) + } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala new file mode 100644 index 0000000000000..9b60b7ef2b786 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit + +import org.apache.spark.{SSLOptions, SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ + +import scala.collection.JavaConverters._ +import io.fabric8.kubernetes.api.model.{ContainerBuilder, Pod, PodBuilder} +import org.mockito.{Mock, MockitoAnnotations} +import org.mockito.Mockito.when +import org.scalatest.BeforeAndAfter + +private[spark] class PythonSubmissionResourcesSuite extends SparkFunSuite with BeforeAndAfter { + private val PYSPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.py", + "file:///app/files/file2.py", + "local:///app/files/file3.py", + "http://app/files/file4.py", + "file:///app/files/file5.py") + private val RESOLVED_PYSPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.py", + "/var/spark-data/spark-files/file2.py", + "local:///app/file`s/file3.py", + "http://app/files/file4.py") + private val PYSPARK_PRIMARY_FILE = "file:///app/files/file5.py" + private val RESOLVED_PYSPARK_PRIMARY_FILE = "/var/data/spark-files/file5.py" + + private val pyFilesResource = new PythonSubmissionResourcesImpl( + PYSPARK_PRIMARY_FILE, Array(PYSPARK_FILES.mkString(","), "500") + ) + private val pyResource = new PythonSubmissionResourcesImpl( + PYSPARK_PRIMARY_FILE, Array(null, "500") + ) + private val DRIVER_CONTAINER_NAME = "pyspark_container" + private val driverContainer = new ContainerBuilder() + .withName(DRIVER_CONTAINER_NAME) + .build() + private val basePodBuilder = new PodBuilder() + .withNewMetadata() + .withName("base_pod") + .endMetadata() + .withNewSpec() + .addToContainers(driverContainer) + .endSpec() + + @Mock + private var driverInitContainer: DriverInitContainerComponentsProviderImpl = _ + @Mock + private var localizedFileResolver: ContainerLocalizedFilesResolverImpl = _ + before { + MockitoAnnotations.initMocks(this) + when(driverInitContainer.provideDriverPodFileMounter()).thenReturn( + new DriverPodKubernetesFileMounterImpl() + ) + when(localizedFileResolver.resolvePrimaryResourceFile()).thenReturn( + RESOLVED_PYSPARK_PRIMARY_FILE) + } + test("Test with --py-files included") { + assert(pyFilesResource.sparkJars === Seq.empty[String]) + assert(pyFilesResource.pySparkFiles === + PYSPARK_PRIMARY_FILE +: PYSPARK_FILES) + assert(pyFilesResource.primaryPySparkResource(localizedFileResolver) === + RESOLVED_PYSPARK_PRIMARY_FILE) + val driverPod: Pod = pyFilesResource.driverPodWithPySparkEnvs( + driverInitContainer.provideDriverPodFileMounter(), + RESOLVED_PYSPARK_PRIMARY_FILE, + RESOLVED_PYSPARK_FILES.mkString(","), + DRIVER_CONTAINER_NAME, + basePodBuilder + ) + val driverContainer = driverPod.getSpec.getContainers.asScala.head + val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)).toMap + envs.get("PYSPARK_PRIMARY") foreach{ a => assert (a === RESOLVED_PYSPARK_PRIMARY_FILE) } + envs.get("PYSPARK_FILES") foreach{ a => assert (a === RESOLVED_PYSPARK_FILES.mkString(",")) } + } + + test("Test without --py-files") { + assert(pyResource.sparkJars === Seq.empty[String]) + assert(pyResource.pySparkFiles === Array(PYSPARK_PRIMARY_FILE)) + assert(pyResource.primaryPySparkResource(localizedFileResolver) === + RESOLVED_PYSPARK_PRIMARY_FILE) + val driverPod: Pod = pyResource.driverPodWithPySparkEnvs( + driverInitContainer.provideDriverPodFileMounter(), + RESOLVED_PYSPARK_PRIMARY_FILE, + "", + DRIVER_CONTAINER_NAME, + basePodBuilder + ) + val driverContainer = driverPod.getSpec.getContainers.asScala.head + val envs = driverContainer.getEnv.asScala.map(env => 
(env.getName, env.getValue)).toMap + envs.get("PYSPARK_PRIMARY") foreach{ a => assert (a === RESOLVED_PYSPARK_PRIMARY_FILE) } + envs.get("PYSPARK_FILES") foreach{ a => assert (a === "") } + } +} \ No newline at end of file diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-py/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-py/Dockerfile new file mode 100644 index 0000000000000..6dcc7511c0dd9 --- /dev/null +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-py/Dockerfile @@ -0,0 +1,48 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM spark-base + +# If this docker file is being used in the context of building your images from a Spark distribution, the docker build +# command should be invoked from the top level directory of the Spark distribution. E.g.: +# docker build -t spark-driver-py:latest -f dockerfiles/driver-py/Dockerfile . + +ADD examples /opt/spark/examples +ADD python /opt/spark/python + +RUN apk add --no-cache python && \ + python -m ensurepip && \ + rm -r /usr/lib/python*/ensurepip && \ + pip install --upgrade pip setuptools && \ + rm -r /root/.cache +# UNCOMMENT THE FOLLOWING TO START PIP INSTALLING PYTHON PACKAGES +# RUN apk add --update alpine-sdk python-dev +# RUN pip install numpy + +ENV PYTHON_VERSION 2.7.13 +ENV PYSPARK_PYTHON python +ENV PYSPARK_DRIVER_PYTHON python +ENV PYTHONPATH ${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${PYTHONPATH} + +CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ + if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." .; fi && \ + exec /sbin/tini -- ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH \ + -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY \ + $SPARK_DRIVER_CLASS $PYSPARK_PRIMARY $PYSPARK_FILES $SPARK_DRIVER_ARGS diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor-py/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor-py/Dockerfile new file mode 100644 index 0000000000000..7a65a4f879376 --- /dev/null +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor-py/Dockerfile @@ -0,0 +1,46 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM spark-base + +# If this docker file is being used in the context of building your images from a Spark distribution, the docker build +# command should be invoked from the top level directory of the Spark distribution. E.g.: +# docker build -t spark-executor-py:latest -f dockerfiles/executor-py/Dockerfile . + +ADD examples /opt/spark/examples +ADD python /opt/spark/python + +RUN apk add --no-cache python && \ + python -m ensurepip && \ + rm -r /usr/lib/python*/ensurepip && \ + pip install --upgrade pip setuptools && \ + rm -r /root/.cache +# UNCOMMENT THE FOLLOWING TO START PIP INSTALLING PYTHON PACKAGES +# RUN apk add --update alpine-sdk python-dev +# RUN pip install numpy + +ENV PYTHON_VERSION 2.7.13 +ENV PYSPARK_PYTHON python +ENV PYSPARK_DRIVER_PYTHON python +ENV PYTHONPATH ${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${PYTHONPATH} + +# TODO support spark.executor.extraClassPath +CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ + if ! [ -z ${SPARK_MOUNTED_CLASSPATH}+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_EXECUTOR_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXECUTOR_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." .; fi && \ + exec /sbin/tini -- ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP \ No newline at end of file diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile index 6bff06da12840..4bafe25e2608f 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile @@ -19,6 +19,6 @@ FROM spark-base # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: -# docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . +# docker build -t spark-init:latest -f dockerfiles/init-container/Dockerfile . 
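Stepping back to the PySpark Dockerfiles added above: the driver-py image's CMD launches the driver with $PYSPARK_PRIMARY and $PYSPARK_FILES, which the submission client is expected to set as environment variables on the driver container (compare the PYSPARK_PRIMARY/PYSPARK_FILES expectations in ClientV2Suite earlier in this patch). As a rough sketch only, using the fabric8 builder API already used throughout this patch and a hypothetical helper name that is not part of the change, attaching those variables looks roughly like this:

    import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder}

    object PySparkDriverEnvSketch {
      // Hypothetical helper (not part of this patch): adds the env vars that the
      // driver-py image's CMD reads as $PYSPARK_PRIMARY and $PYSPARK_FILES.
      def withPySparkEnvs(container: Container, primary: String, pyFiles: String): Container =
        new ContainerBuilder(container)
          .addNewEnv().withName("PYSPARK_PRIMARY").withValue(primary).endEnv()
          .addNewEnv().withName("PYSPARK_FILES").withValue(pyFiles).endEnv()
          .build()
    }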
ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.KubernetesSparkDependencyDownloadInitContainer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile index c9a92fa1c5b62..9ca96be0f1a88 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile @@ -17,8 +17,9 @@ FROM spark-base + # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: -# docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . +# docker build -t spark-resource-staging-server:latest -f dockerfiles/resource-staging-server/Dockerfile . ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.ResourceStagingServer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile index 7f4e2aa51b67d..ccb2f1a03d88c 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile @@ -19,7 +19,7 @@ FROM spark-base # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: -# docker build -t spark-shuffle:latest -f dockerfiles/shuffle/Dockerfile . +# docker build -t spark-shuffle:latest -f dockerfiles/shuffle-service/Dockerfile . COPY examples /opt/spark/examples diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index bbf4b02cdaaf9..cd3ccad0a2b22 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -220,6 +220,108 @@ + + copy-integration-python + pre-integration-test + + copy-resources + + + ${project.build.directory}/docker/python + + + ${project.parent.basedir}/python + + ${project.parent.basedir}/python/.egg + ${project.parent.basedir}/python/dist + + + + + + + copy-integration-data + pre-integration-test + + copy-resources + + + ${project.build.directory}/docker/data + + + ${project.parent.basedir}/data + true + + + + + + copy-integration-licenses + pre-integration-test + + copy-resources + + + ${project.build.directory}/docker/licenses + + + ${project.parent.basedir}/licenses + true + + + + + + copy-integration-examples-jar + pre-integration-test + + copy-resources + + + ${project.build.directory}/docker/examples/jars + + + ${project.parent.basedir}/examples/target/scala-2.11/jars + true + + + + + + copy-integration-examples-src + pre-integration-test + + copy-resources + + + ${project.build.directory}/docker/examples/src/main + + + ${project.parent.basedir}/examples/src/main + true + + + + + +
    + + org.apache.maven.plugins + maven-antrun-plugin + 1.6 + + + create-release-file + pre-integration-test + + run + + + + + + + diff --git a/resource-managers/kubernetes/integration-tests/src/test/python/pi.py b/resource-managers/kubernetes/integration-tests/src/test/python/pi.py new file mode 100755 index 0000000000000..e3f0c4aeef1b7 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/python/pi.py @@ -0,0 +1,46 @@ +from __future__ import print_function +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sys +from random import random +from operator import add + +from pyspark.sql import SparkSession + + +if __name__ == "__main__": + """ + Usage: pi [partitions] + """ + spark = SparkSession\ + .builder\ + .appName("PythonPi")\ + .getOrCreate() + + partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2 + n = 100000 * partitions + + def f(_): + x = random() * 2 - 1 + y = random() * 2 - 1 + return 1 if x ** 2 + y ** 2 < 1 else 0 + + count = spark.sparkContext.parallelize(range(1, n + 1), partitions).map(f).reduce(add) + print("Pi is roughly %f" % (4.0 * count / n)) + + spark.stop() diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index e377f285eb9a6..d2082291eba22 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -72,6 +72,34 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { kubernetesTestComponents.deleteNamespace() } + test("Run PySpark Job on file from SUBMITTER") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + launchStagingServer(SSLOptions(), None) + sparkConf + .set(DRIVER_DOCKER_IMAGE, + System.getProperty("spark.docker.test.driverImage", "spark-driver-py:latest")) + .set(EXECUTOR_DOCKER_IMAGE, + System.getProperty("spark.docker.test.executorImage", "spark-executor-py:latest")) + + runPySparkPiAndVerifyCompletion( + PYSPARK_PI_SUBMITTER_LOCAL_FILE_LOCATION) + } + + test("Run PySpark Job on file from CONTAINER with spark.jar defined") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + sparkConf.setJars(Seq(CONTAINER_LOCAL_HELPER_JAR_PATH)) + sparkConf + .set(DRIVER_DOCKER_IMAGE, + System.getProperty("spark.docker.test.driverImage", "spark-driver-py:latest")) + .set(EXECUTOR_DOCKER_IMAGE, + System.getProperty("spark.docker.test.executorImage", "spark-executor-py:latest")) + + 
runPySparkPiAndVerifyCompletion( + PYSPARK_PI_CONTAINER_LOCAL_FILE_LOCATION) + } + test("Simple submission test with the resource staging server.") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) @@ -223,6 +251,13 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { appResource, SPARK_PI_MAIN_CLASS, "Pi is roughly 3", Array.empty[String]) } + private def runPySparkPiAndVerifyCompletion( + appResource: String): Unit = { + runSparkApplicationAndVerifyCompletion( + appResource, PYSPARK_PI_MAIN_CLASS, "Pi is roughly 3", + Array(null, "5")) + } + private def runSparkApplicationAndVerifyCompletion( appResource: String, mainClass: String, @@ -305,11 +340,14 @@ private[spark] object KubernetesSuite { s"integration-tests-jars/${EXAMPLES_JAR_FILE.getName}" val CONTAINER_LOCAL_HELPER_JAR_PATH = s"local:///opt/spark/examples/" + s"integration-tests-jars/${HELPER_JAR_FILE.getName}" - val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) val SPARK_PI_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + ".integrationtest.jobs.SparkPiWithInfiniteWait" + val PYSPARK_PI_MAIN_CLASS = "org.apache.spark.deploy.PythonRunner" + val PYSPARK_PI_CONTAINER_LOCAL_FILE_LOCATION = + "local:///opt/spark/examples/src/main/python/pi.py" + val PYSPARK_PI_SUBMITTER_LOCAL_FILE_LOCATION = "src/test/python/pi.py" val FILE_EXISTENCE_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + ".integrationtest.jobs.FileExistenceTest" val GROUP_BY_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala index 4db19478f44bc..e240fcf953f8c 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala @@ -16,21 +16,32 @@ */ package org.apache.spark.deploy.kubernetes.integrationtest.docker +import java.io.File import java.net.URI import java.nio.file.Paths +import scala.collection.JavaConverters._ + import com.spotify.docker.client.{DefaultDockerClient, DockerCertificates, LoggingBuildHandler} import org.apache.http.client.utils.URIBuilder import org.scalatest.concurrent.{Eventually, PatienceConfiguration} import org.scalatest.time.{Minutes, Seconds, Span} -private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, String]) { +import org.apache.spark.internal.Logging +import org.apache.spark.util.RedirectThread + + + +private[spark] class SparkDockerImageBuilder + (private val dockerEnv: Map[String, String]) extends Logging{ private val DOCKER_BUILD_PATH = Paths.get("target", "docker") // Dockerfile paths must be relative to the build path. 
private val BASE_DOCKER_FILE = "dockerfiles/spark-base/Dockerfile" private val DRIVER_DOCKER_FILE = "dockerfiles/driver/Dockerfile" + private val DRIVERPY_DOCKER_FILE = "dockerfiles/driver-py/Dockerfile" private val EXECUTOR_DOCKER_FILE = "dockerfiles/executor/Dockerfile" + private val EXECUTORPY_DOCKER_FILE = "dockerfiles/executor-py/Dockerfile" private val SHUFFLE_SERVICE_DOCKER_FILE = "dockerfiles/shuffle-service/Dockerfile" private val INIT_CONTAINER_DOCKER_FILE = "dockerfiles/init-container/Dockerfile" private val STAGING_SERVER_DOCKER_FILE = "dockerfiles/resource-staging-server/Dockerfile" @@ -61,9 +72,25 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, def buildSparkDockerImages(): Unit = { Eventually.eventually(TIMEOUT, INTERVAL) { dockerClient.ping() } + // Building Python distribution environment + val pythonExec = sys.env.get("PYSPARK_DRIVER_PYTHON") + .orElse(sys.env.get("PYSPARK_PYTHON")) + .getOrElse("/usr/bin/python") + val builder = new ProcessBuilder( + Seq(pythonExec, "setup.py", "sdist").asJava) + builder.directory(new File(DOCKER_BUILD_PATH.toFile, "python")) + builder.redirectErrorStream(true) // Ugly but needed for stdout and stderr to synchronize + val process = builder.start() + new RedirectThread(process.getInputStream, System.out, "redirect output").start() + val exitCode = process.waitFor() + if (exitCode != 0) { + logInfo(s"exitCode: $exitCode") + } buildImage("spark-base", BASE_DOCKER_FILE) buildImage("spark-driver", DRIVER_DOCKER_FILE) + buildImage("spark-driver-py", DRIVERPY_DOCKER_FILE) buildImage("spark-executor", EXECUTOR_DOCKER_FILE) + buildImage("spark-executor-py", EXECUTORPY_DOCKER_FILE) buildImage("spark-shuffle", SHUFFLE_SERVICE_DOCKER_FILE) buildImage("spark-resource-staging-server", STAGING_SERVER_DOCKER_FILE) buildImage("spark-init", INIT_CONTAINER_DOCKER_FILE) From 0f4368f7f485a9ba5e73528242025ef4a3b23c04 Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 14 Jul 2017 15:43:44 -0700 Subject: [PATCH 144/156] Submission client redesign to use a step-based builder pattern (#365) * Submission client redesign to use a step-based builder pattern. This change overhauls the underlying architecture of the submission client, but it is intended to entirely preserve existing behavior of Spark applications. Therefore users will find this to be an invisible change. The philosophy behind this design is to reconsider the breakdown of the submission process. It operates off the abstraction of "submission steps", which are transformation functions that take the previous state of the driver and return the new state of the driver. The driver's state includes its Spark configurations and the Kubernetes resources that will be used to deploy it. Such a refactor moves away from a features-first API design, which considers different containers to serve a set of features. The previous design, for example, had a container files resolver API object that returned different resolutions of the dependencies added by the user. However, it was up to the main Client to know how to intelligently invoke all of those APIs. Therefore the API surface area of the file resolver became untenably large and it was not intuitive of how it was to be used or extended. This design changes the encapsulation layout; every module is now responsible for changing the driver specification directly. An orchestrator builds the correct chain of steps and hands it to the client, which then calls it verbatim. 
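As a rough sketch of the "submission step" abstraction just described, using the DriverConfigurationStep and KubernetesDriverSpec names that appear later in this patch (the spec fields and the runner object below are simplified assumptions, not the full definitions):

    import io.fabric8.kubernetes.api.model.{Container, HasMetadata, Pod}
    import org.apache.spark.SparkConf

    // The driver's state threaded through the steps: its pod, its container, any extra
    // Kubernetes resources, and the Spark conf accumulated so far (fields assumed here).
    case class KubernetesDriverSpec(
        driverPod: Pod,
        driverContainer: Container,
        otherKubernetesResources: Seq[HasMetadata],
        driverSparkConf: SparkConf)

    // A submission step is a transformation from the previous driver state to the next one.
    trait DriverConfigurationStep {
      def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec
    }

    // The client then simply folds the orchestrated steps over an initial spec.
    object StepRunnerSketch {
      def applySteps(
          initialSpec: KubernetesDriverSpec,
          steps: Seq[DriverConfigurationStep]): KubernetesDriverSpec =
        steps.foldLeft(initialSpec) { (spec, step) => step.configureDriver(spec) }
    }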
The main client then makes any final modifications that put the different pieces of the driver together, particularly to attach the driver container itself to the pod and to apply the Spark configuration as command-line arguments. * Add a unit test for BaseSubmissionStep. * Add unit test for kubernetes credentials mounting. * Add unit test for InitContainerBootstrapStep. * unit tests for initContainer * Add a unit test for DependencyResolutionStep. * further modifications to InitContainer unit tests * Use of resolver in PythonStep and unit tests for PythonStep * refactoring of init unit tests and pythonstep resolver logic * Add unit test for KubernetesSubmissionStepsOrchestrator. * refactoring and addition of secret trustStore+Cert checks in a SubmissionStepSuite * added SparkPodInitContainerBootstrapSuite * Added InitContainerResourceStagingServerSecretPluginSuite * style in Unit tests * extremely minor style fix in variable naming * Address comments. * Rename class for consistency. * Attempt to make spacing consistent. Multi-line methods should have four-space indentation for arguments that aren't on the same line as the method call itself... but this is difficult to do consistently given how IDEs handle Scala multi-line indentation in most cases. --- .../org/apache/spark/deploy/SparkSubmit.scala | 15 +- ...nerResourceStagingServerSecretPlugin.scala | 38 +- .../PodWithDetachedInitContainer.scala | 24 + .../SparkPodInitContainerBootstrap.scala | 50 +- .../deploy/kubernetes/submit/Client.scala | 415 ++++--------- .../ContainerLocalizedFilesResolver.scala | 87 --- ...DriverConfigurationStepsOrchestrator.scala | 138 +++++ ...riverInitContainerComponentsProvider.scala | 223 ------- ...riverPodKubernetesCredentialsMounter.scala | 184 ------ ...KubernetesCredentialsMounterProvider.scala | 49 -- ...iverPodKubernetesCredentialsProvider.scala | 63 -- .../DriverPodKubernetesFileMounter.scala | 55 -- .../ExecutorInitContainerConfiguration.scala | 47 -- .../kubernetes/submit/InitContainerUtil.scala | 18 +- .../submit/KubernetesFileUtils.scala | 45 ++ ...inerBundle.scala => MainAppResource.scala} | 9 +- .../submit/PythonSubmissionResources.scala | 75 --- ...dDependencyInitContainerConfigPlugin.scala | 96 --- .../SubmittedDependencySecretBuilder.scala | 81 --- .../BaseDriverConfigurationStep.scala | 131 ++++ .../DependencyResolutionStep.scala | 66 +++ .../submitsteps/DriverConfigurationStep.scala | 28 + .../DriverKubernetesCredentialsStep.scala | 222 +++++++ .../InitContainerBootstrapStep.scala | 64 ++ .../submitsteps/KubernetesDriverSpec.scala | 47 ++ .../submit/submitsteps/PythonStep.scala | 46 ++ .../BaseInitContainerConfigurationStep.scala} | 41 +- .../InitContainerConfigurationStep.scala | 25 + ...tainerConfigurationStepsOrchestrator.scala | 131 ++++ .../initcontainer/InitContainerSpec.scala | 41 ++ ...ourcesInitContainerConfigurationStep.scala | 146 +++++ ...SparkDependencyDownloadInitContainer.scala | 1 - .../kubernetes/KubernetesClusterManager.scala | 9 +- .../KubernetesClusterSchedulerBackend.scala | 148 +++-- ...sourceStagingServerSecretPluginSuite.scala | 59 ++ .../SparkPodInitContainerBootstrapSuite.scala | 187 ++---- ...dencyInitContainerVolumesPluginSuite.scala | 60 -- .../kubernetes/submit/ClientSuite.scala | 226 +++++++ .../kubernetes/submit/ClientV2Suite.scala | 558 ------------------ ...ContainerLocalizedFilesResolverSuite.scala | 93 --- ...rConfigurationStepsOrchestratorSuite.scala | 95 +++ ...PodKubernetesCredentialsMounterSuite.scala | 171 ------ 
...cutorInitContainerConfigurationSuite.scala | 56 -- .../PythonSubmissionResourcesSuite.scala | 109 ---- ...rkInitContainerConfigMapBuilderSuite.scala | 101 ---- ...ndencyInitContainerConfigPluginSuite.scala | 89 --- ...ubmittedDependencySecretBuilderSuite.scala | 109 ---- .../BaseDriverConfigurationStepSuite.scala | 106 ++++ .../DependencyResolutionStepSuite.scala | 79 +++ ...DriverKubernetesCredentialsStepSuite.scala | 152 +++++ .../submit/submitsteps/PythonStepSuite.scala | 76 +++ .../initContainerBootstrapStepSuite.scala | 159 +++++ ...eInitContainerConfigurationStepSuite.scala | 98 +++ ...rConfigurationStepsOrchestratorSuite.scala | 94 +++ ...ittedResourcesInitContainerStepSuite.scala | 252 ++++++++ .../integrationtest/KubernetesSuite.scala | 70 ++- 56 files changed, 2946 insertions(+), 2911 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/PodWithDetachedInitContainer.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestrator.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfiguration.scala rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{InitContainerBundle.scala => MainAppResource.scala} (71%) delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPlugin.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilder.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStep.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverConfigurationStep.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala create mode 100644 
resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/InitContainerBootstrapStep.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/KubernetesDriverSpec.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStep.scala rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{SparkInitContainerConfigMapBuilder.scala => submitsteps/initcontainer/BaseInitContainerConfigurationStep.scala} (62%) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStep.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestrator.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerSpec.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerConfigurationStep.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestratorSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfigurationSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilderSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPluginSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilderSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStepSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStepSuite.scala create mode 100644 
resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStepSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStepSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initContainerBootstrapStepSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/BaseInitContainerConfigurationStepSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestratorSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerStepSuite.scala diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 9256a9ddd9960..df50af13f71a3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -621,14 +621,17 @@ object SparkSubmit { if (isKubernetesCluster) { childMainClass = "org.apache.spark.deploy.kubernetes.submit.Client" if (args.isPython) { - childArgs += args.primaryResource - childArgs += "org.apache.spark.deploy.PythonRunner" - childArgs += args.pyFiles + childArgs ++= Array("--primary-py-file", args.primaryResource) + childArgs ++= Array("--main-class", "org.apache.spark.deploy.PythonRunner") + childArgs ++= Array("--other-py-files", args.pyFiles) } else { - childArgs += args.primaryResource - childArgs += args.mainClass + childArgs ++= Array("--primary-java-resource", args.primaryResource) + childArgs ++= Array("--main-class", args.mainClass) + } + args.childArgs.foreach { arg => + childArgs += "--arg" + childArgs += arg } - childArgs ++= args.childArgs } // Load any properties specified through --conf and the default properties file diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala index 45b881a8a3737..265b8f197a102 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.deploy.kubernetes -import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder, Secret} +import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, Pod, PodBuilder, Secret} import org.apache.spark.deploy.kubernetes.constants._ @@ -27,13 +27,13 @@ private[spark] trait InitContainerResourceStagingServerSecretPlugin { * from a resource staging server. */ def mountResourceStagingServerSecretIntoInitContainer( - initContainer: ContainerBuilder): ContainerBuilder + initContainer: Container): Container /** * Configure the pod to attach a Secret volume which hosts secret files allowing the * init-container to retrieve dependencies from the resource staging server. 
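Returning for a moment to the SparkSubmit change above: the Kubernetes client is now driven by flag-style child arguments instead of positional ones. A purely illustrative example of the array SparkSubmit builds for a PySpark submission follows; the extra.py path is invented for the example, while pi.py and the "500" argument mirror the integration test values used elsewhere in this patch:

    object PySparkChildArgsExample {
      // Illustrative only; not part of the patch.
      val childArgs: Array[String] = Array(
        "--primary-py-file", "local:///opt/spark/examples/src/main/python/pi.py",
        "--main-class", "org.apache.spark.deploy.PythonRunner",
        "--other-py-files", "local:///opt/spark/examples/src/main/python/extra.py",
        "--arg", "500")
      // ClientArguments.fromCommandLineArgs (added below) maps these pairs to a
      // PythonMainAppResource for pi.py, the PythonRunner main class, the comma-split
      // otherPyFiles, and driverArgs containing "500".
    }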
*/ - def addResourceStagingServerSecretVolumeToPod(basePod: PodBuilder): PodBuilder + def addResourceStagingServerSecretVolumeToPod(basePod: Pod): Pod } private[spark] class InitContainerResourceStagingServerSecretPluginImpl( @@ -42,21 +42,25 @@ private[spark] class InitContainerResourceStagingServerSecretPluginImpl( extends InitContainerResourceStagingServerSecretPlugin { override def mountResourceStagingServerSecretIntoInitContainer( - initContainer: ContainerBuilder): ContainerBuilder = { - initContainer.addNewVolumeMount() - .withName(INIT_CONTAINER_SECRET_VOLUME_NAME) - .withMountPath(initContainerSecretMountPath) - .endVolumeMount() + initContainer: Container): Container = { + new ContainerBuilder(initContainer) + .addNewVolumeMount() + .withName(INIT_CONTAINER_SECRET_VOLUME_NAME) + .withMountPath(initContainerSecretMountPath) + .endVolumeMount() + .build() } - override def addResourceStagingServerSecretVolumeToPod(basePod: PodBuilder): PodBuilder = { - basePod.editSpec() - .addNewVolume() - .withName(INIT_CONTAINER_SECRET_VOLUME_NAME) - .withNewSecret() - .withSecretName(initContainerSecretName) - .endSecret() - .endVolume() - .endSpec() + override def addResourceStagingServerSecretVolumeToPod(basePod: Pod): Pod = { + new PodBuilder(basePod) + .editSpec() + .addNewVolume() + .withName(INIT_CONTAINER_SECRET_VOLUME_NAME) + .withNewSecret() + .withSecretName(initContainerSecretName) + .endSecret() + .endVolume() + .endSpec() + .build() } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/PodWithDetachedInitContainer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/PodWithDetachedInitContainer.scala new file mode 100644 index 0000000000000..36b1b07dc6bc4 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/PodWithDetachedInitContainer.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes + +import io.fabric8.kubernetes.api.model.{Container, Pod} + +private[spark] case class PodWithDetachedInitContainer( + pod: Pod, + initContainer: Container, + mainContainer: Container) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala index 87462dbde17a5..2df7ac7a204c4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala @@ -19,8 +19,11 @@ package org.apache.spark.deploy.kubernetes import io.fabric8.kubernetes.api.model.{ContainerBuilder, EmptyDirVolumeSource, PodBuilder, VolumeMount, VolumeMountBuilder} import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.{ContainerNameEqualityPredicate, InitContainerUtil} +/** + * This is separated out from the init-container steps API because this component can be reused to + * set up the init-container for executors as well. + */ private[spark] trait SparkPodInitContainerBootstrap { /** * Bootstraps an init-container that downloads dependencies to be used by a main container. @@ -28,10 +31,13 @@ private[spark] trait SparkPodInitContainerBootstrap { * by a ConfigMap that was installed by some other component; that is, the implementation * here makes no assumptions about how the init-container is specifically configured. For * example, this class is unaware if the init-container is fetching remote dependencies or if - * it is fetching dependencies from a resource staging server. + * it is fetching dependencies from a resource staging server. Additionally, the container itself + * is not actually attached to the pod, but the init container is returned so it can be attached + * by InitContainerUtil after the caller has decided to make any changes to it. 
*/ def bootstrapInitContainerAndVolumes( - mainContainerName: String, originalPodSpec: PodBuilder): PodBuilder + originalPodWithUnattachedInitContainer: PodWithDetachedInitContainer) + : PodWithDetachedInitContainer } private[spark] class SparkPodInitContainerBootstrapImpl( @@ -41,13 +47,11 @@ private[spark] class SparkPodInitContainerBootstrapImpl( filesDownloadPath: String, downloadTimeoutMinutes: Long, initContainerConfigMapName: String, - initContainerConfigMapKey: String, - resourceStagingServerSecretPlugin: Option[InitContainerResourceStagingServerSecretPlugin]) + initContainerConfigMapKey: String) extends SparkPodInitContainerBootstrap { override def bootstrapInitContainerAndVolumes( - mainContainerName: String, - originalPodSpec: PodBuilder): PodBuilder = { + podWithDetachedInitContainer: PodWithDetachedInitContainer): PodWithDetachedInitContainer = { val sharedVolumeMounts = Seq[VolumeMount]( new VolumeMountBuilder() .withName(INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME) @@ -58,7 +62,7 @@ private[spark] class SparkPodInitContainerBootstrapImpl( .withMountPath(filesDownloadPath) .build()) - val initContainer = new ContainerBuilder() + val initContainer = new ContainerBuilder(podWithDetachedInitContainer.initContainer) .withName(s"spark-init") .withImage(initContainerImage) .withImagePullPolicy(dockerImagePullPolicy) @@ -68,11 +72,8 @@ private[spark] class SparkPodInitContainerBootstrapImpl( .endVolumeMount() .addToVolumeMounts(sharedVolumeMounts: _*) .addToArgs(INIT_CONTAINER_PROPERTIES_FILE_PATH) - val resolvedInitContainer = resourceStagingServerSecretPlugin.map { plugin => - plugin.mountResourceStagingServerSecretIntoInitContainer(initContainer) - }.getOrElse(initContainer).build() - val podWithBasicVolumes = InitContainerUtil.appendInitContainer( - originalPodSpec, resolvedInitContainer) + .build() + val podWithBasicVolumes = new PodBuilder(podWithDetachedInitContainer.pod) .editSpec() .addNewVolume() .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) @@ -92,17 +93,20 @@ private[spark] class SparkPodInitContainerBootstrapImpl( .withName(INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME) .withEmptyDir(new EmptyDirVolumeSource()) .endVolume() - .editMatchingContainer(new ContainerNameEqualityPredicate(mainContainerName)) - .addToVolumeMounts(sharedVolumeMounts: _*) - .addNewEnv() - .withName(ENV_MOUNTED_FILES_DIR) - .withValue(filesDownloadPath) - .endEnv() - .endContainer() .endSpec() - resourceStagingServerSecretPlugin.map { plugin => - plugin.addResourceStagingServerSecretVolumeToPod(podWithBasicVolumes) - }.getOrElse(podWithBasicVolumes) + .build() + val mainContainerWithMountedFiles = new ContainerBuilder( + podWithDetachedInitContainer.mainContainer) + .addToVolumeMounts(sharedVolumeMounts: _*) + .addNewEnv() + .withName(ENV_MOUNTED_FILES_DIR) + .withValue(filesDownloadPath) + .endEnv() + .build() + PodWithDetachedInitContainer( + podWithBasicVolumes, + initContainer, + mainContainerWithMountedFiles) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index 781ecbd6c5416..98cd7afcd204d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -16,233 +16,99 @@ */ package org.apache.spark.deploy.kubernetes.submit -import java.io.File import 
java.util.{Collections, UUID} -import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, OwnerReferenceBuilder, PodBuilder, QuantityBuilder} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, OwnerReferenceBuilder, PodBuilder} import io.fabric8.kubernetes.client.KubernetesClient -import scala.collection.JavaConverters._ +import scala.collection.mutable import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, SparkKubernetesClientFactory} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.ResourceStagingServerSslOptionsProviderImpl +import org.apache.spark.deploy.kubernetes.submit.submitsteps.{DriverConfigurationStep, KubernetesDriverSpec} +import org.apache.spark.deploy.kubernetes.SparkKubernetesClientFactory import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.ConfigEntry -import org.apache.spark.launcher.SparkLauncher import org.apache.spark.util.Utils -/** - * Submission client for launching Spark applications on Kubernetes clusters. - * - * This class is responsible for instantiating Kubernetes resources that allow a Spark driver to - * run in a pod on the Kubernetes cluster with the Spark configurations specified by spark-submit. - * The API of this class makes it such that much of the specific behavior can be stubbed for - * testing; most of the detailed logic must be dependency-injected when constructing an instance - * of this client. Therefore the submission process is designed to be as modular as possible, - * where different steps of submission should be factored out into separate classes. - */ +private[spark] case class ClientArguments( + mainAppResource: MainAppResource, + otherPyFiles: Seq[String], + mainClass: String, + driverArgs: Array[String]) + +private[spark] object ClientArguments { + def fromCommandLineArgs(args: Array[String]): ClientArguments = { + var mainAppResource: Option[MainAppResource] = None + var otherPyFiles = Seq.empty[String] + var mainClass: Option[String] = None + val driverArgs = mutable.Buffer.empty[String] + args.sliding(2).toList.collect { + case Array("--primary-py-file", mainPyFile: String) => + mainAppResource = Some(PythonMainAppResource(mainPyFile)) + case Array("--primary-java-resource", primaryJavaResource: String) => + mainAppResource = Some(JavaMainAppResource(primaryJavaResource)) + case Array("--main-class", clazz: String) => + mainClass = Some(clazz) + case Array("--other-py-files", pyFiles: String) => + otherPyFiles = pyFiles.split(",") + case Array("--arg", arg: String) => + driverArgs += arg + case other => + throw new RuntimeException(s"Unknown arguments: $other") + } + require(mainAppResource.isDefined, + "Main app resource must be defined by either --primary-py-file or --primary-java-resource.") + require(mainClass.isDefined, "Main class must be specified via --main-class") + ClientArguments( + mainAppResource.get, + otherPyFiles, + mainClass.get, + driverArgs.toArray) + } +} + private[spark] class Client( - appName: String, - kubernetesResourceNamePrefix: String, - kubernetesAppId: String, - mainAppResource: String, - pythonResource: Option[PythonSubmissionResourcesImpl], - mainClass: String, - sparkConf: SparkConf, - appArgs: Array[String], - waitForAppCompletion: Boolean, + submissionSteps: Seq[DriverConfigurationStep], + submissionSparkConf: SparkConf, kubernetesClient: KubernetesClient, - initContainerComponentsProvider: 
DriverInitContainerComponentsProvider, - kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider, + waitForAppCompletion: Boolean, + appName: String, loggingPodStatusWatcher: LoggingPodStatusWatcher) extends Logging { - private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) - .getOrElse(s"$kubernetesResourceNamePrefix-driver") - private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) - private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) - - // CPU settings - private val driverCpuCores = sparkConf.getOption("spark.driver.cores").getOrElse("1") - private val driverLimitCores = sparkConf.getOption(KUBERNETES_DRIVER_LIMIT_CORES.key) - // Memory settings - private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) - private val memoryOverheadMb = sparkConf - .get(KUBERNETES_DRIVER_MEMORY_OVERHEAD) - .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * driverMemoryMb).toInt, - MEMORY_OVERHEAD_MIN)) - private val driverContainerMemoryWithOverhead = driverMemoryMb + memoryOverheadMb - private val customLabels = sparkConf.get(KUBERNETES_DRIVER_LABELS) - private val customAnnotations = sparkConf.get(KUBERNETES_DRIVER_ANNOTATIONS) - - private val driverExtraClasspath = sparkConf.get( - org.apache.spark.internal.config.DRIVER_CLASS_PATH) - private val driverJavaOptions = sparkConf.get( + private val driverJavaOptions = submissionSparkConf.get( org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) + /** + * Runs the given sequence of DriverConfigurationSteps, feeding the KubernetesDriverSpec produced + * by each step into the next. The final KubernetesDriverSpec is used to build the driver + * container, the driver pod, and any additional Kubernetes resources. + */ def run(): Unit = { - val arguments = (pythonResource map {p => p.arguments}).getOrElse(appArgs) - val driverCustomLabels = ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( - sparkConf, - KUBERNETES_DRIVER_LABEL_PREFIX, - KUBERNETES_DRIVER_LABELS, - "label") - require(!driverCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + - s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping" + - s" operations.") - - val driverCustomAnnotations = ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( - sparkConf, - KUBERNETES_DRIVER_ANNOTATION_PREFIX, - KUBERNETES_DRIVER_ANNOTATIONS, - "annotation") - require(!driverCustomAnnotations.contains(SPARK_APP_NAME_ANNOTATION), - s"Annotation with key $SPARK_APP_NAME_ANNOTATION is not allowed as it is reserved for" + - s" Spark bookkeeping operations.") - val allDriverLabels = driverCustomLabels ++ Map( - SPARK_APP_ID_LABEL -> kubernetesAppId, - SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) - - val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => - new EnvVarBuilder() - .withName(ENV_SUBMIT_EXTRA_CLASSPATH) - .withValue(classPath) - .build() + var currentDriverSpec = KubernetesDriverSpec.initialSpec(submissionSparkConf) + // submissionSteps are created by the DriverConfigurationStepsOrchestrator from the client + // arguments; each step resolves its own part of the driver specification. + for (nextStep <- submissionSteps) { + currentDriverSpec = nextStep.configureDriver(currentDriverSpec) } - val driverCpuQuantity = new QuantityBuilder(false) - .withAmount(driverCpuCores) - .build() - val driverMemoryQuantity = new QuantityBuilder(false) - .withAmount(s"${driverMemoryMb}M") - .build() - val driverMemoryLimitQuantity = new QuantityBuilder(false) -
.withAmount(s"${driverContainerMemoryWithOverhead}M") - .build() - val driverContainer = new ContainerBuilder() - .withName(DRIVER_CONTAINER_NAME) - .withImage(driverDockerImage) - .withImagePullPolicy(dockerImagePullPolicy) - .addToEnv(driverExtraClasspathEnv.toSeq: _*) + val resolvedDriverJavaOpts = currentDriverSpec + .driverSparkConf + // We don't need this anymore since we just set the JVM options on the environment + .remove(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) + .getAll + .map { + case (confKey, confValue) => s"-D$confKey=$confValue" + }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") + val resolvedDriverContainer = new ContainerBuilder(currentDriverSpec.driverContainer) .addNewEnv() - .withName(ENV_DRIVER_MEMORY) - .withValue(driverContainerMemoryWithOverhead + "m") + .withName(ENV_DRIVER_JAVA_OPTS) + .withValue(resolvedDriverJavaOpts) .endEnv() - .addNewEnv() - .withName(ENV_DRIVER_MAIN_CLASS) - .withValue(mainClass) - .endEnv() - .addNewEnv() - .withName(ENV_DRIVER_ARGS) - .withValue(arguments.mkString(" ")) - .endEnv() - .withNewResources() - .addToRequests("cpu", driverCpuQuantity) - .addToRequests("memory", driverMemoryQuantity) - .addToLimits("memory", driverMemoryLimitQuantity) - .endResources() .build() - val basePod = new PodBuilder() - .withNewMetadata() - .withName(kubernetesDriverPodName) - .addToLabels(allDriverLabels.asJava) - .addToAnnotations(driverCustomAnnotations.toMap.asJava) - .addToAnnotations(SPARK_APP_NAME_ANNOTATION, appName) - .endMetadata() - .withNewSpec() - .withRestartPolicy("Never") - .addToContainers(driverContainer) + val resolvedDriverPod = new PodBuilder(currentDriverSpec.driverPod) + .editSpec() + .addToContainers(resolvedDriverContainer) .endSpec() - - driverLimitCores.map { - limitCores => - val driverCpuLimitQuantity = new QuantityBuilder(false) - .withAmount(limitCores) - .build() - basePod - .editSpec() - .editFirstContainer() - .editResources - .addToLimits("cpu", driverCpuLimitQuantity) - .endResources() - .endContainer() - .endSpec() - } - - val maybeSubmittedResourceIdentifiers = initContainerComponentsProvider - .provideInitContainerSubmittedDependencyUploader(allDriverLabels) - .map { uploader => - SubmittedResources(uploader.uploadJars(), uploader.uploadFiles()) - } - val maybeSubmittedDependenciesSecret = initContainerComponentsProvider - .provideSubmittedDependenciesSecretBuilder( - maybeSubmittedResourceIdentifiers.map(_.secrets())) - .map(_.build()) - - val containerLocalizedFilesResolver = initContainerComponentsProvider - .provideContainerLocalizedFilesResolver(mainAppResource) - val resolvedSparkJars = containerLocalizedFilesResolver.resolveSubmittedSparkJars() - val resolvedSparkFiles = containerLocalizedFilesResolver.resolveSubmittedSparkFiles() - val resolvedPySparkFiles = containerLocalizedFilesResolver.resolveSubmittedPySparkFiles() - val resolvedPrimaryPySparkResource = pythonResource.map { - p => p.primaryPySparkResource(containerLocalizedFilesResolver) - }.getOrElse("") - val initContainerBundler = initContainerComponentsProvider - .provideInitContainerBundle(maybeSubmittedResourceIdentifiers.map(_.ids()), - resolvedSparkJars ++ resolvedSparkFiles) - - val podWithInitContainer = initContainerBundler.map( - _.sparkPodInitContainerBootstrap - .bootstrapInitContainerAndVolumes(driverContainer.getName, basePod)) - .getOrElse(basePod) - val sparkConfWithExecutorInit = initContainerBundler.map( - _.executorInitContainerConfiguration - .configureSparkConfForExecutorInitContainer(sparkConf)) - 
.getOrElse(sparkConf) - val credentialsMounter = kubernetesCredentialsMounterProvider - .getDriverPodKubernetesCredentialsMounter() - val credentialsSecret = credentialsMounter.createCredentialsSecret() - val podWithInitContainerAndMountedCreds = credentialsMounter.mountDriverKubernetesCredentials( - podWithInitContainer, driverContainer.getName, credentialsSecret) - val resolvedSparkConf = credentialsMounter.setDriverPodKubernetesCredentialLocations( - sparkConfWithExecutorInit) - if (resolvedSparkJars.nonEmpty) { - resolvedSparkConf.set("spark.jars", resolvedSparkJars.mkString(",")) - } - if (resolvedSparkFiles.nonEmpty) { - resolvedSparkConf.set("spark.files", resolvedSparkFiles.mkString(",")) - } - resolvedSparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) - resolvedSparkConf.set("spark.app.id", kubernetesAppId) - resolvedSparkConf.set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, kubernetesResourceNamePrefix) - // We don't need this anymore since we just set the JVM options on the environment - resolvedSparkConf.remove(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) - val resolvedLocalClasspath = containerLocalizedFilesResolver - .resolveSubmittedAndRemoteSparkJars() - val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { - case (confKey, confValue) => s"-D$confKey=$confValue" - }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") - val resolvedDriverPodBuilder = podWithInitContainerAndMountedCreds.editSpec() - .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainer.getName)) - .addNewEnv() - .withName(ENV_MOUNTED_CLASSPATH) - .withValue(resolvedLocalClasspath.mkString(File.pathSeparator)) - .endEnv() - .addNewEnv() - .withName(ENV_DRIVER_JAVA_OPTS) - .withValue(resolvedDriverJavaOpts) - .endEnv() - .endContainer() - .endSpec() - val driverPodFileMounter = initContainerComponentsProvider.provideDriverPodFileMounter() - val resolvedDriverPod = pythonResource.map { - p => p.driverPodWithPySparkEnvs( - driverPodFileMounter, - resolvedPrimaryPySparkResource, - resolvedPySparkFiles.mkString(","), - driverContainer.getName, - resolvedDriverPodBuilder - )}.getOrElse(resolvedDriverPodBuilder.build()) + .build() Utils.tryWithResource( kubernetesClient .pods() @@ -250,22 +116,21 @@ private[spark] class Client( .watch(loggingPodStatusWatcher)) { _ => val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) try { - val driverOwnedResources = initContainerBundler.map( - _.sparkInitContainerConfigMap).toSeq ++ - maybeSubmittedDependenciesSecret.toSeq ++ - credentialsSecret.toSeq - val driverPodOwnerReference = new OwnerReferenceBuilder() - .withName(createdDriverPod.getMetadata.getName) - .withApiVersion(createdDriverPod.getApiVersion) - .withUid(createdDriverPod.getMetadata.getUid) - .withKind(createdDriverPod.getKind) - .withController(true) - .build() - driverOwnedResources.foreach { resource => - val originalMetadata = resource.getMetadata - originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) + if (currentDriverSpec.otherKubernetesResources.nonEmpty) { + val driverPodOwnerReference = new OwnerReferenceBuilder() + .withName(createdDriverPod.getMetadata.getName) + .withApiVersion(createdDriverPod.getApiVersion) + .withUid(createdDriverPod.getMetadata.getUid) + .withKind(createdDriverPod.getKind) + .withController(true) + .build() + currentDriverSpec.otherKubernetesResources.foreach { resource => + val originalMetadata = resource.getMetadata + 
originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) + } + val otherKubernetesResources = currentDriverSpec.otherKubernetesResources + kubernetesClient.resourceList(otherKubernetesResources: _*).createOrReplace() } - kubernetesClient.resourceList(driverOwnedResources: _*).createOrReplace() } catch { case e: Throwable => kubernetesClient.pods().delete(createdDriverPod) @@ -283,61 +148,26 @@ private[spark] class Client( } private[spark] object Client { - def main(args: Array[String]): Unit = { - val sparkConf = new SparkConf(true) - val mainAppResource = args(0) - val mainClass = args(1) - val appArgs = args.drop(2) - run(sparkConf, mainAppResource, mainClass, appArgs) - } - def run( - sparkConf: SparkConf, - mainAppResource: String, - mainClass: String, - appArgs: Array[String]): Unit = { - val isPython = mainAppResource.endsWith(".py") - val pythonResource: Option[PythonSubmissionResourcesImpl] = - if (isPython) { - Option(new PythonSubmissionResourcesImpl(mainAppResource, appArgs)) - } else None - // Since you might need jars for SQL UDFs in PySpark - def sparkJarFilter(): Seq[String] = - pythonResource.map {p => p.sparkJars}.getOrElse( - Option(mainAppResource) - .filterNot(_ == SparkLauncher.NO_RESOURCE) - .toSeq) - val sparkJars = sparkConf.getOption("spark.jars") - .map(_.split(",")) - .getOrElse(Array.empty[String]) ++ sparkJarFilter() - val launchTime = System.currentTimeMillis - val sparkFiles = sparkConf.getOption("spark.files") - .map(_.split(",")) - .getOrElse(Array.empty[String]) - val pySparkFilesOption = pythonResource.map {p => p.pySparkFiles} - validateNoDuplicateFileNames(sparkJars) - validateNoDuplicateFileNames(sparkFiles) - pySparkFilesOption.foreach {b => validateNoDuplicateFileNames(b)} - val pySparkFiles = pySparkFilesOption.getOrElse(Array.empty[String]) - val appName = sparkConf.getOption("spark.app.name").getOrElse("spark") - // The resource name prefix is derived from the application name, making it easy to connect the - // names of the Kubernetes resources from e.g. Kubectl or the Kubernetes dashboard to the - // application the user submitted. However, we can't use the application name in the label, as - // label values are considerably restrictive, e.g. must be no longer than 63 characters in - // length. So we generate a separate identifier for the app ID itself, and bookkeeping that - // requires finding "all pods for this application" should use the kubernetesAppId. 
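For reference, spark-submit now launches the Client.main entry point (shown below) with alternating flag/value pairs that ClientArguments.fromCommandLineArgs, added above, turns into a ClientArguments instance. A usage sketch follows; only the flag names come from the parser itself, while the jar path, class name, and driver argument are illustrative values, not taken from this patch:

    // Hypothetical child arguments as spark-submit might pass them; only the flags
    // (--primary-java-resource, --main-class, --arg) are defined by the parser above.
    val childArgs = Array(
      "--primary-java-resource", "local:///opt/spark/examples/jars/spark-examples.jar",
      "--main-class", "org.apache.spark.examples.SparkPi",
      "--arg", "1000")
    val parsed = ClientArguments.fromCommandLineArgs(childArgs)
    // parsed.mainAppResource == JavaMainAppResource("local:///opt/spark/examples/jars/spark-examples.jar")
    // parsed.mainClass == "org.apache.spark.examples.SparkPi"
    // parsed.driverArgs.toSeq == Seq("1000"); parsed.otherPyFiles is empty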
- val kubernetesResourceNamePrefix = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") - val kubernetesAppId = s"spark-${UUID.randomUUID().toString.replaceAll("-", "")}" + def run(sparkConf: SparkConf, clientArguments: ClientArguments): Unit = { val namespace = sparkConf.get(KUBERNETES_NAMESPACE) + val kubernetesAppId = s"spark-${UUID.randomUUID().toString.replaceAll("-", "")}" + val launchTime = System.currentTimeMillis() + val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) + val appName = sparkConf.getOption("spark.app.name").getOrElse("spark") val master = resolveK8sMaster(sparkConf.get("spark.master")) - val sslOptionsProvider = new ResourceStagingServerSslOptionsProviderImpl(sparkConf) - val initContainerComponentsProvider = new DriverInitContainerComponentsProviderImpl( - sparkConf, - kubernetesResourceNamePrefix, + val loggingInterval = Option(sparkConf.get(REPORT_INTERVAL)).filter( _ => waitForAppCompletion) + val loggingPodStatusWatcher = new LoggingPodStatusWatcherImpl( + kubernetesAppId, loggingInterval) + val configurationStepsOrchestrator = new DriverConfigurationStepsOrchestrator( namespace, - sparkJars, - sparkFiles, - pySparkFiles, - sslOptionsProvider.getSslOptions) + kubernetesAppId, + launchTime, + clientArguments.mainAppResource, + appName, + clientArguments.mainClass, + clientArguments.driverArgs, + clientArguments.otherPyFiles, + sparkConf) Utils.tryWithResource(SparkKubernetesClientFactory.createKubernetesClient( master, Some(namespace), @@ -345,38 +175,25 @@ private[spark] object Client { sparkConf, None, None)) { kubernetesClient => - val kubernetesCredentialsMounterProvider = - new DriverPodKubernetesCredentialsMounterProviderImpl( - sparkConf, kubernetesResourceNamePrefix) - val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) - val loggingInterval = Option(sparkConf.get(REPORT_INTERVAL)) - .filter( _ => waitForAppCompletion) - val loggingPodStatusWatcher = new LoggingPodStatusWatcherImpl( - kubernetesResourceNamePrefix, loggingInterval) new Client( - appName, - kubernetesResourceNamePrefix, - kubernetesAppId, - mainAppResource, - pythonResource, - mainClass, + configurationStepsOrchestrator.getAllConfigurationSteps(), sparkConf, - appArgs, - waitForAppCompletion, kubernetesClient, - initContainerComponentsProvider, - kubernetesCredentialsMounterProvider, + waitForAppCompletion, + appName, loggingPodStatusWatcher).run() } } - private def validateNoDuplicateFileNames(allFiles: Seq[String]): Unit = { - val fileNamesToUris = allFiles.map { file => - (new File(Utils.resolveURI(file).getPath).getName, file) - } - fileNamesToUris.groupBy(_._1).foreach { - case (fileName, urisWithFileName) => - require(urisWithFileName.size == 1, "Cannot add multiple files with the same name, but" + - s" file name $fileName is shared by all of these URIs: $urisWithFileName") - } + + /** + * Entry point from SparkSubmit in spark-core + * + * @param args Array of strings that have interchanging values that will be + * parsed by ClientArguments with the identifiers that precede the values + */ + def main(args: Array[String]): Unit = { + val parsedArguments = ClientArguments.fromCommandLineArgs(args) + val sparkConf = new SparkConf() + run(sparkConf, parsedArguments) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala deleted file 
mode 100644 index c31aa5f306bea..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import java.io.File - -import org.apache.spark.util.Utils - -private[spark] trait ContainerLocalizedFilesResolver { - def resolveSubmittedAndRemoteSparkJars(): Seq[String] - def resolveSubmittedSparkJars(): Seq[String] - def resolveSubmittedSparkFiles(): Seq[String] - def resolveSubmittedPySparkFiles(): Seq[String] - def resolvePrimaryResourceFile(): String -} - -private[spark] class ContainerLocalizedFilesResolverImpl( - sparkJars: Seq[String], - sparkFiles: Seq[String], - pySparkFiles: Seq[String], - primaryPyFile: String, - jarsDownloadPath: String, - filesDownloadPath: String) extends ContainerLocalizedFilesResolver { - - - override def resolveSubmittedAndRemoteSparkJars(): Seq[String] = { - sparkJars.map { jar => - val jarUri = Utils.resolveURI(jar) - Option(jarUri.getScheme).getOrElse("file") match { - case "local" => - jarUri.getPath - case _ => - val jarFileName = new File(jarUri.getPath).getName - s"$jarsDownloadPath/$jarFileName" - } - } - } - - override def resolveSubmittedSparkJars(): Seq[String] = { - resolveSubmittedFiles(sparkJars, jarsDownloadPath) - } - - override def resolveSubmittedSparkFiles(): Seq[String] = { - resolveSubmittedFiles(sparkFiles, filesDownloadPath) - } - - override def resolveSubmittedPySparkFiles(): Seq[String] = { - def filterMainResource(x: String) = x match { - case `primaryPyFile` => None - case _ => Some(resolveFile(x, filesDownloadPath)) - } - pySparkFiles.flatMap(x => filterMainResource(x)) - } - - override def resolvePrimaryResourceFile(): String = { - Option(primaryPyFile).map(p => resolveFile(p, filesDownloadPath)).getOrElse("") - } - - private def resolveFile(file: String, downloadPath: String) = { - val fileUri = Utils.resolveURI(file) - Option(fileUri.getScheme).getOrElse("file") match { - case "file" => - val fileName = new File(fileUri.getPath).getName - s"$downloadPath/$fileName" - case _ => - file - } - } - - private def resolveSubmittedFiles(files: Seq[String], downloadPath: String): Seq[String] = { - files.map { file => resolveFile(file, downloadPath) } - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestrator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestrator.scala new file mode 100644 index 0000000000000..82abe55ac6989 --- /dev/null +++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestrator.scala @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.ConfigurationUtils +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.submitsteps.{BaseDriverConfigurationStep, DependencyResolutionStep, DriverConfigurationStep, DriverKubernetesCredentialsStep, InitContainerBootstrapStep, PythonStep} +import org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer.InitContainerConfigurationStepsOrchestrator +import org.apache.spark.launcher.SparkLauncher +import org.apache.spark.util.Utils + +/** + * Constructs the complete list of driver configuration steps to run to deploy the Spark driver. + */ +private[spark] class DriverConfigurationStepsOrchestrator( + namespace: String, + kubernetesAppId: String, + launchTime: Long, + mainAppResource: MainAppResource, + appName: String, + mainClass: String, + appArgs: Array[String], + additionalPythonFiles: Seq[String], + submissionSparkConf: SparkConf) { + + // The resource name prefix is derived from the application name, making it easy to connect the + // names of the Kubernetes resources from e.g. kubectl or the Kubernetes dashboard to the + // application the user submitted. However, we can't use the application name in the label, as + // label values are considerably restrictive, e.g. must be no longer than 63 characters in + // length. So we generate a separate identifier for the app ID itself, and bookkeeping that + // requires finding "all pods for this application" should use the kubernetesAppId. 
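The orchestrator's getAllConfigurationSteps (below) returns an ordered sequence of DriverConfigurationSteps that Client.run() folds over an initial KubernetesDriverSpec. A minimal sketch of that contract, inferred from the usages in this patch; the exact field types and signatures in the submitsteps package may differ:

    import io.fabric8.kubernetes.api.model.{Container, HasMetadata, Pod}
    import org.apache.spark.SparkConf

    // Approximate shape of the driver specification that each step transforms.
    case class KubernetesDriverSpec(
        driverPod: Pod,
        driverContainer: Container,
        otherKubernetesResources: Seq[HasMetadata],
        driverSparkConf: SparkConf)

    // Each configuration step receives the spec accumulated so far and returns an updated copy.
    trait DriverConfigurationStep {
      def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec
    }

    // Client.run() reduces the ordered steps over the initial spec, equivalent to this fold.
    object DriverSpecComposition {
      def resolve(
          initialSpec: KubernetesDriverSpec,
          steps: Seq[DriverConfigurationStep]): KubernetesDriverSpec =
        steps.foldLeft(initialSpec) { (spec, step) => step.configureDriver(spec) }
    }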
+ private val kubernetesResourceNamePrefix = + s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + private val jarsDownloadPath = submissionSparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) + private val filesDownloadPath = submissionSparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION) + private val dockerImagePullPolicy = submissionSparkConf.get(DOCKER_IMAGE_PULL_POLICY) + private val initContainerConfigMapName = s"$kubernetesResourceNamePrefix-init-config" + + def getAllConfigurationSteps(): Seq[DriverConfigurationStep] = { + val additionalMainAppJar = mainAppResource match { + case JavaMainAppResource(resource) if resource != SparkLauncher.NO_RESOURCE => + Option(resource) + case _ => Option.empty + } + val additionalMainAppPythonFile = mainAppResource match { + case PythonMainAppResource(resource) if resource != SparkLauncher.NO_RESOURCE => + Option(resource) + case _ => Option.empty + } + val sparkJars = submissionSparkConf.getOption("spark.jars") + .map(_.split(",")) + .getOrElse(Array.empty[String]) ++ + additionalMainAppJar.toSeq + val sparkFiles = submissionSparkConf.getOption("spark.files") + .map(_.split(",")) + .getOrElse(Array.empty[String]) ++ + additionalMainAppPythonFile.toSeq ++ + additionalPythonFiles + val driverCustomLabels = ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( + submissionSparkConf, + KUBERNETES_DRIVER_LABEL_PREFIX, + KUBERNETES_DRIVER_LABELS, + "label") + require(!driverCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + + s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping" + + s" operations.") + val allDriverLabels = driverCustomLabels ++ Map( + SPARK_APP_ID_LABEL -> kubernetesAppId, + SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) + val initialSubmissionStep = new BaseDriverConfigurationStep( + kubernetesAppId, + kubernetesResourceNamePrefix, + allDriverLabels, + dockerImagePullPolicy, + appName, + mainClass, + appArgs, + submissionSparkConf) + val kubernetesCredentialsStep = new DriverKubernetesCredentialsStep( + submissionSparkConf, kubernetesResourceNamePrefix) + val pythonStep = mainAppResource match { + case PythonMainAppResource(mainPyResource) => + Option(new PythonStep(mainPyResource, additionalPythonFiles, filesDownloadPath)) + case _ => Option.empty[DriverConfigurationStep] + } + val initContainerBootstrapStep = if ((sparkJars ++ sparkFiles).exists { uri => + Option(Utils.resolveURI(uri).getScheme).getOrElse("file") != "local" + }) { + val initContainerConfigurationStepsOrchestrator = + new InitContainerConfigurationStepsOrchestrator( + namespace, + kubernetesResourceNamePrefix, + sparkJars, + sparkFiles, + jarsDownloadPath, + filesDownloadPath, + dockerImagePullPolicy, + allDriverLabels, + initContainerConfigMapName, + INIT_CONTAINER_CONFIG_MAP_KEY, + submissionSparkConf) + val initContainerConfigurationSteps = + initContainerConfigurationStepsOrchestrator.getAllConfigurationSteps() + Some(new InitContainerBootstrapStep(initContainerConfigurationSteps, + initContainerConfigMapName, + INIT_CONTAINER_CONFIG_MAP_KEY)) + } else { + Option.empty[DriverConfigurationStep] + } + val dependencyResolutionStep = new DependencyResolutionStep( + sparkJars, + sparkFiles, + jarsDownloadPath, + filesDownloadPath) + Seq( + initialSubmissionStep, + kubernetesCredentialsStep, + dependencyResolutionStep) ++ + initContainerBootstrapStep.toSeq ++ + pythonStep.toSeq + } +} diff --git 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala deleted file mode 100644 index 6e185d2c069f6..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import io.fabric8.kubernetes.api.model.ConfigMap - -import org.apache.spark.{SparkConf, SSLOptions} -import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, OptionRequirements, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.RetrofitClientFactoryImpl -import org.apache.spark.util.Utils - -/** - * Interface that wraps the provision of everything the submission client needs to set up the - * driver's init-container. This is all wrapped in the same place to ensure that related - * components are being constructed with consistent configurations with respect to one another. 
- */ -private[spark] trait DriverInitContainerComponentsProvider { - - def provideContainerLocalizedFilesResolver( - mainAppResource: String): ContainerLocalizedFilesResolver - def provideInitContainerSubmittedDependencyUploader( - driverPodLabels: Map[String, String]): Option[SubmittedDependencyUploader] - def provideSubmittedDependenciesSecretBuilder( - maybeSubmittedResourceSecrets: Option[SubmittedResourceSecrets]) - : Option[SubmittedDependencySecretBuilder] - def provideInitContainerBootstrap(): SparkPodInitContainerBootstrap - def provideDriverPodFileMounter(): DriverPodKubernetesFileMounter - def provideInitContainerBundle(maybeSubmittedResourceIds: Option[SubmittedResourceIds], - uris: Iterable[String]): Option[InitContainerBundle] -} - -private[spark] class DriverInitContainerComponentsProviderImpl( - sparkConf: SparkConf, - kubernetesResourceNamePrefix: String, - namespace: String, - sparkJars: Seq[String], - sparkFiles: Seq[String], - pySparkFiles: Seq[String], - resourceStagingServerExternalSslOptions: SSLOptions) - extends DriverInitContainerComponentsProvider { - - private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) - private val maybeResourceStagingServerInternalUri = - sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_URI) - private val maybeResourceStagingServerInternalTrustStore = - sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_FILE) - .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE)) - private val maybeResourceStagingServerInternalTrustStorePassword = - sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_PASSWORD) - .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD)) - private val maybeResourceStagingServerInternalTrustStoreType = - sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_TYPE) - .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE)) - private val maybeResourceStagingServerInternalClientCert = - sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_CLIENT_CERT_PEM) - .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM)) - private val resourceStagingServerInternalSslEnabled = - sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_SSL_ENABLED) - .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_SSL_ENABLED)) - .getOrElse(false) - - OptionRequirements.requireNandDefined( - maybeResourceStagingServerInternalClientCert, - maybeResourceStagingServerInternalTrustStore, - "Cannot provide both a certificate file and a trustStore file for init-containers to" + - " use for contacting the resource staging server over TLS.") - - require(maybeResourceStagingServerInternalTrustStore.forall { trustStore => - Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") match { - case "file" | "local" => true - case _ => false - } - }, "TrustStore URI used for contacting the resource staging server from init containers must" + - " have no scheme, or scheme file://, or scheme local://.") - - require(maybeResourceStagingServerInternalClientCert.forall { trustStore => - Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") match { - case "file" | "local" => true - case _ => false - } - }, "Client cert file URI used for contacting the resource staging server from init containers" + - " must have no scheme, or scheme file://, or scheme local://.") - - private val jarsDownloadPath = sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) - private val filesDownloadPath = sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION) - private val maybeSecretName = 
maybeResourceStagingServerUri.map { _ => - s"$kubernetesResourceNamePrefix-init-secret" - } - private val configMapName = s"$kubernetesResourceNamePrefix-init-config" - private val configMapKey = s"$kubernetesResourceNamePrefix-init-config-key" - private val initContainerImage = sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE) - private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) - private val downloadTimeoutMinutes = sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) - private val pySparkSubmitted = KubernetesFileUtils.getOnlySubmitterLocalFiles(pySparkFiles) - - private def provideInitContainerConfigMap( - maybeSubmittedResourceIds: Option[SubmittedResourceIds]): ConfigMap = { - val submittedDependencyConfigPlugin = for { - stagingServerUri <- maybeResourceStagingServerUri - jarsResourceId <- maybeSubmittedResourceIds.map(_.jarsResourceId) - filesResourceId <- maybeSubmittedResourceIds.map(_.filesResourceId) - } yield { - new SubmittedDependencyInitContainerConfigPluginImpl( - // Configure the init-container with the internal URI over the external URI. - maybeResourceStagingServerInternalUri.getOrElse(stagingServerUri), - jarsResourceId, - filesResourceId, - INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, - INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, - resourceStagingServerInternalSslEnabled, - maybeResourceStagingServerInternalTrustStore, - maybeResourceStagingServerInternalClientCert, - maybeResourceStagingServerInternalTrustStorePassword, - maybeResourceStagingServerInternalTrustStoreType, - INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) - } - new SparkInitContainerConfigMapBuilderImpl( - sparkJars, - sparkFiles ++ pySparkSubmitted, - jarsDownloadPath, - filesDownloadPath, - configMapName, - configMapKey, - submittedDependencyConfigPlugin).build() - } - - override def provideContainerLocalizedFilesResolver(mainAppResource: String) - : ContainerLocalizedFilesResolver = { - new ContainerLocalizedFilesResolverImpl( - sparkJars, sparkFiles, pySparkFiles, mainAppResource, jarsDownloadPath, filesDownloadPath) - } - - private def provideExecutorInitContainerConfiguration(): ExecutorInitContainerConfiguration = { - new ExecutorInitContainerConfigurationImpl( - maybeSecretName, - INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH, - configMapName, - configMapKey) - } - - override def provideInitContainerSubmittedDependencyUploader( - driverPodLabels: Map[String, String]): Option[SubmittedDependencyUploader] = { - maybeResourceStagingServerUri.map { stagingServerUri => - new SubmittedDependencyUploaderImpl( - driverPodLabels, - namespace, - stagingServerUri, - sparkJars, - sparkFiles ++ pySparkSubmitted, - resourceStagingServerExternalSslOptions, - RetrofitClientFactoryImpl) - } - } - - override def provideSubmittedDependenciesSecretBuilder( - maybeSubmittedResourceSecrets: Option[SubmittedResourceSecrets]) - : Option[SubmittedDependencySecretBuilder] = { - for { - secretName <- maybeSecretName - jarsResourceSecret <- maybeSubmittedResourceSecrets.map(_.jarsResourceSecret) - filesResourceSecret <- maybeSubmittedResourceSecrets.map(_.filesResourceSecret) - } yield { - new SubmittedDependencySecretBuilderImpl( - secretName, - jarsResourceSecret, - filesResourceSecret, - INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, - INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, - 
maybeResourceStagingServerInternalTrustStore, - maybeResourceStagingServerInternalClientCert) - } - } - - override def provideInitContainerBootstrap(): SparkPodInitContainerBootstrap = { - val resourceStagingServerSecretPlugin = maybeSecretName.map { secret => - new InitContainerResourceStagingServerSecretPluginImpl( - secret, INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) - } - new SparkPodInitContainerBootstrapImpl( - initContainerImage, - dockerImagePullPolicy, - jarsDownloadPath, - filesDownloadPath, - downloadTimeoutMinutes, - configMapName, - configMapKey, - resourceStagingServerSecretPlugin) - } - override def provideDriverPodFileMounter(): DriverPodKubernetesFileMounter = { - new DriverPodKubernetesFileMounterImpl() - } - override def provideInitContainerBundle( - maybeSubmittedResourceIds: Option[SubmittedResourceIds], - uris: Iterable[String]): Option[InitContainerBundle] = { - // Bypass init-containers if `spark.jars` and `spark.files` and '--py-rilfes' - // is empty or only has `local://` URIs - if ((KubernetesFileUtils.getNonContainerLocalFiles(uris) ++ pySparkSubmitted).nonEmpty) { - Some(InitContainerBundle(provideInitContainerConfigMap(maybeSubmittedResourceIds), - provideInitContainerBootstrap(), - provideExecutorInitContainerConfiguration())) - } else None - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala deleted file mode 100644 index 25e7c3b3ebd89..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import io.fabric8.kubernetes.api.model.{PodBuilder, Secret, SecretBuilder} -import scala.collection.JavaConverters._ -import scala.language.implicitConversions - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.KubernetesCredentials -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ - -private[spark] trait DriverPodKubernetesCredentialsMounter { - - /** - * Set fields on the Spark configuration that indicate where the driver pod is - * to find its Kubernetes credentials for requesting executors. - */ - def setDriverPodKubernetesCredentialLocations(sparkConf: SparkConf): SparkConf - - /** - * Create the Kubernetes secret object that correspond to the driver's credentials - * that have to be created and mounted into the driver pod. 
The single Secret - * object contains all of the data entries for the driver pod's Kubernetes - * credentials. Returns empty if no secrets are to be mounted. - */ - def createCredentialsSecret(): Option[Secret] - - /** - * Mount any Kubernetes credentials from the submitting machine's disk into the driver pod. The - * secret that is passed in here should have been created from createCredentialsSecret so that - * the implementation does not need to hold its state. - */ - def mountDriverKubernetesCredentials( - originalPodSpec: PodBuilder, - driverContainerName: String, - credentialsSecret: Option[Secret]): PodBuilder -} - -private[spark] class DriverPodKubernetesCredentialsMounterImpl( - kubernetesAppId: String, - submitterLocalDriverPodKubernetesCredentials: KubernetesCredentials, - maybeUserSpecifiedMountedClientKeyFile: Option[String], - maybeUserSpecifiedMountedClientCertFile: Option[String], - maybeUserSpecifiedMountedOAuthTokenFile: Option[String], - maybeUserSpecifiedMountedCaCertFile: Option[String]) - extends DriverPodKubernetesCredentialsMounter { - - override def setDriverPodKubernetesCredentialLocations(sparkConf: SparkConf): SparkConf = { - val resolvedMountedClientKeyFile = resolveSecretLocation( - maybeUserSpecifiedMountedClientKeyFile, - submitterLocalDriverPodKubernetesCredentials.clientKeyDataBase64, - DRIVER_CREDENTIALS_CLIENT_KEY_PATH) - val resolvedMountedClientCertFile = resolveSecretLocation( - maybeUserSpecifiedMountedClientCertFile, - submitterLocalDriverPodKubernetesCredentials.clientCertDataBase64, - DRIVER_CREDENTIALS_CLIENT_CERT_PATH) - val resolvedMountedCaCertFile = resolveSecretLocation( - maybeUserSpecifiedMountedCaCertFile, - submitterLocalDriverPodKubernetesCredentials.caCertDataBase64, - DRIVER_CREDENTIALS_CA_CERT_PATH) - val resolvedMountedOAuthTokenFile = resolveSecretLocation( - maybeUserSpecifiedMountedOAuthTokenFile, - submitterLocalDriverPodKubernetesCredentials.oauthTokenBase64, - DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH) - val sparkConfWithCredentialLocations = sparkConf.clone() - .setOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", - resolvedMountedCaCertFile) - .setOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", - resolvedMountedClientKeyFile) - .setOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", - resolvedMountedClientCertFile) - .setOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX", - resolvedMountedOAuthTokenFile) - // Redact all OAuth token values - sparkConfWithCredentialLocations - .getAll - .filter(_._1.endsWith(OAUTH_TOKEN_CONF_SUFFIX)).map(_._1) - .foreach { - sparkConfWithCredentialLocations.set(_, "") - } - sparkConfWithCredentialLocations - } - - override def createCredentialsSecret(): Option[Secret] = { - val allSecretData = - resolveSecretData( - maybeUserSpecifiedMountedClientKeyFile, - submitterLocalDriverPodKubernetesCredentials.clientKeyDataBase64, - DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME) ++ - resolveSecretData( - maybeUserSpecifiedMountedClientCertFile, - submitterLocalDriverPodKubernetesCredentials.clientCertDataBase64, - DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME) ++ - resolveSecretData( - maybeUserSpecifiedMountedCaCertFile, - submitterLocalDriverPodKubernetesCredentials.caCertDataBase64, - DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME) ++ - resolveSecretData( - maybeUserSpecifiedMountedOAuthTokenFile, - submitterLocalDriverPodKubernetesCredentials.oauthTokenBase64, - 
DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME) - if (allSecretData.isEmpty) { - None - } else { - Some(new SecretBuilder() - .withNewMetadata().withName(s"$kubernetesAppId-kubernetes-credentials").endMetadata() - .withData(allSecretData.asJava) - .build()) - } - } - - override def mountDriverKubernetesCredentials( - originalPodSpec: PodBuilder, - driverContainerName: String, - credentialsSecret: Option[Secret]): PodBuilder = { - credentialsSecret.map { secret => - originalPodSpec.editSpec() - .addNewVolume() - .withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) - .withNewSecret().withSecretName(secret.getMetadata.getName).endSecret() - .endVolume() - .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainerName)) - .addNewVolumeMount() - .withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) - .withMountPath(DRIVER_CREDENTIALS_SECRETS_BASE_DIR) - .endVolumeMount() - .endContainer() - .endSpec() - }.getOrElse(originalPodSpec) - } - - private def resolveSecretLocation( - mountedUserSpecified: Option[String], - valueMountedFromSubmitter: Option[String], - mountedCanonicalLocation: String): Option[String] = { - mountedUserSpecified.orElse(valueMountedFromSubmitter.map( _ => { - mountedCanonicalLocation - })) - } - - private def resolveSecretData( - mountedUserSpecified: Option[String], - valueMountedFromSubmitter: Option[String], - secretName: String): Map[String, String] = { - mountedUserSpecified.map { _ => Map.empty[String, String]} - .getOrElse { - valueMountedFromSubmitter.map { valueBase64 => - Map(secretName -> valueBase64) - }.getOrElse(Map.empty[String, String]) - } - } - - private implicit def augmentSparkConf(sparkConf: SparkConf): OptionSettableSparkConf = { - new OptionSettableSparkConf(sparkConf) - } -} - -private class OptionSettableSparkConf(sparkConf: SparkConf) { - def setOption(configEntry: String, option: Option[String]): SparkConf = { - option.map( opt => { - sparkConf.set(configEntry, opt) - }).getOrElse(sparkConf) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala deleted file mode 100644 index 913279198146a..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.config._ - -private[spark] trait DriverPodKubernetesCredentialsMounterProvider { - - def getDriverPodKubernetesCredentialsMounter() - : DriverPodKubernetesCredentialsMounter -} - -private[spark] class DriverPodKubernetesCredentialsMounterProviderImpl( - sparkConf: SparkConf, - kubernetesAppId: String) - extends DriverPodKubernetesCredentialsMounterProvider { - - override def getDriverPodKubernetesCredentialsMounter() - : DriverPodKubernetesCredentialsMounter = { - val submitterLocalDriverPodKubernetesCredentials = - new DriverPodKubernetesCredentialsProvider(sparkConf).get() - new DriverPodKubernetesCredentialsMounterImpl( - kubernetesAppId, - submitterLocalDriverPodKubernetesCredentials, - sparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX"), - sparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX"), - sparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX"), - sparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX")) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala deleted file mode 100644 index 41b0cf8ceaeab..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit - -import java.io.File - -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, Files} - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.KubernetesCredentials -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.internal.config.OptionalConfigEntry - -private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf) { - - def get(): KubernetesCredentials = { - val oauthTokenBase64 = sparkConf - .getOption(s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$OAUTH_TOKEN_CONF_SUFFIX") - .map { token => - BaseEncoding.base64().encode(token.getBytes(Charsets.UTF_8)) - } - val caCertDataBase64 = safeFileConfToBase64( - s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", - s"Driver CA cert file provided at %s does not exist or is not a file.") - val clientKeyDataBase64 = safeFileConfToBase64( - s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", - s"Driver client key file provided at %s does not exist or is not a file.") - val clientCertDataBase64 = safeFileConfToBase64( - s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", - s"Driver client cert file provided at %s does not exist or is not a file.") - KubernetesCredentials( - oauthTokenBase64 = oauthTokenBase64, - caCertDataBase64 = caCertDataBase64, - clientKeyDataBase64 = clientKeyDataBase64, - clientCertDataBase64 = clientCertDataBase64) - } - - private def safeFileConfToBase64( - conf: String, - fileNotFoundFormatString: String): Option[String] = { - sparkConf.getOption(conf) - .map(new File(_)) - .map { file => - require(file.isFile, String.format(fileNotFoundFormatString, file.getAbsolutePath)) - BaseEncoding.base64().encode(Files.toByteArray(file)) - } - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala deleted file mode 100644 index cc0ef0eedb457..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import io.fabric8.kubernetes.api.model.{Container, PodBuilder} - -import org.apache.spark.deploy.kubernetes.constants._ - - /** - * Trait that is responsible for providing full file-paths dynamically after - * the filesDownloadPath has been defined. The file-names are then stored in the - * environmental variables in the driver-pod. 
- */ -private[spark] trait DriverPodKubernetesFileMounter { - def addPySparkFiles(primaryFile: String, pySparkFiles: String, - mainContainerName: String, originalPodSpec: PodBuilder) : PodBuilder -} - -private[spark] class DriverPodKubernetesFileMounterImpl() - extends DriverPodKubernetesFileMounter { - override def addPySparkFiles( - primaryFile: String, - pySparkFiles: String, - mainContainerName: String, - originalPodSpec: PodBuilder): PodBuilder = { - - originalPodSpec - .editSpec() - .editMatchingContainer(new ContainerNameEqualityPredicate(mainContainerName)) - .addNewEnv() - .withName(ENV_PYSPARK_PRIMARY) - .withValue(primaryFile) - .endEnv() - .addNewEnv() - .withName(ENV_PYSPARK_FILES) - .withValue(pySparkFiles) - .endEnv() - .endContainer() - .endSpec() - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfiguration.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfiguration.scala deleted file mode 100644 index 2292365995d1f..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfiguration.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.config._ - -private[spark] trait ExecutorInitContainerConfiguration { - /** - * Provide the driver with configuration that allows it to configure executors to - * fetch resources in the same way the driver does. 
- */ - def configureSparkConfForExecutorInitContainer(originalSparkConf: SparkConf): SparkConf -} - -private[spark] class ExecutorInitContainerConfigurationImpl( - initContainerSecretName: Option[String], - initContainerSecretMountDir: String, - initContainerConfigMapName: String, - initContainerConfigMapKey: String) - extends ExecutorInitContainerConfiguration { - def configureSparkConfForExecutorInitContainer(originalSparkConf: SparkConf): SparkConf = { - val configuredSparkConf = originalSparkConf.clone() - .set(EXECUTOR_INIT_CONTAINER_CONFIG_MAP, - initContainerConfigMapName) - .set(EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY, - initContainerConfigMapKey) - .set(EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR, initContainerSecretMountDir) - initContainerSecretName.map { secret => - configuredSparkConf.set(EXECUTOR_INIT_CONTAINER_SECRET, secret) - }.getOrElse(configuredSparkConf) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerUtil.scala index 9b7faaa78a9aa..837ec0e8c867e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerUtil.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerUtil.scala @@ -18,19 +18,18 @@ package org.apache.spark.deploy.kubernetes.submit import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule -import io.fabric8.kubernetes.api.model.{Container, PodBuilder} +import io.fabric8.kubernetes.api.model.{Container, Pod, PodBuilder} import scala.collection.JavaConverters._ import org.apache.spark.deploy.kubernetes.constants._ private[spark] object InitContainerUtil { - private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + private val OBJECT_MAPPER = new ObjectMapper().registerModule(DefaultScalaModule) - def appendInitContainer( - originalPodSpec: PodBuilder, initContainer: Container): PodBuilder = { + def appendInitContainer(originalPodSpec: Pod, initContainer: Container): Pod = { val resolvedInitContainers = originalPodSpec - .editMetadata() + .getMetadata .getAnnotations .asScala .get(INIT_CONTAINER_ANNOTATION) @@ -40,10 +39,11 @@ private[spark] object InitContainerUtil { existingInitContainers ++ Seq(initContainer) }.getOrElse(Seq(initContainer)) val resolvedSerializedInitContainers = OBJECT_MAPPER.writeValueAsString(resolvedInitContainers) - originalPodSpec + new PodBuilder(originalPodSpec) .editMetadata() - .removeFromAnnotations(INIT_CONTAINER_ANNOTATION) - .addToAnnotations(INIT_CONTAINER_ANNOTATION, resolvedSerializedInitContainers) - .endMetadata() + .removeFromAnnotations(INIT_CONTAINER_ANNOTATION) + .addToAnnotations(INIT_CONTAINER_ANNOTATION, resolvedSerializedInitContainers) + .endMetadata() + .build() } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala index d688bf29808fb..ec591923f1472 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala @@ -16,6 +16,8 @@ */ package 
org.apache.spark.deploy.kubernetes.submit +import java.io.File + import org.apache.spark.util.Utils private[spark] object KubernetesFileUtils { @@ -48,4 +50,47 @@ private[spark] object KubernetesFileUtils { def getOnlyRemoteFiles(uris: Iterable[String]): Iterable[String] = { filterUriStringsByScheme(uris, scheme => scheme != "file" && scheme != "local") } + + /** + * For the collection of uris, resolves any files as follows: + * - Files with scheme file:// are resolved to the given download path + * - Files with scheme local:// resolve to just the path of the URI + * - Otherwise, the URI is returned as-is. + */ + def resolveSubmittedUris(fileUris: Iterable[String], fileDownloadPath: String) + : Iterable[String] = { + fileUris.map { uri => + val fileUri = Utils.resolveURI(uri) + val fileScheme = Option(fileUri.getScheme).getOrElse("file") + fileScheme match { + case "file" => + val fileName = new File(fileUri.getPath).getName + s"$fileDownloadPath/$fileName" + case "local" => + fileUri.getPath + case _ => uri + } + } + } + + /** + * If any file uri has any scheme other than local:// it is mapped as if the file + * was downloaded to the file download path. Otherwise, it is mapped to the path + * part of the URI. + */ + def resolveFilePaths(fileUris: Iterable[String], fileDownloadPath: String): Iterable[String] = { + fileUris.map { uri => + resolveFilePath(uri, fileDownloadPath) + } + } + + def resolveFilePath(uri: String, fileDownloadPath: String): String = { + val fileUri = Utils.resolveURI(uri) + if (Option(fileUri.getScheme).getOrElse("file") == "local") { + fileUri.getPath + } else { + val fileName = new File(fileUri.getPath).getName + s"$fileDownloadPath/$fileName" + } + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerBundle.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/MainAppResource.scala similarity index 71% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerBundle.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/MainAppResource.scala index ba44f794d5811..436d531a850ff 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerBundle.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/MainAppResource.scala @@ -16,11 +16,8 @@ */ package org.apache.spark.deploy.kubernetes.submit -import io.fabric8.kubernetes.api.model.ConfigMap +private[spark] sealed trait MainAppResource -import org.apache.spark.deploy.kubernetes.{SparkPodInitContainerBootstrap} +private[spark] case class PythonMainAppResource(primaryPyFile: String) extends MainAppResource -case class InitContainerBundle( - sparkInitContainerConfigMap: ConfigMap, - sparkPodInitContainerBootstrap: SparkPodInitContainerBootstrap, - executorInitContainerConfiguration: ExecutorInitContainerConfiguration) +private[spark] case class JavaMainAppResource(primaryResource: String) extends MainAppResource diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala deleted file mode 100644 index c61e930a2b97f..0000000000000 --- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import io.fabric8.kubernetes.api.model.{Pod, PodBuilder} - -private[spark] trait PythonSubmissionResources { - def sparkJars: Seq[String] - def pySparkFiles: Array[String] - def arguments: Array[String] - def primaryPySparkResource(containerLocalizedFilesResolver: ContainerLocalizedFilesResolver) - : String - def driverPodWithPySparkEnvs( - driverPodFileMounter: DriverPodKubernetesFileMounter, - resolvedPrimaryPySparkResource: String, - resolvedPySparkFiles: String, - driverContainerName: String, - driverPodBuilder: PodBuilder): Pod -} - -private[spark] class PythonSubmissionResourcesImpl( - private val mainAppResource: String, - private val appArgs: Array[String] ) extends PythonSubmissionResources { - - private val pyFiles: Array[String] = { - Option(appArgs(0)).map(a => mainAppResource +: a.split(",")) - .getOrElse(Array(mainAppResource)) - } - - override def sparkJars: Seq[String] = Seq.empty[String] - - override def pySparkFiles: Array[String] = pyFiles - - override def arguments: Array[String] = { - pyFiles.toList match { - case Nil => appArgs - case a :: b => a match { - case _ if a == mainAppResource && b == Nil => appArgs - case _ => appArgs.drop(1) - } - } - } - override def primaryPySparkResource( - containerLocalizedFilesResolver: ContainerLocalizedFilesResolver) : String = - containerLocalizedFilesResolver.resolvePrimaryResourceFile() - - override def driverPodWithPySparkEnvs( - driverPodFileMounter: DriverPodKubernetesFileMounter, - resolvedPrimaryPySparkResource: String, - resolvedPySparkFiles: String, - driverContainerName: String, - driverPodBuilder: PodBuilder) : Pod = { - driverPodFileMounter - .addPySparkFiles( - resolvedPrimaryPySparkResource, - resolvedPySparkFiles, - driverContainerName, - driverPodBuilder) - .build() - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPlugin.scala deleted file mode 100644 index 06d3648efb89f..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPlugin.scala +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import org.apache.spark.SparkException -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.internal.config.OptionalConfigEntry -import org.apache.spark.util.Utils - -private[spark] trait SubmittedDependencyInitContainerConfigPlugin { - /** - * Obtain configuration to fetch submitted dependencies from a resource staging server. - * This includes the resource identifiers for the jar and file bundles, as well as the - * remote location of the resource staging server, and the location of secret files for - * authenticating to the resource staging server. Note that the secret file paths here need to - * line up with the locations the secrets are mounted by - * SubmittedDependencyInitContainerVolumesPlugin; constants provide the consistency and - * convention for these to line up. - */ - def configurationsToFetchSubmittedDependencies(): Map[String, String] -} - -private[spark] class SubmittedDependencyInitContainerConfigPluginImpl( - internalResourceStagingServerUri: String, - jarsResourceId: String, - filesResourceId: String, - jarsSecretKey: String, - filesSecretKey: String, - trustStoreSecretKey: String, - clientCertSecretKey: String, - resourceStagingServerSslEnabled: Boolean, - maybeInternalTrustStoreUri: Option[String], - maybeInternalClientCertUri: Option[String], - maybeInternalTrustStorePassword: Option[String], - maybeInternalTrustStoreType: Option[String], - secretsVolumeMountPath: String) - extends SubmittedDependencyInitContainerConfigPlugin { - - override def configurationsToFetchSubmittedDependencies(): Map[String, String] = { - Map[String, String]( - RESOURCE_STAGING_SERVER_URI.key -> internalResourceStagingServerUri, - INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> jarsResourceId, - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> - s"$secretsVolumeMountPath/$jarsSecretKey", - INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> filesResourceId, - INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> - s"$secretsVolumeMountPath/$filesSecretKey", - RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> resourceStagingServerSslEnabled.toString) ++ - resolveSecretPath( - maybeInternalTrustStoreUri, - trustStoreSecretKey, - RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE, - "TrustStore URI") ++ - resolveSecretPath( - maybeInternalClientCertUri, - clientCertSecretKey, - RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM, - "Client certificate URI") ++ - maybeInternalTrustStorePassword.map { password => - (RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key, password) - }.toMap ++ - maybeInternalTrustStoreType.map { storeType => - (RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key, storeType) - }.toMap - } - - private def resolveSecretPath( - maybeUri: Option[String], - secretKey: String, - configEntry: OptionalConfigEntry[String], - uriType: 
String): Map[String, String] = { - maybeUri.map(Utils.resolveURI).map { uri => - val resolvedPath = Option(uri.getScheme).getOrElse("file") match { - case "file" => s"$secretsVolumeMountPath/$secretKey" - case "local" => uri.getPath - case invalid => throw new SparkException(s"$uriType has invalid scheme $invalid must be" + - s" local://, file://, or empty.") - } - (configEntry.key, resolvedPath) - }.toMap - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilder.scala deleted file mode 100644 index 7850853df97e6..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilder.scala +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import java.io.File - -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, Files} -import io.fabric8.kubernetes.api.model.{Secret, SecretBuilder} -import scala.collection.JavaConverters._ - -import org.apache.spark.util.Utils - -private[spark] trait SubmittedDependencySecretBuilder { - /** - * Construct a Kubernetes secret bundle that init-containers can use to retrieve an - * application's dependencies. 
- */ - def build(): Secret -} - -private[spark] class SubmittedDependencySecretBuilderImpl( - secretName: String, - jarsResourceSecret: String, - filesResourceSecret: String, - jarsSecretKey: String, - filesSecretKey: String, - trustStoreSecretKey: String, - clientCertSecretKey: String, - internalTrustStoreUri: Option[String], - internalClientCertUri: Option[String]) - extends SubmittedDependencySecretBuilder { - - override def build(): Secret = { - val trustStoreBase64 = convertFileToBase64IfSubmitterLocal( - trustStoreSecretKey, internalTrustStoreUri) - val clientCertBase64 = convertFileToBase64IfSubmitterLocal( - clientCertSecretKey, internalClientCertUri) - val jarsSecretBase64 = BaseEncoding.base64().encode(jarsResourceSecret.getBytes(Charsets.UTF_8)) - val filesSecretBase64 = BaseEncoding.base64().encode( - filesResourceSecret.getBytes(Charsets.UTF_8)) - val secretData = Map( - jarsSecretKey -> jarsSecretBase64, - filesSecretKey -> filesSecretBase64) ++ - trustStoreBase64 ++ - clientCertBase64 - val kubernetesSecret = new SecretBuilder() - .withNewMetadata() - .withName(secretName) - .endMetadata() - .addToData(secretData.asJava) - .build() - kubernetesSecret - } - - private def convertFileToBase64IfSubmitterLocal(secretKey: String, secretUri: Option[String]) - : Map[String, String] = { - secretUri.filter { trustStore => - Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") == "file" - }.map { uri => - val file = new File(Utils.resolveURI(uri).getPath) - require(file.isFile, "Dependency server trustStore provided at" + - file.getAbsolutePath + " does not exist or is not a file.") - (secretKey, BaseEncoding.base64().encode(Files.toByteArray(file))) - }.toMap - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala new file mode 100644 index 0000000000000..022b5fccdc5e1 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, PodBuilder, QuantityBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.ConfigurationUtils +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ + +/** + * Represents the initial setup required for the driver. 
+ */ +private[spark] class BaseDriverConfigurationStep( + kubernetesAppId: String, + kubernetesResourceNamePrefix: String, + driverLabels: Map[String, String], + dockerImagePullPolicy: String, + appName: String, + mainClass: String, + appArgs: Array[String], + submissionSparkConf: SparkConf) extends DriverConfigurationStep { + + private val kubernetesDriverPodName = submissionSparkConf.get(KUBERNETES_DRIVER_POD_NAME) + .getOrElse(s"$kubernetesResourceNamePrefix-driver") + private val driverExtraClasspath = submissionSparkConf.get( + org.apache.spark.internal.config.DRIVER_CLASS_PATH) + // CPU settings + private val driverCpuCores = submissionSparkConf.getOption("spark.driver.cores").getOrElse("1") + private val driverLimitCores = submissionSparkConf.get(KUBERNETES_DRIVER_LIMIT_CORES) + + // Memory settings + private val driverMemoryMb = submissionSparkConf.get( + org.apache.spark.internal.config.DRIVER_MEMORY) + private val memoryOverheadMb = submissionSparkConf + .get(KUBERNETES_DRIVER_MEMORY_OVERHEAD) + .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * driverMemoryMb).toInt, + MEMORY_OVERHEAD_MIN)) + private val driverContainerMemoryWithOverhead = driverMemoryMb + memoryOverheadMb + private val driverDockerImage = submissionSparkConf.get(DRIVER_DOCKER_IMAGE) + + override def configureDriver( + driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { + val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => + new EnvVarBuilder() + .withName(ENV_SUBMIT_EXTRA_CLASSPATH) + .withValue(classPath) + .build() + } + val driverCustomAnnotations = ConfigurationUtils + .combinePrefixedKeyValuePairsWithDeprecatedConf( + submissionSparkConf, + KUBERNETES_DRIVER_ANNOTATION_PREFIX, + KUBERNETES_DRIVER_ANNOTATIONS, + "annotation") + require(!driverCustomAnnotations.contains(SPARK_APP_NAME_ANNOTATION), + s"Annotation with key $SPARK_APP_NAME_ANNOTATION is not allowed as it is reserved for" + + s" Spark bookkeeping operations.") + val allDriverAnnotations = driverCustomAnnotations ++ Map(SPARK_APP_NAME_ANNOTATION -> appName) + val driverCpuQuantity = new QuantityBuilder(false) + .withAmount(driverCpuCores) + .build() + val driverMemoryQuantity = new QuantityBuilder(false) + .withAmount(s"${driverMemoryMb}M") + .build() + val driverMemoryLimitQuantity = new QuantityBuilder(false) + .withAmount(s"${driverContainerMemoryWithOverhead}M") + .build() + val maybeCpuLimitQuantity = driverLimitCores.map { limitCores => + ("cpu", new QuantityBuilder(false).withAmount(limitCores).build()) + } + val driverContainer = new ContainerBuilder(driverSpec.driverContainer) + .withName(DRIVER_CONTAINER_NAME) + .withImage(driverDockerImage) + .withImagePullPolicy(dockerImagePullPolicy) + .addToEnv(driverExtraClasspathEnv.toSeq: _*) + .addNewEnv() + .withName(ENV_DRIVER_MEMORY) + .withValue(driverContainerMemoryWithOverhead + "m") + .endEnv() + .addNewEnv() + .withName(ENV_DRIVER_MAIN_CLASS) + .withValue(mainClass) + .endEnv() + .addNewEnv() + .withName(ENV_DRIVER_ARGS) + .withValue(appArgs.mkString(" ")) + .endEnv() + .withNewResources() + .addToRequests("cpu", driverCpuQuantity) + .addToRequests("memory", driverMemoryQuantity) + .addToLimits("memory", driverMemoryLimitQuantity) + .addToLimits(maybeCpuLimitQuantity.toMap.asJava) + .endResources() + .build() + val baseDriverPod = new PodBuilder(driverSpec.driverPod) + .editOrNewMetadata() + .withName(kubernetesDriverPodName) + .addToLabels(driverLabels.asJava) + .addToAnnotations(allDriverAnnotations.asJava) + .endMetadata() + .withNewSpec() + 
.withRestartPolicy("Never") + .endSpec() + .build() + val resolvedSparkConf = driverSpec.driverSparkConf.clone() + .setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) + .set("spark.app.id", kubernetesAppId) + .set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, kubernetesResourceNamePrefix) + driverSpec.copy( + driverPod = baseDriverPod, + driverSparkConf = resolvedSparkConf, + driverContainer = driverContainer) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStep.scala new file mode 100644 index 0000000000000..dddc62410d6c9 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStep.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + + +import java.io.File + +import io.fabric8.kubernetes.api.model.ContainerBuilder + +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils +import org.apache.spark.util.Utils + +/** + * Step that configures the classpath, spark.jars, and spark.files for the driver given that + * the init-container will download files to the download paths and that the user may provide + * files with local:// schemes. Note that this is separate from the init-container bootstrap + * step because jars with local schemes can still be provided even if the init-container is + * not being used, and those URIs still need to be resolved. 
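// Rough standalone sketch (not part of the patch) of the driver memory sizing done in
// BaseDriverConfigurationStep above. The overhead factor and floor below are assumptions
// mirroring Spark's usual defaults; the step itself reads MEMORY_OVERHEAD_FACTOR and
// MEMORY_OVERHEAD_MIN from the constants file. The with-overhead figure is what the pod's
// memory limit and the ENV_DRIVER_MEMORY value are derived from; the plain heap size is
// what the pod requests.
object DriverMemorySizingSketch {
  val assumedOverheadFactor = 0.10
  val assumedOverheadMinMb = 384

  // Returns (container memory request in MiB, container memory limit in MiB).
  def requestAndLimitMb(driverMemoryMb: Long): (Long, Long) = {
    val overheadMb = math.max((assumedOverheadFactor * driverMemoryMb).toLong, assumedOverheadMinMb)
    (driverMemoryMb, driverMemoryMb + overheadMb)
  }

  def main(args: Array[String]): Unit = {
    // e.g. --driver-memory 2g => request 2048M, limit 2432M under these assumed constants
    println(requestAndLimitMb(2048))
  }
}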
+ */ +private[spark] class DependencyResolutionStep( + sparkJars: Seq[String], + sparkFiles: Seq[String], + jarsDownloadPath: String, + filesDownloadPath: String) extends DriverConfigurationStep { + + override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { + val resolvedSparkJars = KubernetesFileUtils.resolveSubmittedUris(sparkJars, jarsDownloadPath) + val resolvedSparkFiles = KubernetesFileUtils.resolveSubmittedUris(sparkFiles, filesDownloadPath) + val sparkConfResolvedSparkDependencies = driverSpec.driverSparkConf.clone() + if (resolvedSparkJars.nonEmpty) { + sparkConfResolvedSparkDependencies.set("spark.jars", resolvedSparkJars.mkString(",")) + } + if (resolvedSparkFiles.nonEmpty) { + sparkConfResolvedSparkDependencies.set("spark.files", resolvedSparkFiles.mkString(",")) + } + val resolvedClasspath = KubernetesFileUtils.resolveFilePaths(sparkJars, jarsDownloadPath) + val driverContainerWithResolvedClasspath = if (resolvedClasspath.nonEmpty) { + new ContainerBuilder(driverSpec.driverContainer) + .addNewEnv() + .withName(ENV_MOUNTED_CLASSPATH) + .withValue(resolvedClasspath.mkString(File.pathSeparator)) + .endEnv() + .build() + } else { + driverSpec.driverContainer + } + driverSpec.copy( + driverContainer = driverContainerWithResolvedClasspath, + driverSparkConf = sparkConfResolvedSparkDependencies) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverConfigurationStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverConfigurationStep.scala new file mode 100644 index 0000000000000..8070e32371f94 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverConfigurationStep.scala @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +/** + * Represents a step in preparing the Kubernetes driver. + */ +private[spark] trait DriverConfigurationStep { + + /** + * Apply some transformation to the previous state of the driver to add a new feature to it. 
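// Worked, standalone illustration (jar names and download directory invented) of what
// DependencyResolutionStep above produces. spark.jars and spark.files are rewritten with
// KubernetesFileUtils.resolveSubmittedUris to their in-pod locations, while the driver's
// ENV_MOUNTED_CLASSPATH joins the resolveFilePaths results with the platform path
// separator: anything that is not local:// is assumed to end up in the jars download
// directory.
object MountedClasspathSketch {
  def main(args: Array[String]): Unit = {
    val jarsDownloadPath = "/var/spark-data/spark-jars"
    val sparkJars = Seq("local:///opt/spark/examples.jar", "file:///tmp/app.jar")
    val classpath = sparkJars.map { uri =>
      val parsed = new java.net.URI(uri)
      if (Option(parsed.getScheme).getOrElse("file") == "local") {
        parsed.getPath                                                   // already on the image
      } else {
        s"$jarsDownloadPath/${new java.io.File(parsed.getPath).getName}" // fetched into the pod
      }
    }.mkString(java.io.File.pathSeparator)
    println(classpath) // /opt/spark/examples.jar:/var/spark-data/spark-jars/app.jar on Linux
  }
}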
+ */ + def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala new file mode 100644 index 0000000000000..0c58006130659 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import java.io.File +import java.nio.charset.StandardCharsets + +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder, Secret, SecretBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ + +/** + * Mounts Kubernetes credentials into the driver pod. The driver will use such mounted credentials + * to request executors. 
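// Hedged sketch (not part of the patch) of how a sequence of DriverConfigurationStep
// instances is meant to be consumed: each step transforms the spec produced by the step
// before it. The code that actually drives the chain is the submission client elsewhere
// in this patch; the fold below only illustrates the configureDriver contract.
object DriverStepChainingSketch {
  def run(
      steps: Seq[DriverConfigurationStep],
      initialSpec: KubernetesDriverSpec): KubernetesDriverSpec = {
    steps.foldLeft(initialSpec) { (spec, step) => step.configureDriver(spec) }
  }
}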
+ */ +private[spark] class DriverKubernetesCredentialsStep( + submissionSparkConf: SparkConf, + kubernetesResourceNamePrefix: String) extends DriverConfigurationStep { + + private val maybeMountedOAuthTokenFile = submissionSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX") + private val maybeMountedClientKeyFile = submissionSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX") + private val maybeMountedClientCertFile = submissionSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX") + private val maybeMountedCaCertFile = submissionSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX") + + override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { + val driverSparkConf = driverSpec.driverSparkConf.clone() + val oauthTokenBase64 = submissionSparkConf + .getOption(s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$OAUTH_TOKEN_CONF_SUFFIX") + .map { token => + BaseEncoding.base64().encode(token.getBytes(StandardCharsets.UTF_8)) + } + val caCertDataBase64 = safeFileConfToBase64( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", + s"Driver CA cert file provided at %s does not exist or is not a file.") + val clientKeyDataBase64 = safeFileConfToBase64( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", + s"Driver client key file provided at %s does not exist or is not a file.") + val clientCertDataBase64 = safeFileConfToBase64( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", + s"Driver client cert file provided at %s does not exist or is not a file.") + val driverSparkConfWithCredentialsLocations = setDriverPodKubernetesCredentialLocations( + driverSparkConf, + oauthTokenBase64, + caCertDataBase64, + clientKeyDataBase64, + clientCertDataBase64) + val kubernetesCredentialsSecret = createCredentialsSecret( + oauthTokenBase64, + caCertDataBase64, + clientKeyDataBase64, + clientCertDataBase64) + val driverPodWithMountedKubernetesCredentials = kubernetesCredentialsSecret.map { secret => + new PodBuilder(driverSpec.driverPod) + .editOrNewSpec() + .addNewVolume() + .withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) + .withNewSecret().withSecretName(secret.getMetadata.getName).endSecret() + .endVolume() + .endSpec() + .build() + }.getOrElse(driverSpec.driverPod) + val driverContainerWithMountedSecretVolume = kubernetesCredentialsSecret.map { secret => + new ContainerBuilder(driverSpec.driverContainer) + .addNewVolumeMount() + .withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) + .withMountPath(DRIVER_CREDENTIALS_SECRETS_BASE_DIR) + .endVolumeMount() + .build() + }.getOrElse(driverSpec.driverContainer) + driverSpec.copy( + driverPod = driverPodWithMountedKubernetesCredentials, + otherKubernetesResources = + driverSpec.otherKubernetesResources ++ kubernetesCredentialsSecret.toSeq, + driverSparkConf = driverSparkConfWithCredentialsLocations, + driverContainer = driverContainerWithMountedSecretVolume) + } + + private def createCredentialsSecret( + driverOAuthTokenBase64: Option[String], + driverCaCertDataBase64: Option[String], + driverClientKeyDataBase64: Option[String], + driverClientCertDataBase64: Option[String]): Option[Secret] = { + val allSecretData = + resolveSecretData( + maybeMountedClientKeyFile, + driverClientKeyDataBase64, + DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME) ++ + resolveSecretData( + maybeMountedClientCertFile, + driverClientCertDataBase64, 
+ DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME) ++ + resolveSecretData( + maybeMountedCaCertFile, + driverCaCertDataBase64, + DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME) ++ + resolveSecretData( + maybeMountedOAuthTokenFile, + driverOAuthTokenBase64, + DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME) + if (allSecretData.isEmpty) { + None + } else { + Some(new SecretBuilder() + .withNewMetadata() + .withName(s"$kubernetesResourceNamePrefix-kubernetes-credentials") + .endMetadata() + .withData(allSecretData.asJava) + .build()) + } + } + + private def setDriverPodKubernetesCredentialLocations( + driverSparkConf: SparkConf, + driverOauthTokenBase64: Option[String], + driverCaCertDataBase64: Option[String], + driverClientKeyDataBase64: Option[String], + driverClientCertDataBase64: Option[String]): SparkConf = { + val resolvedMountedOAuthTokenFile = resolveSecretLocation( + maybeMountedOAuthTokenFile, + driverOauthTokenBase64, + DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH) + val resolvedMountedClientKeyFile = resolveSecretLocation( + maybeMountedClientKeyFile, + driverClientKeyDataBase64, + DRIVER_CREDENTIALS_CLIENT_KEY_PATH) + val resolvedMountedClientCertFile = resolveSecretLocation( + maybeMountedClientCertFile, + driverClientCertDataBase64, + DRIVER_CREDENTIALS_CLIENT_CERT_PATH) + val resolvedMountedCaCertFile = resolveSecretLocation( + maybeMountedCaCertFile, + driverCaCertDataBase64, + DRIVER_CREDENTIALS_CA_CERT_PATH) + val sparkConfWithCredentialLocations = driverSparkConf + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", + resolvedMountedCaCertFile) + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", + resolvedMountedClientKeyFile) + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", + resolvedMountedClientCertFile) + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX", + resolvedMountedOAuthTokenFile) + // Redact all OAuth token values + sparkConfWithCredentialLocations + .getAll + .filter(_._1.endsWith(OAUTH_TOKEN_CONF_SUFFIX)).map(_._1) + .foreach { + sparkConfWithCredentialLocations.set(_, "") + } + sparkConfWithCredentialLocations + } + + private def safeFileConfToBase64( + conf: String, + fileNotFoundFormatString: String): Option[String] = { + submissionSparkConf.getOption(conf) + .map(new File(_)) + .map { file => + require(file.isFile, String.format(fileNotFoundFormatString, file.getAbsolutePath)) + BaseEncoding.base64().encode(Files.toByteArray(file)) + } + } + + private def resolveSecretLocation( + mountedUserSpecified: Option[String], + valueMountedFromSubmitter: Option[String], + mountedCanonicalLocation: String): Option[String] = { + mountedUserSpecified.orElse(valueMountedFromSubmitter.map( _ => { + mountedCanonicalLocation + })) + } + + private def resolveSecretData( + mountedUserSpecified: Option[String], + valueMountedFromSubmitter: Option[String], + secretName: String): Map[String, String] = { + mountedUserSpecified.map { _ => Map.empty[String, String]} + .getOrElse { + valueMountedFromSubmitter.map { valueBase64 => + Map(secretName -> valueBase64) + }.getOrElse(Map.empty[String, String]) + } + } + + private implicit def augmentSparkConf(sparkConf: SparkConf): OptionSettableSparkConf = { + new OptionSettableSparkConf(sparkConf) + } +} + +private class OptionSettableSparkConf(sparkConf: SparkConf) { + def setOption(configEntry: String, option: Option[String]): SparkConf = { + option.map( opt => { + sparkConf.set(configEntry, opt) + 
}).getOrElse(sparkConf) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/InitContainerBootstrapStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/InitContainerBootstrapStep.scala new file mode 100644 index 0000000000000..29cad18c484c0 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/InitContainerBootstrapStep.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata} + +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.submit.{InitContainerUtil, PropertiesConfigMapFromScalaMapBuilder} +import org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer.{InitContainerConfigurationStep, InitContainerSpec} + +/** + * Configures the init-container that bootstraps dependencies into the driver pod. 
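// Standalone illustration (the secret key name is a placeholder) of the precedence applied
// by resolveSecretLocation/resolveSecretData in DriverKubernetesCredentialsStep above: a
// credential path the user has already mounted into the driver pod is used in place and
// nothing is shipped; otherwise a submitter-local value is added to the credentials secret
// and referenced at its canonical mount path.
object CredentialPrecedenceSketch {
  def resolveCredential(
      alreadyMountedPath: Option[String],
      submitterLocalBase64: Option[String],
      canonicalMountPath: String): (Option[String], Map[String, String]) = {
    alreadyMountedPath match {
      case Some(path) =>
        (Some(path), Map.empty[String, String])           // already in the pod: reference it as-is
      case None =>
        val location = submitterLocalBase64.map(_ => canonicalMountPath)
        val secretData = submitterLocalBase64
          .map(value => Map("placeholder-secret-key" -> value))
          .getOrElse(Map.empty[String, String])
        (location, secretData)                            // shipped via the credentials secret
    }
  }
}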
+ */ +private[spark] class InitContainerBootstrapStep( + initContainerConfigurationSteps: Seq[InitContainerConfigurationStep], + initContainerConfigMapName: String, + initContainerConfigMapKey: String) + extends DriverConfigurationStep { + + override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { + var currentInitContainerSpec = InitContainerSpec( + initContainerProperties = Map.empty[String, String], + additionalDriverSparkConf = Map.empty[String, String], + initContainer = new ContainerBuilder().build(), + driverContainer = driverSpec.driverContainer, + podToInitialize = driverSpec.driverPod, + initContainerDependentResources = Seq.empty[HasMetadata]) + for (nextStep <- initContainerConfigurationSteps) { + currentInitContainerSpec = nextStep.configureInitContainer(currentInitContainerSpec) + } + val configMap = PropertiesConfigMapFromScalaMapBuilder.buildConfigMap( + initContainerConfigMapName, + initContainerConfigMapKey, + currentInitContainerSpec.initContainerProperties) + val resolvedDriverSparkConf = driverSpec.driverSparkConf.clone() + .set(EXECUTOR_INIT_CONTAINER_CONFIG_MAP, initContainerConfigMapName) + .set(EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY, initContainerConfigMapKey) + .setAll(currentInitContainerSpec.additionalDriverSparkConf) + val resolvedDriverPod = InitContainerUtil.appendInitContainer( + currentInitContainerSpec.podToInitialize, currentInitContainerSpec.initContainer) + driverSpec.copy( + driverPod = resolvedDriverPod, + driverContainer = currentInitContainerSpec.driverContainer, + driverSparkConf = resolvedDriverSparkConf, + otherKubernetesResources = + driverSpec.otherKubernetesResources ++ + currentInitContainerSpec.initContainerDependentResources ++ + Seq(configMap)) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/KubernetesDriverSpec.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/KubernetesDriverSpec.scala new file mode 100644 index 0000000000000..3ec4b6c4df10f --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/KubernetesDriverSpec.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, HasMetadata, Pod, PodBuilder} + +import org.apache.spark.SparkConf + +/** + * Represents the components and characteristics of a Spark driver. 
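// Loose sketch of the config map InitContainerBootstrapStep above delegates to
// PropertiesConfigMapFromScalaMapBuilder for: the accumulated init-container properties are
// rendered as one blob of text stored under a single key that the init-container later
// reads. The real rendering is that builder's concern (it lives elsewhere in the patch);
// java.util.Properties text is used here only as a plausible shape.
import java.io.StringWriter
import java.util.Properties

import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder}

object InitContainerConfigMapSketch {
  def build(name: String, key: String, initContainerProperties: Map[String, String]): ConfigMap = {
    val props = new Properties()
    initContainerProperties.foreach { case (k, v) => props.setProperty(k, v) }
    val writer = new StringWriter()
    props.store(writer, s"Init-container properties for $name")
    new ConfigMapBuilder()
      .withNewMetadata()
        .withName(name)
        .endMetadata()
      .addToData(key, writer.toString)
      .build()
  }
}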
The driver can be considered + * as being comprised of the driver pod itself, any other Kubernetes resources that the driver + * pod depends on, and the SparkConf that should be supplied to the Spark application. The driver + * container should be operated on via the specific field of this case class as opposed to trying + * to edit the container directly on the pod. The driver container should be attached at the + * end of executing all submission steps. + */ +private[spark] case class KubernetesDriverSpec( + driverPod: Pod, + driverContainer: Container, + otherKubernetesResources: Seq[HasMetadata], + driverSparkConf: SparkConf) + +private[spark] object KubernetesDriverSpec { + def initialSpec(initialSparkConf: SparkConf): KubernetesDriverSpec = { + KubernetesDriverSpec( + // Set new metadata and a new spec so that submission steps can use + // PodBuilder#editMetadata() and/or PodBuilder#editSpec() safely. + new PodBuilder().withNewMetadata().endMetadata().withNewSpec().endSpec().build(), + new ContainerBuilder().build(), + Seq.empty[HasMetadata], + initialSparkConf.clone()) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStep.scala new file mode 100644 index 0000000000000..024d643ddf9fd --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStep.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
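// Hedged sketch of the final assembly the KubernetesDriverSpec scaladoc above implies:
// once every submission step has run, the driver container is attached to the pod spec,
// and the other accumulated resources (secrets, config maps, ...) are created alongside
// the pod. The submission client that actually does this lives elsewhere in the patch.
import io.fabric8.kubernetes.api.model.{Pod, PodBuilder}

object DriverSpecAssemblySketch {
  def finalDriverPod(spec: KubernetesDriverSpec): Pod = {
    new PodBuilder(spec.driverPod)
      .editSpec()
        .addToContainers(spec.driverContainer)
        .endSpec()
      .build()
  }
}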
+ */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import io.fabric8.kubernetes.api.model.ContainerBuilder + +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils + +private[spark] class PythonStep( + primaryPyFile: String, + otherPyFiles: Seq[String], + filesDownloadPath: String) extends DriverConfigurationStep { + + override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { + val resolvedOtherPyFilesString = if (otherPyFiles.isEmpty) { + "null" + } else { + KubernetesFileUtils.resolveFilePaths(otherPyFiles, filesDownloadPath).mkString(",") + } + val withPythonPrimaryFileContainer = new ContainerBuilder(driverSpec.driverContainer) + .addNewEnv() + .withName(ENV_PYSPARK_PRIMARY) + .withValue(KubernetesFileUtils.resolveFilePath(primaryPyFile, filesDownloadPath)) + .endEnv() + .addNewEnv() + .withName(ENV_PYSPARK_FILES) + .withValue(resolvedOtherPyFilesString) + .endEnv() + driverSpec.copy(driverContainer = withPythonPrimaryFileContainer.build()) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/BaseInitContainerConfigurationStep.scala similarity index 62% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilder.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/BaseInitContainerConfigurationStep.scala index 4062a3113eddf..60bf27beacaaf 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/BaseInitContainerConfigurationStep.scala @@ -14,32 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit - -import io.fabric8.kubernetes.api.model.ConfigMap +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer +import org.apache.spark.deploy.kubernetes.{PodWithDetachedInitContainer, SparkPodInitContainerBootstrap} import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils -private[spark] trait SparkInitContainerConfigMapBuilder { - /** - * Construct a config map that an init-container should reference for fetching - * remote dependencies. The config map includes the remote jars and files to download, - * as well as details to fetch files from a resource staging server, if applicable. 
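// Worked example with invented paths for PythonStep above: the primary resource and any
// --py-files are rewritten to their in-pod locations (same scheme handling as the
// classpath sketch earlier), and an empty --py-files list is encoded as the literal string
// "null" so the value written for ENV_PYSPARK_FILES is never empty.
object PySparkEnvValuesSketch {
  def main(args: Array[String]): Unit = {
    val resolvedPrimary = "/var/spark-data/spark-files/job.py" // from file:///home/me/job.py
    val resolvedOthers = Seq("/opt/libs/helpers.py")           // from local:///opt/libs/helpers.py
    val filesEnvValue = if (resolvedOthers.isEmpty) "null" else resolvedOthers.mkString(",")
    println(s"ENV_PYSPARK_PRIMARY value: $resolvedPrimary")
    println(s"ENV_PYSPARK_FILES value:   $filesEnvValue")
  }
}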
- */ - def build(): ConfigMap -} - -private[spark] class SparkInitContainerConfigMapBuilderImpl( +private[spark] class BaseInitContainerConfigurationStep( sparkJars: Seq[String], sparkFiles: Seq[String], jarsDownloadPath: String, filesDownloadPath: String, configMapName: String, configMapKey: String, - submittedDependenciesPlugin: Option[SubmittedDependencyInitContainerConfigPlugin]) - extends SparkInitContainerConfigMapBuilder { + podAndInitContainerBootstrap: SparkPodInitContainerBootstrap) + extends InitContainerConfigurationStep { - override def build(): ConfigMap = { + override def configureInitContainer(initContainerSpec: InitContainerSpec): InitContainerSpec = { val remoteJarsToDownload = KubernetesFileUtils.getOnlyRemoteFiles(sparkJars) val remoteFilesToDownload = KubernetesFileUtils.getOnlyRemoteFiles(sparkFiles) val remoteJarsConf = if (remoteJarsToDownload.nonEmpty) { @@ -57,12 +48,16 @@ private[spark] class SparkInitContainerConfigMapBuilderImpl( INIT_CONTAINER_FILES_DOWNLOAD_LOCATION.key -> filesDownloadPath) ++ remoteJarsConf ++ remoteFilesConf - val submittedDependenciesConfig = submittedDependenciesPlugin.map { plugin => - plugin.configurationsToFetchSubmittedDependencies() - }.toSeq.flatten.toMap - PropertiesConfigMapFromScalaMapBuilder.buildConfigMap( - configMapName, - configMapKey, - baseInitContainerConfig ++ submittedDependenciesConfig) + val bootstrappedPodAndInitContainer = + podAndInitContainerBootstrap.bootstrapInitContainerAndVolumes( + PodWithDetachedInitContainer( + initContainerSpec.podToInitialize, + initContainerSpec.initContainer, + initContainerSpec.driverContainer)) + initContainerSpec.copy( + initContainer = bootstrappedPodAndInitContainer.initContainer, + driverContainer = bootstrappedPodAndInitContainer.mainContainer, + podToInitialize = bootstrappedPodAndInitContainer.pod, + initContainerProperties = baseInitContainerConfig) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStep.scala new file mode 100644 index 0000000000000..7b7622c3d4f8b --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStep.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer + +/** + * Represents a step in preparing the init-container for the driver and executors. 
+ */ +private[spark] trait InitContainerConfigurationStep { + + def configureInitContainer(initContainerSpec: InitContainerSpec): InitContainerSpec +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestrator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestrator.scala new file mode 100644 index 0000000000000..e4ea5235af18f --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestrator.scala @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, OptionRequirements, SparkPodInitContainerBootstrapImpl} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.SubmittedDependencyUploaderImpl +import org.apache.spark.deploy.rest.kubernetes.{ResourceStagingServerSslOptionsProviderImpl, RetrofitClientFactoryImpl} +import org.apache.spark.util.Utils + +/** + * Returns the complete ordered list of steps required to configure the init-container. 
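// Tiny standalone illustration (URIs invented) of the remote-only filtering that
// BaseInitContainerConfigurationStep above applies through
// KubernetesFileUtils.getOnlyRemoteFiles: submitter-local (file://) and image-local
// (local://) URIs are excluded, and only the remainder is listed in the init-container
// properties for download.
object RemoteOnlyFilterSketch {
  def main(args: Array[String]): Unit = {
    val uris = Seq(
      "file:///tmp/app.jar",
      "local:///opt/spark/lib.jar",
      "https://repo.example.com/dep.jar",
      "hdfs://nn:8020/jars/extra.jar")
    val remoteOnly = uris.filter { uri =>
      val scheme = Option(new java.net.URI(uri).getScheme).getOrElse("file")
      scheme != "file" && scheme != "local"
    }
    println(remoteOnly) // List(https://repo.example.com/dep.jar, hdfs://nn:8020/jars/extra.jar)
  }
}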
+ */ +private[spark] class InitContainerConfigurationStepsOrchestrator( + namespace: String, + kubernetesResourceNamePrefix: String, + sparkJars: Seq[String], + sparkFiles: Seq[String], + jarsDownloadPath: String, + filesDownloadPath: String, + dockerImagePullPolicy: String, + driverLabels: Map[String, String], + initContainerConfigMapName: String, + initContainerConfigMapKey: String, + submissionSparkConf: SparkConf) { + + private val submittedResourcesSecretName = s"$kubernetesResourceNamePrefix-init-secret" + private val resourceStagingServerUri = submissionSparkConf.get(RESOURCE_STAGING_SERVER_URI) + private val resourceStagingServerInternalUri = + submissionSparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_URI) + private val initContainerImage = submissionSparkConf.get(INIT_CONTAINER_DOCKER_IMAGE) + private val downloadTimeoutMinutes = submissionSparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) + private val maybeResourceStagingServerInternalTrustStore = + submissionSparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_FILE) + .orElse(submissionSparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE)) + private val maybeResourceStagingServerInternalTrustStorePassword = + submissionSparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_PASSWORD) + .orElse(submissionSparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD)) + private val maybeResourceStagingServerInternalTrustStoreType = + submissionSparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_TYPE) + .orElse(submissionSparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE)) + private val maybeResourceStagingServerInternalClientCert = + submissionSparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_CLIENT_CERT_PEM) + .orElse(submissionSparkConf.get(RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM)) + private val resourceStagingServerInternalSslEnabled = + submissionSparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_SSL_ENABLED) + .orElse(submissionSparkConf.get(RESOURCE_STAGING_SERVER_SSL_ENABLED)) + .getOrElse(false) + OptionRequirements.requireNandDefined( + maybeResourceStagingServerInternalClientCert, + maybeResourceStagingServerInternalTrustStore, + "Cannot provide both a certificate file and a trustStore file for init-containers to" + + " use for contacting the resource staging server over TLS.") + + require(maybeResourceStagingServerInternalTrustStore.forall { trustStore => + Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") match { + case "file" | "local" => true + case _ => false + } + }, "TrustStore URI used for contacting the resource staging server from init containers must" + + " have no scheme, or scheme file://, or scheme local://.") + + require(maybeResourceStagingServerInternalClientCert.forall { trustStore => + Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") match { + case "file" | "local" => true + case _ => false + } + }, "Client cert file URI used for contacting the resource staging server from init containers" + + " must have no scheme, or scheme file://, or scheme local://.") + + def getAllConfigurationSteps(): Seq[InitContainerConfigurationStep] = { + val initContainerBootstrap = new SparkPodInitContainerBootstrapImpl( + initContainerImage, + dockerImagePullPolicy, + jarsDownloadPath, + filesDownloadPath, + downloadTimeoutMinutes, + initContainerConfigMapName, + initContainerConfigMapKey) + val baseInitContainerStep = new BaseInitContainerConfigurationStep( + sparkJars, + sparkFiles, + jarsDownloadPath, + filesDownloadPath, + initContainerConfigMapName, + initContainerConfigMapKey, + 
initContainerBootstrap) + val submittedResourcesInitContainerStep = resourceStagingServerUri.map { + stagingServerUri => + val mountSecretPlugin = new InitContainerResourceStagingServerSecretPluginImpl( + submittedResourcesSecretName, + INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) + val submittedDependencyUploader = new SubmittedDependencyUploaderImpl( + driverLabels, + namespace, + stagingServerUri, + sparkJars, + sparkFiles, + new ResourceStagingServerSslOptionsProviderImpl(submissionSparkConf).getSslOptions, + RetrofitClientFactoryImpl) + new SubmittedResourcesInitContainerConfigurationStep( + submittedResourcesSecretName, + resourceStagingServerInternalUri.getOrElse(stagingServerUri), + INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH, + resourceStagingServerInternalSslEnabled, + maybeResourceStagingServerInternalTrustStore, + maybeResourceStagingServerInternalClientCert, + maybeResourceStagingServerInternalTrustStorePassword, + maybeResourceStagingServerInternalTrustStoreType, + submittedDependencyUploader, + mountSecretPlugin) + } + Seq(baseInitContainerStep) ++ submittedResourcesInitContainerStep.toSeq + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerSpec.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerSpec.scala new file mode 100644 index 0000000000000..5b5ac3c1f17c2 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerSpec.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer + +import io.fabric8.kubernetes.api.model.{Container, HasMetadata, Pod} + +/** + * Represents a given configuration of the init-container, informing the main + * InitContainerBootstrapStep of how the driver should be configured. This includes: + *
    + * - What properties should be set on the init-container, + * - What Spark properties should be set on the driver's SparkConf given this init-container, + * - The spec of the init container itself, + * - The spec of the main container so that it can be modified to share volumes with the + * init-container + * - The spec of the pod EXCEPT for the addition of the given init-container (e.g. volumes + * the init-container needs or modifications to a main container that shares data with the + * init-container), + * - Any Kubernetes resources that need to be created for the init-container's function. + */ +private[spark] case class InitContainerSpec( + initContainerProperties: Map[String, String], + additionalDriverSparkConf: Map[String, String], + initContainer: Container, + driverContainer: Container, + podToInitialize: Pod, + initContainerDependentResources: Seq[HasMetadata]) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerConfigurationStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerConfigurationStep.scala new file mode 100644 index 0000000000000..7aa27a1de6811 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerConfigurationStep.scala @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
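For illustration only: the InitContainerSpec case class defined above is the value that every InitContainerConfigurationStep transforms, so applying the orchestrator's steps amounts to a fold. A minimal sketch under that assumption follows; the `orchestrator` value and the empty starting spec are placeholders for this aside, not code from this change.

  import io.fabric8.kubernetes.api.model.{Container, PodBuilder}

  // Each step returns an augmented copy of the spec handed to it, so the driver side can
  // simply fold the ordered steps over an empty starting spec.
  val emptySpec = InitContainerSpec(
    initContainerProperties = Map.empty[String, String],
    additionalDriverSparkConf = Map.empty[String, String],
    initContainer = new Container(),
    driverContainer = new Container(),
    podToInitialize = new PodBuilder().withNewMetadata().endMetadata().withNewSpec().endSpec().build(),
    initContainerDependentResources = Seq.empty)
  val resolvedSpec = orchestrator.getAllConfigurationSteps()
    .foldLeft(emptySpec) { (spec, step) => step.configureInitContainer(spec) }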
+ */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer + +import java.io.File + +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.{Secret, SecretBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkException +import org.apache.spark.deploy.kubernetes.InitContainerResourceStagingServerSecretPlugin +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.SubmittedDependencyUploader +import org.apache.spark.internal.config.OptionalConfigEntry +import org.apache.spark.util.Utils + +private[spark] class SubmittedResourcesInitContainerConfigurationStep( + submittedResourcesSecretName: String, + internalResourceStagingServerUri: String, + initContainerSecretMountPath: String, + resourceStagingServerSslEnabled: Boolean, + maybeInternalTrustStoreUri: Option[String], + maybeInternalClientCertUri: Option[String], + maybeInternalTrustStorePassword: Option[String], + maybeInternalTrustStoreType: Option[String], + submittedDependencyUploader: SubmittedDependencyUploader, + submittedResourcesSecretPlugin: InitContainerResourceStagingServerSecretPlugin) + extends InitContainerConfigurationStep { + + override def configureInitContainer(initContainerSpec: InitContainerSpec): InitContainerSpec = { + val jarsIdAndSecret = submittedDependencyUploader.uploadJars() + val filesIdAndSecret = submittedDependencyUploader.uploadFiles() + + val submittedResourcesInitContainerProperties = Map[String, String]( + RESOURCE_STAGING_SERVER_URI.key -> internalResourceStagingServerUri, + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> jarsIdAndSecret.resourceId, + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> + s"$initContainerSecretMountPath/$INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY", + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> filesIdAndSecret.resourceId, + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> + s"$initContainerSecretMountPath/$INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY", + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> resourceStagingServerSslEnabled.toString) ++ + resolveSecretPath( + maybeInternalTrustStoreUri, + INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, + RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE, + "TrustStore URI") ++ + resolveSecretPath( + maybeInternalClientCertUri, + INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, + RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM, + "Client certificate URI") ++ + maybeInternalTrustStorePassword.map { password => + (RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key, password) + }.toMap ++ + maybeInternalTrustStoreType.map { storeType => + (RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key, storeType) + }.toMap + val initContainerSecret = createResourceStagingServerSecret( + jarsIdAndSecret.resourceSecret, filesIdAndSecret.resourceSecret) + val additionalDriverSparkConf = + Map( + EXECUTOR_INIT_CONTAINER_SECRET.key -> initContainerSecret.getMetadata.getName, + EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR.key -> initContainerSecretMountPath) + val initContainerWithSecretVolumeMount = submittedResourcesSecretPlugin + .mountResourceStagingServerSecretIntoInitContainer(initContainerSpec.initContainer) + val podWithSecretVolume = submittedResourcesSecretPlugin + .addResourceStagingServerSecretVolumeToPod(initContainerSpec.podToInitialize) + initContainerSpec.copy( + initContainer = initContainerWithSecretVolumeMount, + 
podToInitialize = podWithSecretVolume, + initContainerDependentResources = + initContainerSpec.initContainerDependentResources ++ Seq(initContainerSecret), + initContainerProperties = + initContainerSpec.initContainerProperties ++ submittedResourcesInitContainerProperties, + additionalDriverSparkConf = additionalDriverSparkConf) + } + + private def createResourceStagingServerSecret( + jarsResourceSecret: String, filesResourceSecret: String): Secret = { + val trustStoreBase64 = convertFileToBase64IfSubmitterLocal( + INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, maybeInternalTrustStoreUri) + val clientCertBase64 = convertFileToBase64IfSubmitterLocal( + INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, maybeInternalClientCertUri) + val jarsSecretBase64 = BaseEncoding.base64().encode(jarsResourceSecret.getBytes(Charsets.UTF_8)) + val filesSecretBase64 = BaseEncoding.base64().encode( + filesResourceSecret.getBytes(Charsets.UTF_8)) + val secretData = Map( + INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY -> jarsSecretBase64, + INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY -> filesSecretBase64) ++ + trustStoreBase64 ++ + clientCertBase64 + val kubernetesSecret = new SecretBuilder() + .withNewMetadata() + .withName(submittedResourcesSecretName) + .endMetadata() + .addToData(secretData.asJava) + .build() + kubernetesSecret + } + + private def convertFileToBase64IfSubmitterLocal(secretKey: String, secretUri: Option[String]) + : Map[String, String] = { + secretUri.filter { trustStore => + Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") == "file" + }.map { uri => + val file = new File(Utils.resolveURI(uri).getPath) + require(file.isFile, "Dependency server trustStore provided at" + + file.getAbsolutePath + " does not exist or is not a file.") + (secretKey, BaseEncoding.base64().encode(Files.toByteArray(file))) + }.toMap + } + + private def resolveSecretPath( + maybeUri: Option[String], + secretKey: String, + configEntry: OptionalConfigEntry[String], + uriType: String): Map[String, String] = { + maybeUri.map(Utils.resolveURI).map { uri => + val resolvedPath = Option(uri.getScheme).getOrElse("file") match { + case "file" => s"$initContainerSecretMountPath/$secretKey" + case "local" => uri.getPath + case invalid => throw new SparkException(s"$uriType has invalid scheme $invalid must be" + + s" local://, file://, or empty.") + } + (configEntry.key, resolvedPath) + }.toMap + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala index ac19c2463218b..0e274678ad6f0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala @@ -76,7 +76,6 @@ private[spark] class KubernetesSparkDependencyDownloadInitContainer( fileFetcher: FileFetcher, resourceStagingServerSslOptions: SSLOptions) extends Logging { - private implicit val downloadExecutor = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("download-executor")) private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) diff --git 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala index 2a0f6e78c2aea..fa0ecca3b4ee6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala @@ -70,8 +70,7 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION), sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT), configMap, - configMapKey, - executorInitContainerSecretVolumePlugin) + configMapKey) } if (maybeConfigMap.isEmpty) { logWarning("The executor's init-container config map was not specified. Executors will" + @@ -89,7 +88,11 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH)), Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH))) new KubernetesClusterSchedulerBackend( - sc.taskScheduler.asInstanceOf[TaskSchedulerImpl], sc, bootStrap, kubernetesClient) + sc.taskScheduler.asInstanceOf[TaskSchedulerImpl], + sc, + bootStrap, + executorInitContainerSecretVolumePlugin, + kubernetesClient) } override def initialize(scheduler: TaskScheduler, backend: SchedulerBackend): Unit = { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index d880cee315c0d..e5f980ad1f366 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -23,7 +23,7 @@ import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference} import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule -import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, EnvVarSourceBuilder, Pod, PodBuilder, QuantityBuilder} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, ContainerPortBuilder, EnvVarBuilder, EnvVarSourceBuilder, Pod, PodBuilder, QuantityBuilder} import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import org.apache.commons.io.FilenameUtils @@ -32,9 +32,10 @@ import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future} import org.apache.spark.{SparkContext, SparkEnv, SparkException} -import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, SparkPodInitContainerBootstrap} +import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, InitContainerResourceStagingServerSecretPlugin, PodWithDetachedInitContainer, SparkPodInitContainerBootstrap} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.InitContainerUtil import org.apache.spark.network.netty.SparkTransportConf import 
org.apache.spark.network.shuffle.kubernetes.KubernetesExternalShuffleClient import org.apache.spark.rpc.{RpcCallContext, RpcEndpointAddress, RpcEnv} @@ -47,6 +48,7 @@ private[spark] class KubernetesClusterSchedulerBackend( scheduler: TaskSchedulerImpl, val sc: SparkContext, executorInitContainerBootstrap: Option[SparkPodInitContainerBootstrap], + executorMountInitContainerSecretPlugin: Option[InitContainerResourceStagingServerSecretPlugin], kubernetesClient: KubernetesClient) extends CoarseGrainedSchedulerBackend(scheduler, sc.env.rpcEnv) { @@ -319,8 +321,8 @@ private[spark] class KubernetesClusterSchedulerBackend( nodeToLocalTaskCount.toMap[String, Int] } - private def addNodeAffinityAnnotationIfUseful(basePodBuilder: PodBuilder, - nodeToTaskCount: Map[String, Int]): PodBuilder = { + private def addNodeAffinityAnnotationIfUseful( + baseExecutorPod: Pod, nodeToTaskCount: Map[String, Int]): Pod = { def scaleToRange(value: Int, baseMin: Double, baseMax: Double, rangeMin: Double, rangeMax: Double): Int = (((rangeMax - rangeMin) * (value - baseMin) / (baseMax - baseMin)) + rangeMin).toInt @@ -341,11 +343,12 @@ private[spark] class KubernetesClusterSchedulerBackend( ))) // TODO: Use non-annotation syntax when we switch to K8s version 1.6. logDebug(s"Adding nodeAffinity as annotation $nodeAffinityJson") - basePodBuilder.editMetadata() + new PodBuilder(baseExecutorPod).editMetadata() .addToAnnotations(ANNOTATION_EXECUTOR_NODE_AFFINITY, nodeAffinityJson) .endMetadata() + .build() } else { - basePodBuilder + baseExecutorPod } } @@ -416,7 +419,21 @@ private[spark] class KubernetesClusterSchedulerBackend( .build() }) - val basePodBuilder = new PodBuilder() + val executorContainer = new ContainerBuilder() + .withName(s"executor") + .withImage(executorDockerImage) + .withImagePullPolicy(dockerImagePullPolicy) + .withNewResources() + .addToRequests("memory", executorMemoryQuantity) + .addToLimits("memory", executorMemoryLimitQuantity) + .addToRequests("cpu", executorCpuQuantity) + .endResources() + .addAllToEnv(requiredEnv.asJava) + .addToEnv(executorExtraClasspathEnv.toSeq: _*) + .withPorts(requiredPorts.asJava) + .build() + + val executorPod = new PodBuilder() .withNewMetadata() .withName(name) .withLabels(resolvedExecutorLabels.asJava) @@ -432,69 +449,77 @@ private[spark] class KubernetesClusterSchedulerBackend( .endMetadata() .withNewSpec() .withHostname(hostname) - .addNewContainer() - .withName(s"executor") - .withImage(executorDockerImage) - .withImagePullPolicy(dockerImagePullPolicy) - .withNewResources() - .addToRequests("memory", executorMemoryQuantity) - .addToLimits("memory", executorMemoryLimitQuantity) - .addToRequests("cpu", executorCpuQuantity) - .endResources() - .addAllToEnv(requiredEnv.asJava) - .addToEnv(executorExtraClasspathEnv.toSeq: _*) - .withPorts(requiredPorts.asJava) - .endContainer() .endSpec() + .build() - executorLimitCores.map { + val containerWithExecutorLimitCores = executorLimitCores.map { limitCores => val executorCpuLimitQuantity = new QuantityBuilder(false) .withAmount(limitCores) .build() - basePodBuilder + new ContainerBuilder(executorContainer) + .editResources() + .addToLimits("cpu", executorCpuLimitQuantity) + .endResources() + .build() + }.getOrElse(executorContainer) + + val withMaybeShuffleConfigExecutorContainer = shuffleServiceConfig.map { config => + config.shuffleDirs.foldLeft(containerWithExecutorLimitCores) { (container, dir) => + new ContainerBuilder(container) + .addNewVolumeMount() + .withName(FilenameUtils.getBaseName(dir)) + .withMountPath(dir) + 
.endVolumeMount() + .build() + } + }.getOrElse(containerWithExecutorLimitCores) + val withMaybeShuffleConfigPod = shuffleServiceConfig.map { config => + config.shuffleDirs.foldLeft(executorPod) { (builder, dir) => + new PodBuilder(builder) .editSpec() - .editFirstContainer() - .editResources - .addToLimits("cpu", executorCpuLimitQuantity) - .endResources() - .endContainer() - .endSpec() - } - - val withMaybeShuffleConfigPodBuilder = shuffleServiceConfig - .map { config => - config.shuffleDirs.foldLeft(basePodBuilder) { (builder, dir) => - builder - .editSpec() - .addNewVolume() - .withName(FilenameUtils.getBaseName(dir)) - .withNewHostPath() - .withPath(dir) + .addNewVolume() + .withName(FilenameUtils.getBaseName(dir)) + .withNewHostPath() + .withPath(dir) .endHostPath() .endVolume() - .editFirstContainer() - .addNewVolumeMount() - .withName(FilenameUtils.getBaseName(dir)) - .withMountPath(dir) - .endVolumeMount() - .endContainer() .endSpec() - } - }.getOrElse(basePodBuilder) - - val executorInitContainerPodBuilder = executorInitContainerBootstrap.map { - bootstrap => - bootstrap.bootstrapInitContainerAndVolumes( - "executor", - withMaybeShuffleConfigPodBuilder) - }.getOrElse(withMaybeShuffleConfigPodBuilder) - - val resolvedExecutorPodBuilder = addNodeAffinityAnnotationIfUseful( - executorInitContainerPodBuilder, nodeToLocalTaskCount) - + .build() + } + }.getOrElse(executorPod) + val (executorPodWithInitContainer, initBootstrappedExecutorContainer) = + executorInitContainerBootstrap.map { bootstrap => + val podWithDetachedInitContainer = bootstrap.bootstrapInitContainerAndVolumes( + PodWithDetachedInitContainer( + withMaybeShuffleConfigPod, + new ContainerBuilder().build(), + withMaybeShuffleConfigExecutorContainer)) + + val resolvedInitContainer = executorMountInitContainerSecretPlugin.map { plugin => + plugin.mountResourceStagingServerSecretIntoInitContainer( + podWithDetachedInitContainer.initContainer) + }.getOrElse(podWithDetachedInitContainer.initContainer) + + val podWithAttachedInitContainer = InitContainerUtil.appendInitContainer( + podWithDetachedInitContainer.pod, resolvedInitContainer) + + val resolvedPodWithMountedSecret = executorMountInitContainerSecretPlugin.map { plugin => + plugin.addResourceStagingServerSecretVolumeToPod(podWithAttachedInitContainer) + }.getOrElse(podWithAttachedInitContainer) + + (resolvedPodWithMountedSecret, podWithDetachedInitContainer.mainContainer) + }.getOrElse((withMaybeShuffleConfigPod, withMaybeShuffleConfigExecutorContainer)) + + val executorPodWithNodeAffinity = addNodeAffinityAnnotationIfUseful( + executorPodWithInitContainer, nodeToLocalTaskCount) + val resolvedExecutorPod = new PodBuilder(executorPodWithNodeAffinity) + .editSpec() + .addToContainers(initBootstrappedExecutorContainer) + .endSpec() + .build() try { - (executorId, kubernetesClient.pods.create(resolvedExecutorPodBuilder.build())) + (executorId, kubernetesClient.pods.create(resolvedExecutorPod)) } catch { case throwable: Throwable => logError("Failed to allocate executor pod.", throwable) @@ -606,10 +631,11 @@ private[spark] class KubernetesClusterSchedulerBackend( } } - case class ShuffleServiceConfig(shuffleNamespace: String, +} +case class ShuffleServiceConfig( + shuffleNamespace: String, shuffleLabels: Map[String, String], shuffleDirs: Seq[String]) -} private object KubernetesClusterSchedulerBackend { private val DEFAULT_STATIC_PORT = 10000 diff --git 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala new file mode 100644 index 0000000000000..f5b2db36aff8f --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import org.scalatest.BeforeAndAfter +import io.fabric8.kubernetes.api.model._ +import org.apache.spark.deploy.kubernetes.constants._ + +import scala.collection.JavaConverters._ +import org.apache.spark.SparkFunSuite + +class InitContainerResourceStagingServerSecretPluginSuite extends SparkFunSuite with BeforeAndAfter{ + private val INIT_CONTAINER_SECRET_NAME = "init-secret" + private val INIT_CONTAINER_SECRET_MOUNT = "/tmp/secret" + + private val initContainerSecretPlugin = new InitContainerResourceStagingServerSecretPluginImpl( + INIT_CONTAINER_SECRET_NAME, + INIT_CONTAINER_SECRET_MOUNT) + + test("Volume Mount into InitContainer") { + val returnedCont = initContainerSecretPlugin.mountResourceStagingServerSecretIntoInitContainer( + new ContainerBuilder().withName("init-container").build()) + assert(returnedCont.getName === "init-container") + assert(returnedCont.getVolumeMounts.asScala.map( + vm => (vm.getName, vm.getMountPath)) === + List((INIT_CONTAINER_SECRET_VOLUME_NAME, INIT_CONTAINER_SECRET_MOUNT))) + } + + test("Add Volume with Secret to Pod") { + val returnedPod = initContainerSecretPlugin.addResourceStagingServerSecretVolumeToPod( + basePod().build) + assert(returnedPod.getMetadata.getName === "spark-pod") + val volume = returnedPod.getSpec.getVolumes.asScala.head + assert(volume.getName === INIT_CONTAINER_SECRET_VOLUME_NAME) + assert(volume.getSecret.getSecretName === INIT_CONTAINER_SECRET_NAME) + } + private def basePod(): PodBuilder = { + new PodBuilder() + .withNewMetadata() + .withName("spark-pod") + .endMetadata() + .withNewSpec() + .endSpec() + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala index 90d7b10df211c..0557b5677b919 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala @@ 
-16,16 +16,14 @@ */ package org.apache.spark.deploy.kubernetes -import com.fasterxml.jackson.databind.ObjectMapper -import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, Pod, PodBuilder} import org.scalatest.BeforeAndAfter -import scala.collection.JavaConverters._ +import io.fabric8.kubernetes.api.model._ +import org.apache.spark.deploy.kubernetes.constants._ +import scala.collection.JavaConverters._ import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.kubernetes.constants._ class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAfter { - private val OBJECT_MAPPER = new ObjectMapper() private val INIT_CONTAINER_IMAGE = "spark-init:latest" private val DOCKER_IMAGE_PULL_POLICY = "IfNotPresent" private val JARS_DOWNLOAD_PATH = "/var/data/spark-jars" @@ -33,134 +31,66 @@ class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAf private val DOWNLOAD_TIMEOUT_MINUTES = 5 private val INIT_CONTAINER_CONFIG_MAP_NAME = "spark-init-config-map" private val INIT_CONTAINER_CONFIG_MAP_KEY = "spark-init-config-map-key" - private val ADDED_SUBMITTED_DEPENDENCY_ENV = "ADDED_SUBMITTED_DEPENDENCY" - private val ADDED_SUBMITTED_DEPENDENCY_ANNOTATION = "added-submitted-dependencies" private val MAIN_CONTAINER_NAME = "spark-main" - private val TRUE = "true" - private val submittedDependencyPlugin = new InitContainerResourceStagingServerSecretPlugin { - override def addResourceStagingServerSecretVolumeToPod(basePod: PodBuilder) - : PodBuilder = { - basePod.editMetadata() - .addToAnnotations(ADDED_SUBMITTED_DEPENDENCY_ANNOTATION, TRUE) - .endMetadata() - } + private val sparkPodInit = new SparkPodInitContainerBootstrapImpl( + INIT_CONTAINER_IMAGE, + DOCKER_IMAGE_PULL_POLICY, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + DOWNLOAD_TIMEOUT_MINUTES, + INIT_CONTAINER_CONFIG_MAP_NAME, + INIT_CONTAINER_CONFIG_MAP_KEY) + private val expectedSharedVolumeMap = Map( + JARS_DOWNLOAD_PATH -> INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME, + FILES_DOWNLOAD_PATH -> INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME) - override def mountResourceStagingServerSecretIntoInitContainer(container: ContainerBuilder) - : ContainerBuilder = { - container - .addNewEnv() - .withName(ADDED_SUBMITTED_DEPENDENCY_ENV) - .withValue(TRUE) - .endEnv() - } - } - - test("Running without submitted dependencies adds init-container with volume mounts.") { - val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() - val podAnnotations = bootstrappedPod.getMetadata.getAnnotations.asScala - assert(podAnnotations.contains(INIT_CONTAINER_ANNOTATION)) - val initContainers = OBJECT_MAPPER.readValue( - podAnnotations(INIT_CONTAINER_ANNOTATION), classOf[Array[Container]]) - assert(initContainers.length === 1) - val initContainer = initContainers.head - val initContainerVolumeMounts = initContainer.getVolumeMounts.asScala.map { - mount => (mount.getName, mount.getMountPath) - }.toMap - val expectedInitContainerVolumeMounts = Map( - INIT_CONTAINER_PROPERTIES_FILE_VOLUME -> INIT_CONTAINER_PROPERTIES_FILE_DIR, - INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, - INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH) - assert(initContainerVolumeMounts === expectedInitContainerVolumeMounts) + test("InitContainer: Volume mounts, args, and builder specs") { + val returnedPodWithCont = sparkPodInit.bootstrapInitContainerAndVolumes( + PodWithDetachedInitContainer( + pod = basePod().build(), + initContainer = new Container(), + mainContainer = new 
ContainerBuilder().withName(MAIN_CONTAINER_NAME).build())) + val initContainer: Container = returnedPodWithCont.initContainer + val volumes = initContainer.getVolumeMounts.asScala + assert(volumes.map(vm => (vm.getMountPath, vm.getName)).toMap === expectedSharedVolumeMap + ++ Map("/etc/spark-init" -> "spark-init-properties")) assert(initContainer.getName === "spark-init") assert(initContainer.getImage === INIT_CONTAINER_IMAGE) - assert(initContainer.getImagePullPolicy === "IfNotPresent") - assert(initContainer.getArgs.asScala === List(INIT_CONTAINER_PROPERTIES_FILE_PATH)) + assert(initContainer.getImagePullPolicy === DOCKER_IMAGE_PULL_POLICY) + assert(initContainer.getArgs.asScala.head === INIT_CONTAINER_PROPERTIES_FILE_PATH) } - - test("Running without submitted dependencies adds volume mounts to main container.") { - val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() - val containers = bootstrappedPod.getSpec.getContainers.asScala - val mainContainer = containers.find(_.getName === MAIN_CONTAINER_NAME) - assert(mainContainer.isDefined) - val volumeMounts = mainContainer.map(_.getVolumeMounts.asScala).toSeq.flatten.map { - mount => (mount.getName, mount.getMountPath) - }.toMap - val expectedVolumeMounts = Map( - INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, - INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH) - assert(volumeMounts === expectedVolumeMounts) + test("Main: Volume mounts and env") { + val returnedPodWithCont = sparkPodInit.bootstrapInitContainerAndVolumes( + PodWithDetachedInitContainer( + pod = basePod().build(), + initContainer = new Container(), + mainContainer = new ContainerBuilder().withName(MAIN_CONTAINER_NAME).build())) + val mainContainer: Container = returnedPodWithCont.mainContainer + assert(mainContainer.getName === MAIN_CONTAINER_NAME) + val volumeMounts = mainContainer.getVolumeMounts.asScala + assert(volumeMounts.map(vm => (vm.getMountPath, vm.getName)).toMap === expectedSharedVolumeMap) + assert(mainContainer.getEnv.asScala.map(e => (e.getName, e.getValue)).toMap === + Map(ENV_MOUNTED_FILES_DIR -> FILES_DOWNLOAD_PATH)) } - - test("Running without submitted dependencies adds volumes to the pod") { - val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() - val podVolumes = bootstrappedPod.getSpec.getVolumes.asScala - assert(podVolumes.size === 3) - assert(podVolumes.exists { volume => - volume.getName == INIT_CONTAINER_PROPERTIES_FILE_VOLUME && - Option(volume.getConfigMap).map { configMap => - configMap.getItems.asScala.map { - keyToPath => (keyToPath.getKey, keyToPath.getPath) - }.toMap - }.contains(Map(INIT_CONTAINER_CONFIG_MAP_KEY -> INIT_CONTAINER_PROPERTIES_FILE_NAME)) - }) - assert(podVolumes.exists { volume => - volume.getName == INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME && volume.getEmptyDir != null - }) - assert(podVolumes.exists { volume => - volume.getName == INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME && volume.getEmptyDir != null - }) - } - - test("Files download path is set as environment variable") { - val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() - val containers = bootstrappedPod.getSpec.getContainers.asScala - val maybeMainContainer = containers.find(_.getName === MAIN_CONTAINER_NAME) - assert(maybeMainContainer.exists { mainContainer => - mainContainer.getEnv.asScala.exists(envVar => - envVar.getName == ENV_MOUNTED_FILES_DIR && envVar.getValue == FILES_DOWNLOAD_PATH) - }) - } - - test("Running with submitted dependencies modifies the init container with the plugin.") { - 
val bootstrappedPod = bootstrapPodWithSubmittedDependencies() - val podAnnotations = bootstrappedPod.getMetadata.getAnnotations.asScala - assert(podAnnotations(ADDED_SUBMITTED_DEPENDENCY_ANNOTATION) === TRUE) - val initContainers = OBJECT_MAPPER.readValue( - podAnnotations(INIT_CONTAINER_ANNOTATION), classOf[Array[Container]]) - assert(initContainers.length === 1) - val initContainer = initContainers.head - assert(initContainer.getEnv.asScala.exists { - env => env.getName === ADDED_SUBMITTED_DEPENDENCY_ENV && env.getValue === TRUE - }) - } - - private def bootstrapPodWithoutSubmittedDependencies(): Pod = { - val bootstrapUnderTest = new SparkPodInitContainerBootstrapImpl( - INIT_CONTAINER_IMAGE, - DOCKER_IMAGE_PULL_POLICY, - JARS_DOWNLOAD_PATH, - FILES_DOWNLOAD_PATH, - DOWNLOAD_TIMEOUT_MINUTES, - INIT_CONTAINER_CONFIG_MAP_NAME, - INIT_CONTAINER_CONFIG_MAP_KEY, - None) - bootstrapUnderTest.bootstrapInitContainerAndVolumes( - MAIN_CONTAINER_NAME, basePod()).build() - } - - private def bootstrapPodWithSubmittedDependencies(): Pod = { - val bootstrapUnderTest = new SparkPodInitContainerBootstrapImpl( - INIT_CONTAINER_IMAGE, - DOCKER_IMAGE_PULL_POLICY, - JARS_DOWNLOAD_PATH, - FILES_DOWNLOAD_PATH, - DOWNLOAD_TIMEOUT_MINUTES, - INIT_CONTAINER_CONFIG_MAP_NAME, - INIT_CONTAINER_CONFIG_MAP_KEY, - Some(submittedDependencyPlugin)) - bootstrapUnderTest.bootstrapInitContainerAndVolumes( - MAIN_CONTAINER_NAME, basePod()).build() + test("Pod: Volume Mounts") { + val returnedPodWithCont = sparkPodInit.bootstrapInitContainerAndVolumes( + PodWithDetachedInitContainer( + pod = basePod().build(), + initContainer = new Container(), + mainContainer = new ContainerBuilder().withName(MAIN_CONTAINER_NAME).build())) + val returnedPod = returnedPodWithCont.pod + assert(returnedPod.getMetadata.getName === "spark-pod") + val volumes = returnedPod.getSpec.getVolumes.asScala.toList + assert(volumes.head.getName === INIT_CONTAINER_PROPERTIES_FILE_VOLUME) + assert(volumes.head.getConfigMap.getName === INIT_CONTAINER_CONFIG_MAP_NAME) + assert(volumes.head.getConfigMap.getItems.asScala.map( + i => (i.getKey, i.getPath)) === + List((INIT_CONTAINER_CONFIG_MAP_KEY, INIT_CONTAINER_PROPERTIES_FILE_NAME))) + assert(volumes(1).getName === INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME) + assert(volumes(1).getEmptyDir === new EmptyDirVolumeSource()) + assert(volumes(2).getName === INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME) + assert(volumes(2).getEmptyDir === new EmptyDirVolumeSource()) } private def basePod(): PodBuilder = { @@ -169,9 +99,6 @@ class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAf .withName("spark-pod") .endMetadata() .withNewSpec() - .addNewContainer() - .withName(MAIN_CONTAINER_NAME) - .endContainer() - .endSpec() + .endSpec() } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala deleted file mode 100644 index 473d369c8eca3..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes - -import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder} -import scala.collection.JavaConverters._ - -import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.kubernetes.constants._ - -class SubmittedDependencyInitContainerVolumesPluginSuite extends SparkFunSuite { - - private val SECRET_NAME = "secret" - private val SECRET_MOUNT_PATH = "/mnt/secrets" - private val plugin = new InitContainerResourceStagingServerSecretPluginImpl( - SECRET_NAME, SECRET_MOUNT_PATH) - - test("The init container should have the secret volume mount.") { - val baseInitContainer = new ContainerBuilder().withName("container") - val configuredInitContainer = plugin.mountResourceStagingServerSecretIntoInitContainer( - baseInitContainer).build() - val volumeMounts = configuredInitContainer.getVolumeMounts.asScala - assert(volumeMounts.size === 1) - assert(volumeMounts.exists { volumeMount => - volumeMount.getName === INIT_CONTAINER_SECRET_VOLUME_NAME && - volumeMount.getMountPath === SECRET_MOUNT_PATH - }) - } - - test("The pod should have the secret volume.") { - val basePod = new PodBuilder() - .withNewMetadata().withName("pod").endMetadata() - .withNewSpec() - .addNewContainer() - .withName("container") - .endContainer() - .endSpec() - val configuredPod = plugin.addResourceStagingServerSecretVolumeToPod(basePod).build() - val volumes = configuredPod.getSpec.getVolumes.asScala - assert(volumes.size === 1) - assert(volumes.exists { volume => - volume.getName === INIT_CONTAINER_SECRET_VOLUME_NAME && - Option(volume.getSecret).map(_.getSecretName).contains(SECRET_NAME) - }) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientSuite.scala new file mode 100644 index 0000000000000..965ee75c248b8 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientSuite.scala @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
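A brief aside keyed to the secret-plugin suite above (InitContainerResourceStagingServerSecretPluginSuite): a hedged sketch of the volume wiring the plugin is expected to produce. The secret name and mount path mirror the test's constants, while the volume name here is an assumed value of INIT_CONTAINER_SECRET_VOLUME_NAME, not taken from this change.

  import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder}

  // Mount the staging-server secret into the init-container at the configured path...
  val initContainerWithSecret = new ContainerBuilder()
    .withName("init-container")
    .addNewVolumeMount()
      .withName("spark-init-secret")   // assumed volume name for illustration
      .withMountPath("/tmp/secret")
      .endVolumeMount()
    .build()

  // ...and add the backing secret volume to the pod so the mount can resolve.
  val podWithSecretVolume = new PodBuilder()
    .withNewMetadata().withName("spark-pod").endMetadata()
    .withNewSpec()
      .addNewVolume()
        .withName("spark-init-secret")
        .withNewSecret().withSecretName("init-secret").endSecret()
        .endVolume()
      .endSpec()
    .build()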
+ */ +package org.apache.spark.deploy.kubernetes.submit + +import com.google.common.collect.Iterables +import io.fabric8.kubernetes.api.model.{ContainerBuilder, DoneablePod, HasMetadata, Pod, PodBuilder, PodList, Secret, SecretBuilder} +import io.fabric8.kubernetes.client.{KubernetesClient, Watch} +import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, NamespaceVisitFromServerGetWatchDeleteRecreateWaitApplicable, PodResource, Resource} +import org.mockito.{ArgumentCaptor, Mock, MockitoAnnotations} +import org.mockito.Mockito.{doReturn, verify, when} +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.BeforeAndAfter +import org.scalatest.mock.MockitoSugar._ +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.submitsteps.{DriverConfigurationStep, KubernetesDriverSpec} + +class ClientSuite extends SparkFunSuite with BeforeAndAfter { + + private val DRIVER_POD_UID = "pod-id" + private val DRIVER_POD_API_VERSION = "v1" + private val DRIVER_POD_KIND = "pod" + + private type ResourceList = NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable[ + HasMetadata, Boolean] + private type Pods = MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] + + @Mock + private var kubernetesClient: KubernetesClient = _ + + @Mock + private var podOperations: Pods = _ + + @Mock + private var namedPods: PodResource[Pod, DoneablePod] = _ + + @Mock + private var loggingPodStatusWatcher: LoggingPodStatusWatcher = _ + + @Mock + private var resourceList: ResourceList = _ + + private val submissionSteps = Seq(FirstTestConfigurationStep, SecondTestConfigurationStep) + private var createdPodArgumentCaptor: ArgumentCaptor[Pod] = _ + private var createdResourcesArgumentCaptor: ArgumentCaptor[HasMetadata] = _ + + before { + MockitoAnnotations.initMocks(this) + when(kubernetesClient.pods()).thenReturn(podOperations) + when(podOperations.withName(FirstTestConfigurationStep.podName)).thenReturn(namedPods) + + createdPodArgumentCaptor = ArgumentCaptor.forClass(classOf[Pod]) + createdResourcesArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) + when(podOperations.create(createdPodArgumentCaptor.capture())).thenAnswer(new Answer[Pod] { + override def answer(invocation: InvocationOnMock): Pod = { + new PodBuilder(invocation.getArgumentAt(0, classOf[Pod])) + .editMetadata() + .withUid(DRIVER_POD_UID) + .endMetadata() + .withApiVersion(DRIVER_POD_API_VERSION) + .withKind(DRIVER_POD_KIND) + .build() + } + }) + when(podOperations.withName(FirstTestConfigurationStep.podName)).thenReturn(namedPods) + when(namedPods.watch(loggingPodStatusWatcher)).thenReturn(mock[Watch]) + doReturn(resourceList) + .when(kubernetesClient) + .resourceList(createdResourcesArgumentCaptor.capture()) + } + + test("The client should configure the pod with the submission steps.") { + val submissionClient = new Client( + submissionSteps, + new SparkConf(false), + kubernetesClient, + false, + "spark", + loggingPodStatusWatcher) + submissionClient.run() + val createdPod = createdPodArgumentCaptor.getValue + assert(createdPod.getMetadata.getName === FirstTestConfigurationStep.podName) + assert(createdPod.getMetadata.getLabels.asScala === + Map(FirstTestConfigurationStep.labelKey -> FirstTestConfigurationStep.labelValue)) + 
assert(createdPod.getMetadata.getAnnotations.asScala === + Map(SecondTestConfigurationStep.annotationKey -> + SecondTestConfigurationStep.annotationValue)) + assert(createdPod.getSpec.getContainers.size() === 1) + assert(createdPod.getSpec.getContainers.get(0).getName === + SecondTestConfigurationStep.containerName) + } + + test("The client should create the secondary Kubernetes resources.") { + val submissionClient = new Client( + submissionSteps, + new SparkConf(false), + kubernetesClient, + false, + "spark", + loggingPodStatusWatcher) + submissionClient.run() + val createdPod = createdPodArgumentCaptor.getValue + val otherCreatedResources = createdResourcesArgumentCaptor.getAllValues + assert(otherCreatedResources.size === 1) + val createdResource = Iterables.getOnlyElement(otherCreatedResources).asInstanceOf[Secret] + assert(createdResource.getMetadata.getName === FirstTestConfigurationStep.secretName) + assert(createdResource.getData.asScala === + Map(FirstTestConfigurationStep.secretKey -> FirstTestConfigurationStep.secretData)) + val ownerReference = Iterables.getOnlyElement(createdResource.getMetadata.getOwnerReferences) + assert(ownerReference.getName === createdPod.getMetadata.getName) + assert(ownerReference.getKind === DRIVER_POD_KIND) + assert(ownerReference.getUid === DRIVER_POD_UID) + assert(ownerReference.getApiVersion === DRIVER_POD_API_VERSION) + } + + test("The client should attach the driver container with the appropriate JVM options.") { + val sparkConf = new SparkConf(false) + .set("spark.logConf", "true") + .set( + org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS, + "-XX:+|-HeapDumpOnOutOfMemoryError") + val submissionClient = new Client( + submissionSteps, + sparkConf, + kubernetesClient, + false, + "spark", + loggingPodStatusWatcher) + submissionClient.run() + val createdPod = createdPodArgumentCaptor.getValue + val driverContainer = Iterables.getOnlyElement(createdPod.getSpec.getContainers) + assert(driverContainer.getName === SecondTestConfigurationStep.containerName) + val driverJvmOptsEnv = Iterables.getOnlyElement(driverContainer.getEnv) + assert(driverJvmOptsEnv.getName === ENV_DRIVER_JAVA_OPTS) + val driverJvmOpts = driverJvmOptsEnv.getValue.split(" ").toSet + assert(driverJvmOpts.contains("-Dspark.logConf=true")) + assert(driverJvmOpts.contains( + s"-D${SecondTestConfigurationStep.sparkConfKey}=" + + SecondTestConfigurationStep.sparkConfValue)) + assert(driverJvmOpts.contains( + "-XX:+|-HeapDumpOnOutOfMemoryError")) + } + + test("Waiting for app completion should stall on the watcher") { + val submissionClient = new Client( + submissionSteps, + new SparkConf(false), + kubernetesClient, + true, + "spark", + loggingPodStatusWatcher) + submissionClient.run() + verify(loggingPodStatusWatcher).awaitCompletion() + } + +} + +private object FirstTestConfigurationStep extends DriverConfigurationStep { + + val podName = "test-pod" + val secretName = "test-secret" + val labelKey = "first-submit" + val labelValue = "true" + val secretKey = "secretKey" + val secretData = "secretData" + + override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { + val modifiedPod = new PodBuilder(driverSpec.driverPod) + .editMetadata() + .withName(podName) + .addToLabels(labelKey, labelValue) + .endMetadata() + .build() + val additionalResource = new SecretBuilder() + .withNewMetadata() + .withName(secretName) + .endMetadata() + .addToData(secretKey, secretData) + .build() + driverSpec.copy( + driverPod = modifiedPod, + otherKubernetesResources = 
driverSpec.otherKubernetesResources ++ Seq(additionalResource)) + } +} + +private object SecondTestConfigurationStep extends DriverConfigurationStep { + + val annotationKey = "second-submit" + val annotationValue = "submitted" + val sparkConfKey = "spark.custom-conf" + val sparkConfValue = "custom-conf-value" + val containerName = "driverContainer" + + override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { + val modifiedPod = new PodBuilder(driverSpec.driverPod) + .editMetadata() + .addToAnnotations(annotationKey, annotationValue) + .endMetadata() + .build() + val resolvedSparkConf = driverSpec.driverSparkConf.clone().set(sparkConfKey, sparkConfValue) + val modifiedContainer = new ContainerBuilder(driverSpec.driverContainer) + .withName(containerName) + .build() + driverSpec.copy( + driverPod = modifiedPod, + driverSparkConf = resolvedSparkConf, + driverContainer = modifiedContainer) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala deleted file mode 100644 index a58a37691f4eb..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala +++ /dev/null @@ -1,558 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
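As a side note on the JVM-options test in the ClientSuite above: the assertions imply that the submission client flattens the resolved SparkConf and any user-supplied driver options into a single ENV_DRIVER_JAVA_OPTS value. A rough sketch of that flattening, with the helper name chosen here purely for illustration rather than taken from the client's actual code:

  import org.apache.spark.SparkConf

  // Turn every resolved conf entry into a -D system property and append the user's
  // spark.driver.extraJavaOptions, yielding the single string placed in the driver env var.
  def resolveDriverJavaOptions(resolvedConf: SparkConf): String = {
    val confAsJvmOptions = resolvedConf.getAll.map { case (k, v) => s"-D$k=$v" }
    val extraJavaOptions = resolvedConf.getOption("spark.driver.extraJavaOptions").toSeq
    (confAsJvmOptions ++ extraJavaOptions).mkString(" ")
  }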
- */ -package org.apache.spark.deploy.kubernetes.submit - -import java.io.File - -import io.fabric8.kubernetes.api.model._ -import io.fabric8.kubernetes.client.{KubernetesClient, Watch} -import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource} -import org.hamcrest.{BaseMatcher, Description} -import org.mockito.{AdditionalAnswers, ArgumentCaptor, Mock, MockitoAnnotations} -import org.mockito.Matchers.{any, anyVararg, argThat, eq => mockitoEq} -import org.mockito.Mockito.{times, verify, when} -import org.mockito.invocation.InvocationOnMock -import org.mockito.stubbing.Answer -import org.scalatest.{BeforeAndAfter, Matchers} - -import scala.collection.JavaConverters._ -import scala.collection.mutable -import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} -import org.apache.spark.deploy.kubernetes.{KubernetesExternalShuffleService, KubernetesShuffleBlockHandler, SparkPodInitContainerBootstrap} -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.network.netty.SparkTransportConf -import org.apache.spark.network.shuffle.kubernetes.KubernetesExternalShuffleClient - -class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { - private val JARS_RESOURCE = SubmittedResourceIdAndSecret("jarsId", "jarsSecret") - private val FILES_RESOURCE = SubmittedResourceIdAndSecret("filesId", "filesSecret") - private val SUBMITTED_RESOURCES = SubmittedResources(JARS_RESOURCE, FILES_RESOURCE) - private val BOOTSTRAPPED_POD_ANNOTATION = "bootstrapped" - private val TRUE = "true" - private val APP_NAME = "spark-test" - private val APP_RESOURCE_PREFIX = "spark-prefix" - private val APP_ID = "spark-id" - private val CUSTOM_LABEL_KEY = "customLabel" - private val CUSTOM_LABEL_VALUE = "customLabelValue" - private val DEPRECATED_CUSTOM_LABEL_KEY = "deprecatedCustomLabel" - private val DEPRECATED_CUSTOM_LABEL_VALUE = "deprecatedCustomLabelValue" - private val ALL_EXPECTED_LABELS = Map( - CUSTOM_LABEL_KEY -> CUSTOM_LABEL_VALUE, - DEPRECATED_CUSTOM_LABEL_KEY -> DEPRECATED_CUSTOM_LABEL_VALUE, - SPARK_APP_ID_LABEL -> APP_ID, - SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) - private val CUSTOM_ANNOTATION_KEY = "customAnnotation" - private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" - private val DEPRECATED_CUSTOM_ANNOTATION_KEY = "deprecatedCustomAnnotation" - private val DEPRECATED_CUSTOM_ANNOTATION_VALUE = "deprecatedCustomAnnotationValue" - private val INIT_CONTAINER_SECRET_NAME = "init-container-secret" - private val INIT_CONTAINER_SECRET_DATA = Map("secret-key" -> "secret-data") - private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" - private val PYSPARK_APP_ARGS = Array(null, "500") - private val APP_ARGS = Array("3", "20") - private val SPARK_JARS = Seq( - "hdfs://localhost:9000/app/jars/jar1.jar", "file:///app/jars/jar2.jar") - private val RESOLVED_SPARK_JARS = Seq( - "hdfs://localhost:9000/app/jars/jar1.jar", "file:///var/data/spark-jars/jar2.jar") - private val RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS = Seq( - "/var/data/spark-jars/jar1.jar", "/var/data/spark-jars/jar2.jar") - private val SPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.txt", "file:///app/files/file2.txt") - private val PYSPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.py", - "file:///app/files/file2.py", - "local:///app/files/file3.py", - "http://app/files/file4.py", - "file:///app/files/file5.py") - private val RESOLVED_PYSPARK_FILES = Seq( - 
"hdfs://localhost:9000/app/files/file1.py", - "/var/spark-data/spark-files/file2.py", - "local:///app/files/file3.py", - "http://app/files/file4.py") - private val PYSPARK_PRIMARY_FILE = "file:///app/files/file5.py" - private val RESOLVED_PYSPARK_PRIMARY_FILE = "/var/spark-data/spark-file/file5.py" - - private val RESOLVED_SPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.txt", "file:///var/data/spark-files/file2.txt") - private val INIT_CONTAINER_SECRET = new SecretBuilder() - .withNewMetadata() - .withName(INIT_CONTAINER_SECRET_NAME) - .endMetadata() - .withData(INIT_CONTAINER_SECRET_DATA.asJava) - .build() - private val CUSTOM_JAVA_OPTION_KEY = "myappoption" - private val CUSTOM_JAVA_OPTION_VALUE = "myappoptionvalue" - private val DRIVER_JAVA_OPTIONS = s"-D$CUSTOM_JAVA_OPTION_KEY=$CUSTOM_JAVA_OPTION_VALUE" - private val DRIVER_EXTRA_CLASSPATH = "/var/data/spark-app-custom/custom-jar.jar" - private val CONFIG_MAP_NAME = "config-map" - private val CONFIG_MAP_DATA = Map("config-map-key" -> "config-map-data") - private val INIT_CONTAINER_CONFIG_MAP = new ConfigMapBuilder() - .withNewMetadata() - .withName(CONFIG_MAP_NAME) - .endMetadata() - .withData(CONFIG_MAP_DATA.asJava) - .build() - private val CUSTOM_DRIVER_IMAGE = "spark-custom-driver:latest" - private val DRIVER_MEMORY_MB = 512 - private val DRIVER_MEMORY_OVERHEAD_MB = 128 - private val SPARK_CONF = new SparkConf(true) - .set(DRIVER_DOCKER_IMAGE, CUSTOM_DRIVER_IMAGE) - .set(org.apache.spark.internal.config.DRIVER_MEMORY, DRIVER_MEMORY_MB.toLong) - .set(KUBERNETES_DRIVER_MEMORY_OVERHEAD, DRIVER_MEMORY_OVERHEAD_MB.toLong) - .set(KUBERNETES_DRIVER_LABELS, s"$DEPRECATED_CUSTOM_LABEL_KEY=$DEPRECATED_CUSTOM_LABEL_VALUE") - .set(KUBERNETES_DRIVER_ANNOTATIONS, - s"$DEPRECATED_CUSTOM_ANNOTATION_KEY=$DEPRECATED_CUSTOM_ANNOTATION_VALUE") - .set(s"$KUBERNETES_DRIVER_LABEL_PREFIX$CUSTOM_LABEL_KEY", CUSTOM_LABEL_VALUE) - .set(s"$KUBERNETES_DRIVER_ANNOTATION_PREFIX$CUSTOM_ANNOTATION_KEY", CUSTOM_ANNOTATION_VALUE) - .set(org.apache.spark.internal.config.DRIVER_CLASS_PATH, DRIVER_EXTRA_CLASSPATH) - .set(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS, DRIVER_JAVA_OPTIONS) - private val EXECUTOR_INIT_CONF_KEY = "executor-init-conf" - private val SPARK_CONF_WITH_EXECUTOR_INIT_CONF = SPARK_CONF.clone() - .set(EXECUTOR_INIT_CONF_KEY, TRUE) - private val DRIVER_POD_UID = "driver-pod-uid" - private val DRIVER_POD_KIND = "pod" - private val DRIVER_POD_API_VERSION = "v1" - private val CREDENTIALS_SECRET_NAME = "credentials-secret" - private val CREDENTIALS_SECRET_DATA = Map("credentials-secret-key" -> "credentials-secret-value") - private val CREDENTIALS_SECRET = new SecretBuilder() - .withNewMetadata() - .withName(CREDENTIALS_SECRET_NAME) - .endMetadata() - .withData(CREDENTIALS_SECRET_DATA.asJava) - .build() - private val CREDENTIALS_SET_CONF = "spark.kubernetes.driverCredentials.provided" - private val CREDENTIALS_SET_ANNOTATION = "credentials-set" - - @Mock - private var containerLocalizedFilesResolver: ContainerLocalizedFilesResolver = _ - @Mock - private var executorInitContainerConfiguration: ExecutorInitContainerConfiguration = _ - @Mock - private var submittedDependencyUploader: SubmittedDependencyUploader = _ - @Mock - private var submittedDependenciesSecretBuilder: SubmittedDependencySecretBuilder = _ - @Mock - private var initContainerBootstrap: SparkPodInitContainerBootstrap = _ - @Mock - private var initContainerComponentsProvider: DriverInitContainerComponentsProvider = _ - @Mock - private var kubernetesClient: KubernetesClient 
= _ - @Mock - private var podOps: MixedOperation[ - Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] = _ - private type ResourceListOps = NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable[ - HasMetadata, java.lang.Boolean] - @Mock - private var resourceListOps: ResourceListOps = _ - @Mock - private var credentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider = _ - @Mock - private var fileMounter: DriverPodKubernetesFileMounter = _ - @Mock - private var credentialsMounter: DriverPodKubernetesCredentialsMounter = _ - @Mock - private var loggingPodStatusWatcher: LoggingPodStatusWatcher = _ - @Mock - private var namedPodResource: PodResource[Pod, DoneablePod] = _ - @Mock - private var watch: Watch = _ - - before { - MockitoAnnotations.initMocks(this) - when(initContainerComponentsProvider.provideInitContainerBootstrap()) - .thenReturn(initContainerBootstrap) - when(submittedDependencyUploader.uploadJars()).thenReturn(JARS_RESOURCE) - when(submittedDependencyUploader.uploadFiles()).thenReturn(FILES_RESOURCE) - when(initContainerBootstrap - .bootstrapInitContainerAndVolumes(mockitoEq(DRIVER_CONTAINER_NAME), any())) - .thenAnswer(new Answer[PodBuilder] { - override def answer(invocationOnMock: InvocationOnMock): PodBuilder = { - invocationOnMock.getArgumentAt(1, classOf[PodBuilder]).editMetadata() - .addToAnnotations(BOOTSTRAPPED_POD_ANNOTATION, TRUE) - .endMetadata() - } - }) - when(initContainerComponentsProvider.provideContainerLocalizedFilesResolver( - any[String])).thenReturn(containerLocalizedFilesResolver) - when(initContainerComponentsProvider.provideDriverPodFileMounter()) - .thenReturn(fileMounter) - when(submittedDependenciesSecretBuilder.build()) - .thenReturn(INIT_CONTAINER_SECRET) - when(kubernetesClient.pods()).thenReturn(podOps) - when(podOps.create(any())).thenAnswer(new Answer[Pod] { - override def answer(invocation: InvocationOnMock): Pod = { - new PodBuilder(invocation.getArgumentAt(0, classOf[Pod])) - .editMetadata() - .withUid(DRIVER_POD_UID) - .endMetadata() - .withKind(DRIVER_POD_KIND) - .withApiVersion(DRIVER_POD_API_VERSION) - .build() - } - }) - when(podOps.withName(s"$APP_RESOURCE_PREFIX-driver")).thenReturn(namedPodResource) - when(fileMounter.addPySparkFiles( - mockitoEq(RESOLVED_PYSPARK_PRIMARY_FILE), - mockitoEq(RESOLVED_PYSPARK_FILES.mkString(",")), - any[String], - any())).thenAnswer( new Answer[PodBuilder] { - override def answer(invocation: InvocationOnMock) : PodBuilder = { - invocation.getArgumentAt(3, classOf[PodBuilder]) - .editMetadata() - .withUid(DRIVER_POD_UID) - .withName(s"$APP_RESOURCE_PREFIX-driver") - .addToLabels("pyspark-test", "true") - .endMetadata() - .withKind(DRIVER_POD_KIND) - .withApiVersion(DRIVER_POD_API_VERSION) - } - }) - when(namedPodResource.watch(loggingPodStatusWatcher)).thenReturn(watch) - when(containerLocalizedFilesResolver.resolveSubmittedAndRemoteSparkJars()) - .thenReturn(RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS) - when(containerLocalizedFilesResolver.resolveSubmittedSparkJars()) - .thenReturn(RESOLVED_SPARK_JARS) - when(containerLocalizedFilesResolver.resolveSubmittedSparkFiles()) - .thenReturn(RESOLVED_SPARK_FILES) - when(containerLocalizedFilesResolver.resolvePrimaryResourceFile()) - .thenReturn(RESOLVED_PYSPARK_PRIMARY_FILE) - when(containerLocalizedFilesResolver.resolveSubmittedPySparkFiles()) - .thenReturn(RESOLVED_PYSPARK_FILES) - when(executorInitContainerConfiguration.configureSparkConfForExecutorInitContainer(SPARK_CONF)) - .thenReturn(SPARK_CONF_WITH_EXECUTOR_INIT_CONF) - 
when(kubernetesClient.resourceList(anyVararg[HasMetadata]())).thenReturn(resourceListOps) - when(credentialsMounterProvider.getDriverPodKubernetesCredentialsMounter()) - .thenReturn(credentialsMounter) - } - - test("Run with dependency uploader") { - expectationsForNoMountedCredentials() - when(initContainerComponentsProvider - .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) - .thenReturn(Some(submittedDependencyUploader)) - when(initContainerComponentsProvider - .provideSubmittedDependenciesSecretBuilder(Some(SUBMITTED_RESOURCES.secrets()))) - .thenReturn(Some(submittedDependenciesSecretBuilder)) - when(initContainerComponentsProvider.provideInitContainerBundle(mockitoEq( - Option(SUBMITTED_RESOURCES.ids())), - mockitoEq(RESOLVED_SPARK_JARS ++ RESOLVED_SPARK_FILES))) - .thenReturn(Option(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, - initContainerBootstrap, executorInitContainerConfiguration))) - runAndVerifyDriverPodHasCorrectProperties() - val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) - verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) - val createdResources = resourceListArgumentCaptor.getAllValues.asScala - assert(createdResources.size === 2) - verifyCreatedResourcesHaveOwnerReferences(createdResources) - assert(createdResources.exists { - case secret: Secret => - secret.getMetadata.getName == INIT_CONTAINER_SECRET_NAME && - secret.getData.asScala == INIT_CONTAINER_SECRET_DATA - case _ => false - }) - verifyConfigMapWasCreated(createdResources) - verify(submittedDependencyUploader).uploadJars() - verify(submittedDependencyUploader).uploadFiles() - verify(initContainerComponentsProvider) - .provideSubmittedDependenciesSecretBuilder(Some(SUBMITTED_RESOURCES.secrets())) - } - - test("Run without dependency uploader") { - expectationsForNoMountedCredentials() - expectationsForNoDependencyUploader() - runAndVerifyDriverPodHasCorrectProperties() - val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) - verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) - val createdResources = resourceListArgumentCaptor.getAllValues.asScala - assert(createdResources.size === 1) - verifyCreatedResourcesHaveOwnerReferences(createdResources) - verifyConfigMapWasCreated(createdResources) - verify(submittedDependencyUploader, times(0)).uploadJars() - verify(submittedDependencyUploader, times(0)).uploadFiles() - verify(initContainerComponentsProvider) - .provideSubmittedDependenciesSecretBuilder(None) - } - - test("Run with mounted credentials") { - expectationsForNoDependencyUploader() - when(credentialsMounter.createCredentialsSecret()).thenReturn(Some(CREDENTIALS_SECRET)) - when(credentialsMounter.mountDriverKubernetesCredentials( - any(), mockitoEq(DRIVER_CONTAINER_NAME), mockitoEq(Some(CREDENTIALS_SECRET)))) - .thenAnswer(new Answer[PodBuilder] { - override def answer(invocation: InvocationOnMock): PodBuilder = { - invocation.getArgumentAt(0, classOf[PodBuilder]).editMetadata() - .addToAnnotations(CREDENTIALS_SET_ANNOTATION, TRUE) - .endMetadata() - } - }) - when(credentialsMounter.setDriverPodKubernetesCredentialLocations(any())) - .thenAnswer(new Answer[SparkConf] { - override def answer(invocation: InvocationOnMock): SparkConf = { - invocation.getArgumentAt(0, classOf[SparkConf]).clone().set(CREDENTIALS_SET_CONF, TRUE) - } - }) - runAndVerifyPodMatchesPredicate { p => - Option(p) - .filter(pod => containerHasCorrectJvmOptions(pod, _(CREDENTIALS_SET_CONF) == TRUE)) - .exists 
{ pod => - pod.getMetadata.getAnnotations.asScala(CREDENTIALS_SET_ANNOTATION) == TRUE - } - } - val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) - verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) - val createdResources = resourceListArgumentCaptor.getAllValues.asScala - assert(createdResources.size === 2) - verifyCreatedResourcesHaveOwnerReferences(createdResources) - assert(createdResources.exists { - case secret: Secret => - secret.getMetadata.getName == CREDENTIALS_SECRET_NAME && - secret.getData.asScala == CREDENTIALS_SECRET_DATA - case _ => false - }) - } - - test("Waiting for completion should await completion on the status watcher.") { - expectationsForNoMountedCredentials() - expectationsForNoDependencyUploader() - new Client( - APP_NAME, - APP_RESOURCE_PREFIX, - APP_ID, - "", - None, - MAIN_CLASS, - SPARK_CONF, - APP_ARGS, - true, - kubernetesClient, - initContainerComponentsProvider, - credentialsMounterProvider, - loggingPodStatusWatcher).run() - verify(loggingPodStatusWatcher).awaitCompletion() - } - - test("Mounting environmental variables correctly onto Driver Pod for PySpark Jobs") { - expectationsForNoMountedCredentials() - expectationsForNoDependencyUploader() - expectationsForNoSparkJarsOrFiles() - runAndVerifyDriverPodHasCorrectPySparkProperties() - } - - private def expectationsForNoSparkJarsOrFiles(): Unit = { - when(containerLocalizedFilesResolver.resolveSubmittedSparkFiles()) - .thenReturn(Nil) - when(containerLocalizedFilesResolver.resolveSubmittedSparkJars()) - .thenReturn(Nil) - } - - private def expectationsForNoDependencyUploader(): Unit = { - when(initContainerComponentsProvider - .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) - .thenReturn(None) - when(initContainerComponentsProvider - .provideSubmittedDependenciesSecretBuilder(None)) - .thenReturn(None) - when(initContainerComponentsProvider.provideInitContainerBundle(mockitoEq(None), - mockitoEq(RESOLVED_SPARK_JARS ++ RESOLVED_SPARK_FILES))) - .thenReturn(Some(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, - initContainerBootstrap, executorInitContainerConfiguration))) - } - - private def expectationsForNoMountedCredentials(): Unit = { - when(credentialsMounter.setDriverPodKubernetesCredentialLocations(any())) - .thenAnswer(AdditionalAnswers.returnsFirstArg()) - when(credentialsMounter.createCredentialsSecret()).thenReturn(None) - when(credentialsMounter.mountDriverKubernetesCredentials( - any(), mockitoEq(DRIVER_CONTAINER_NAME), mockitoEq(None))) - .thenAnswer(AdditionalAnswers.returnsFirstArg()) - } - - private def verifyCreatedResourcesHaveOwnerReferences( - createdResources: mutable.Buffer[HasMetadata]): Unit = { - assert(createdResources.forall { resource => - val owners = resource.getMetadata.getOwnerReferences.asScala - owners.size === 1 && - owners.head.getController && - owners.head.getKind == DRIVER_POD_KIND && - owners.head.getUid == DRIVER_POD_UID && - owners.head.getName == s"$APP_RESOURCE_PREFIX-driver" && - owners.head.getApiVersion == DRIVER_POD_API_VERSION - }) - } - - private def verifyConfigMapWasCreated(createdResources: mutable.Buffer[HasMetadata]): Unit = { - assert(createdResources.exists { - case configMap: ConfigMap => - configMap.getMetadata.getName == CONFIG_MAP_NAME && - configMap.getData.asScala == CONFIG_MAP_DATA - case _ => false - }) - } - - private def runAndVerifyDriverPodHasCorrectProperties(): Unit = { - val expectedOptions = SPARK_CONF.getAll - .filterNot(_._1 == 
org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS.key) - .toMap ++ - Map( - "spark.app.id" -> APP_ID, - KUBERNETES_DRIVER_POD_NAME.key -> s"$APP_RESOURCE_PREFIX-driver", - KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> APP_RESOURCE_PREFIX, - EXECUTOR_INIT_CONF_KEY -> TRUE, - CUSTOM_JAVA_OPTION_KEY -> CUSTOM_JAVA_OPTION_VALUE, - "spark.jars" -> RESOLVED_SPARK_JARS.mkString(","), - "spark.files" -> RESOLVED_SPARK_FILES.mkString(",")) - runAndVerifyPodMatchesPredicate { p => - Option(p) - .filter(_.getMetadata.getName == s"$APP_RESOURCE_PREFIX-driver") - .filter(podHasCorrectAnnotations) - .filter(_.getMetadata.getLabels.asScala == ALL_EXPECTED_LABELS) - .filter(containerHasCorrectBasicContainerConfiguration) - .filter(containerHasCorrectBasicEnvs) - .filter(containerHasCorrectMountedClasspath) - .exists(pod => containerHasCorrectJvmOptions(pod, _ == expectedOptions)) - } - } - - private def runAndVerifyDriverPodHasCorrectPySparkProperties(): Unit = { - when(initContainerComponentsProvider.provideContainerLocalizedFilesResolver( - mockitoEq(PYSPARK_PRIMARY_FILE))).thenReturn(containerLocalizedFilesResolver) - when(initContainerComponentsProvider.provideInitContainerBundle( - any[Option[SubmittedResourceIds]], any[Iterable[String]])) - .thenReturn(Some(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, - initContainerBootstrap, executorInitContainerConfiguration))) - runAndVerifyPySparkPodMatchesPredicate { p => - Option(p).exists(pod => containerHasCorrectPySparkEnvs(pod)) - } - } - - private def runAndVerifyPodMatchesPredicate(pred: (Pod => Boolean)): Unit = { - new Client( - APP_NAME, - APP_RESOURCE_PREFIX, - APP_ID, - "", - None, - MAIN_CLASS, - SPARK_CONF, - APP_ARGS, - false, - kubernetesClient, - initContainerComponentsProvider, - credentialsMounterProvider, - loggingPodStatusWatcher).run() - val podMatcher = new BaseMatcher[Pod] { - override def matches(o: scala.Any): Boolean = { - o match { - case p: Pod => pred(p) - case _ => false - } - } - override def describeTo(description: Description): Unit = {} - } - verify(podOps).create(argThat(podMatcher)) - } - - private def containerHasCorrectJvmOptions( - pod: Pod, optionsCorrectnessPredicate: (Map[String, String] => Boolean)): Boolean = { - val driverContainer = pod.getSpec.getContainers.asScala.head - val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)) - envs.toMap.get(ENV_DRIVER_JAVA_OPTS).exists { javaOptions => - val splitOptions = javaOptions.split(" ") - splitOptions.forall(_.startsWith("-D")) && - optionsCorrectnessPredicate(splitOptions.map { option => - val withoutPrefix = option.substring(2) - (withoutPrefix.split("=", 2)(0), withoutPrefix.split("=", 2)(1)) - }.toMap) - } - } - - private def containerHasCorrectMountedClasspath(pod: Pod): Boolean = { - val driverContainer = pod.getSpec.getContainers.asScala.head - val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)) - envs.toMap.get(ENV_MOUNTED_CLASSPATH).exists { classpath => - val mountedClasspathEntities = classpath.split(File.pathSeparator) - mountedClasspathEntities.toSet == RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS.toSet - } - } - - private def containerHasCorrectBasicEnvs(pod: Pod): Boolean = { - val driverContainer = pod.getSpec.getContainers.asScala.head - val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)) - val expectedBasicEnvs = Map( - ENV_SUBMIT_EXTRA_CLASSPATH -> DRIVER_EXTRA_CLASSPATH, - ENV_DRIVER_MEMORY -> s"${DRIVER_MEMORY_MB + DRIVER_MEMORY_OVERHEAD_MB}m", - ENV_DRIVER_MAIN_CLASS 
-> MAIN_CLASS, - ENV_DRIVER_ARGS -> APP_ARGS.mkString(" ")) - expectedBasicEnvs.toSet.subsetOf(envs.toSet) - } - - private def containerHasCorrectPySparkEnvs(pod: Pod): Boolean = { - val driverPodLabels = - pod.getMetadata.getLabels.asScala.map(env => (env._1.toString, env._2.toString)) - val expectedBasicLabels = Map( - "pyspark-test" -> "true", - "spark-role" -> "driver") - expectedBasicLabels.toSet.subsetOf(driverPodLabels.toSet) - } - - private def containerHasCorrectBasicContainerConfiguration(pod: Pod): Boolean = { - val containers = pod.getSpec.getContainers.asScala - containers.size == 1 && - containers.head.getName == DRIVER_CONTAINER_NAME && - containers.head.getImage == CUSTOM_DRIVER_IMAGE && - containers.head.getImagePullPolicy == "IfNotPresent" - } - - private def podHasCorrectAnnotations(pod: Pod): Boolean = { - val expectedAnnotations = Map( - DEPRECATED_CUSTOM_ANNOTATION_KEY -> DEPRECATED_CUSTOM_ANNOTATION_VALUE, - CUSTOM_ANNOTATION_KEY -> CUSTOM_ANNOTATION_VALUE, - SPARK_APP_NAME_ANNOTATION -> APP_NAME, - BOOTSTRAPPED_POD_ANNOTATION -> TRUE) - pod.getMetadata.getAnnotations.asScala == expectedAnnotations - } - - private def runAndVerifyPySparkPodMatchesPredicate(pred: (Pod => Boolean)): Unit = { - new Client( - APP_NAME, - APP_RESOURCE_PREFIX, - APP_ID, - PYSPARK_PRIMARY_FILE, - Option(new PythonSubmissionResourcesImpl(PYSPARK_PRIMARY_FILE, PYSPARK_APP_ARGS)), - MAIN_CLASS, - SPARK_CONF, - PYSPARK_APP_ARGS, - false, - kubernetesClient, - initContainerComponentsProvider, - credentialsMounterProvider, - loggingPodStatusWatcher).run() - val podMatcher = new BaseMatcher[Pod] { - override def matches(o: scala.Any): Boolean = { - o match { - case p: Pod => pred(p) - case _ => false - } - } - override def describeTo(description: Description): Unit = {} - } - verify(podOps).create(argThat(podMatcher)) - } -} - - diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala deleted file mode 100644 index 7e51abcd7b8e0..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit - -import org.apache.spark.SparkFunSuite - -class ContainerLocalizedFilesResolverSuite extends SparkFunSuite { - private val SPARK_JARS = Seq( - "hdfs://localhost:9000/app/jars/jar1.jar", - "file:///app/jars/jar2.jar", - "local:///app/jars/jar3.jar", - "http://app/jars/jar4.jar") - private val SPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.txt", - "file:///app/files/file2.txt", - "local:///app/files/file3.txt", - "http://app/files/file4.txt") - private val PYSPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.py", - "file:///app/files/file2.py", - "local:///app/files/file3.py", - "http://app/files/file4.py", - "file:///app/files/file5.py") - private val JARS_DOWNLOAD_PATH = "/var/data/spark-jars" - private val FILES_DOWNLOAD_PATH = "/var/data/spark-files" - private val PYSPARK_PRIMARY_FILE = "file:///app/files/file5.py" - private val localizedFilesResolver = new ContainerLocalizedFilesResolverImpl( - SPARK_JARS, - SPARK_FILES, - PYSPARK_FILES, - PYSPARK_PRIMARY_FILE, - JARS_DOWNLOAD_PATH, - FILES_DOWNLOAD_PATH) - - test("Submitted and remote Spark jars should resolve non-local uris to download path.") { - val resolvedJars = localizedFilesResolver.resolveSubmittedAndRemoteSparkJars() - val expectedResolvedJars = Seq( - s"$JARS_DOWNLOAD_PATH/jar1.jar", - s"$JARS_DOWNLOAD_PATH/jar2.jar", - "/app/jars/jar3.jar", - s"$JARS_DOWNLOAD_PATH/jar4.jar") - assert(resolvedJars === expectedResolvedJars) - } - - test("Submitted Spark jars should resolve to the download path.") { - val resolvedJars = localizedFilesResolver.resolveSubmittedSparkJars() - val expectedResolvedJars = Seq( - "hdfs://localhost:9000/app/jars/jar1.jar", - s"$JARS_DOWNLOAD_PATH/jar2.jar", - "local:///app/jars/jar3.jar", - "http://app/jars/jar4.jar") - assert(resolvedJars === expectedResolvedJars) - } - - test("Submitted Spark files should resolve to the download path.") { - val resolvedFiles = localizedFilesResolver.resolveSubmittedSparkFiles() - val expectedResolvedFiles = Seq( - "hdfs://localhost:9000/app/files/file1.txt", - s"$FILES_DOWNLOAD_PATH/file2.txt", - "local:///app/files/file3.txt", - "http://app/files/file4.txt") - assert(resolvedFiles === expectedResolvedFiles) - } - test("Submitted PySpark files should resolve to the download path.") { - val resolvedPySparkFiles = localizedFilesResolver.resolveSubmittedPySparkFiles() - val expectedPySparkFiles = Seq( - "hdfs://localhost:9000/app/files/file1.py", - s"$FILES_DOWNLOAD_PATH/file2.py", - "local:///app/files/file3.py", - "http://app/files/file4.py") - assert(resolvedPySparkFiles === expectedPySparkFiles) - } - test("Submitted PySpark Primary resource should resolve to the download path.") { - val resolvedPySparkPrimary = - localizedFilesResolver.resolvePrimaryResourceFile() - val expectedPySparkPrimary = s"$FILES_DOWNLOAD_PATH/file5.py" - assert(resolvedPySparkPrimary === expectedPySparkPrimary) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestratorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestratorSuite.scala new file mode 100644 index 0000000000000..e4f221ad99cc5 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestratorSuite.scala @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.deploy.kubernetes.submit
+
+import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.deploy.kubernetes.submit.submitsteps.{BaseDriverConfigurationStep, DependencyResolutionStep, DriverKubernetesCredentialsStep, InitContainerBootstrapStep, PythonStep}
+
+private[spark] class DriverConfigurationStepsOrchestratorSuite extends SparkFunSuite {
+
+  private val NAMESPACE = "default"
+  private val APP_ID = "spark-app-id"
+  private val LAUNCH_TIME = 975256L
+  private val APP_NAME = "spark"
+  private val MAIN_CLASS = "org.apache.spark.examples.SparkPi"
+  private val APP_ARGS = Array("arg1", "arg2")
+  private val ADDITIONAL_PYTHON_FILES = Seq("local:///var/apps/python/py1.py")
+
+  test("Base submission steps without an init-container or python files.") {
+    val sparkConf = new SparkConf(false)
+      .set("spark.jars", "local:///var/apps/jars/jar1.jar")
+    val mainAppResource = JavaMainAppResource("local:///var/apps/jars/main.jar")
+    val orchestrator = new DriverConfigurationStepsOrchestrator(
+      NAMESPACE,
+      APP_ID,
+      LAUNCH_TIME,
+      mainAppResource,
+      APP_NAME,
+      MAIN_CLASS,
+      APP_ARGS,
+      ADDITIONAL_PYTHON_FILES,
+      sparkConf)
+    val steps = orchestrator.getAllConfigurationSteps()
+    assert(steps.size === 3)
+    assert(steps(0).isInstanceOf[BaseDriverConfigurationStep])
+    assert(steps(1).isInstanceOf[DriverKubernetesCredentialsStep])
+    assert(steps(2).isInstanceOf[DependencyResolutionStep])
+  }
+
+  test("Submission steps with an init-container.") {
+    val sparkConf = new SparkConf(false)
+      .set("spark.jars", "hdfs://localhost:9000/var/apps/jars/jar1.jar")
+    val mainAppResource = JavaMainAppResource("local:///var/apps/jars/main.jar")
+    val orchestrator = new DriverConfigurationStepsOrchestrator(
+      NAMESPACE,
+      APP_ID,
+      LAUNCH_TIME,
+      mainAppResource,
+      APP_NAME,
+      MAIN_CLASS,
+      APP_ARGS,
+      ADDITIONAL_PYTHON_FILES,
+      sparkConf)
+    val steps = orchestrator.getAllConfigurationSteps()
+    assert(steps.size === 4)
+    assert(steps(0).isInstanceOf[BaseDriverConfigurationStep])
+    assert(steps(1).isInstanceOf[DriverKubernetesCredentialsStep])
+    assert(steps(2).isInstanceOf[DependencyResolutionStep])
+    assert(steps(3).isInstanceOf[InitContainerBootstrapStep])
+  }
+
+  test("Submission steps with python files.") {
+    val sparkConf = new SparkConf(false)
+    val mainAppResource = PythonMainAppResource("local:///var/apps/python/main.py")
+    val orchestrator = new DriverConfigurationStepsOrchestrator(
+      NAMESPACE,
+      APP_ID,
+      LAUNCH_TIME,
+      mainAppResource,
+      APP_NAME,
+      MAIN_CLASS,
+      APP_ARGS,
+      ADDITIONAL_PYTHON_FILES,
+      sparkConf)
+    val steps = orchestrator.getAllConfigurationSteps()
+    assert(steps.size === 4)
+    assert(steps(0).isInstanceOf[BaseDriverConfigurationStep])
+    assert(steps(1).isInstanceOf[DriverKubernetesCredentialsStep])
+
assert(steps(2).isInstanceOf[DependencyResolutionStep]) + assert(steps(3).isInstanceOf[PythonStep]) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala deleted file mode 100644 index 2e0a7ba5098b2..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import io.fabric8.kubernetes.api.model.{PodBuilder, SecretBuilder} -import org.scalatest.prop.TableDrivenPropertyChecks -import scala.collection.JavaConverters._ - -import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.kubernetes.KubernetesCredentials -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ - -class DriverPodKubernetesCredentialsMounterSuite - extends SparkFunSuite with TableDrivenPropertyChecks { - - private val CLIENT_KEY_DATA = "client-key-data" - private val CLIENT_CERT_DATA = "client-cert-data" - private val OAUTH_TOKEN_DATA = "oauth-token" - private val CA_CERT_DATA = "ca-cert-data" - private val SUBMITTER_LOCAL_DRIVER_KUBERNETES_CREDENTIALS = KubernetesCredentials( - caCertDataBase64 = Some(CA_CERT_DATA), - clientKeyDataBase64 = Some(CLIENT_KEY_DATA), - clientCertDataBase64 = Some(CLIENT_CERT_DATA), - oauthTokenBase64 = Some(OAUTH_TOKEN_DATA)) - private val APP_ID = "app-id" - private val USER_SPECIFIED_CLIENT_KEY_FILE = Some("/var/data/client-key.pem") - private val USER_SPECIFIED_CLIENT_CERT_FILE = Some("/var/data/client-cert.pem") - private val USER_SPECIFIED_OAUTH_TOKEN_FILE = Some("/var/data/token.txt") - private val USER_SPECIFIED_CA_CERT_FILE = Some("/var/data/ca.pem") - - // Different configurations of credentials mounters - private val credentialsMounterWithPreMountedFiles = - new DriverPodKubernetesCredentialsMounterImpl( - kubernetesAppId = APP_ID, - submitterLocalDriverPodKubernetesCredentials = SUBMITTER_LOCAL_DRIVER_KUBERNETES_CREDENTIALS, - maybeUserSpecifiedMountedClientKeyFile = USER_SPECIFIED_CLIENT_KEY_FILE, - maybeUserSpecifiedMountedClientCertFile = USER_SPECIFIED_CLIENT_CERT_FILE, - maybeUserSpecifiedMountedOAuthTokenFile = USER_SPECIFIED_OAUTH_TOKEN_FILE, - maybeUserSpecifiedMountedCaCertFile = USER_SPECIFIED_CA_CERT_FILE) - private val credentialsMounterWithoutPreMountedFiles = - new DriverPodKubernetesCredentialsMounterImpl( - kubernetesAppId = APP_ID, - 
submitterLocalDriverPodKubernetesCredentials = SUBMITTER_LOCAL_DRIVER_KUBERNETES_CREDENTIALS, - maybeUserSpecifiedMountedClientKeyFile = None, - maybeUserSpecifiedMountedClientCertFile = None, - maybeUserSpecifiedMountedOAuthTokenFile = None, - maybeUserSpecifiedMountedCaCertFile = None) - private val credentialsMounterWithoutAnyDriverCredentials = - new DriverPodKubernetesCredentialsMounterImpl( - APP_ID, KubernetesCredentials(None, None, None, None), None, None, None, None) - - // Test matrices - private val TEST_MATRIX_EXPECTED_SPARK_CONFS = Table( - ("Credentials Mounter Implementation", - "Expected client key file", - "Expected client cert file", - "Expected CA Cert file", - "Expected OAuth Token File"), - (credentialsMounterWithoutAnyDriverCredentials, - None, - None, - None, - None), - (credentialsMounterWithoutPreMountedFiles, - Some(DRIVER_CREDENTIALS_CLIENT_KEY_PATH), - Some(DRIVER_CREDENTIALS_CLIENT_CERT_PATH), - Some(DRIVER_CREDENTIALS_CA_CERT_PATH), - Some(DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH)), - (credentialsMounterWithPreMountedFiles, - USER_SPECIFIED_CLIENT_KEY_FILE, - USER_SPECIFIED_CLIENT_CERT_FILE, - USER_SPECIFIED_CA_CERT_FILE, - USER_SPECIFIED_OAUTH_TOKEN_FILE)) - - private val TEST_MATRIX_EXPECTED_CREDENTIALS_SECRET = Table( - ("Credentials Mounter Implementation", "Expected Credentials Secret Data"), - (credentialsMounterWithoutAnyDriverCredentials, None), - (credentialsMounterWithoutPreMountedFiles, - Some(KubernetesSecretNameAndData( - data = Map[String, String]( - DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME -> CLIENT_KEY_DATA, - DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME -> CLIENT_CERT_DATA, - DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME -> CA_CERT_DATA, - DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME -> OAUTH_TOKEN_DATA - ), - name = s"$APP_ID-kubernetes-credentials"))), - (credentialsMounterWithPreMountedFiles, None)) - - test("Credentials mounter should set the driver's Kubernetes credentials locations") { - forAll(TEST_MATRIX_EXPECTED_SPARK_CONFS) { - case (credentialsMounter, - expectedClientKeyFile, - expectedClientCertFile, - expectedCaCertFile, - expectedOAuthTokenFile) => - val baseSparkConf = new SparkConf() - val resolvedSparkConf = - credentialsMounter.setDriverPodKubernetesCredentialLocations(baseSparkConf) - assert(resolvedSparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX") === - expectedClientKeyFile) - assert(resolvedSparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX") === - expectedClientCertFile) - assert(resolvedSparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX") === - expectedCaCertFile) - assert(resolvedSparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX") === - expectedOAuthTokenFile) - } - } - - test("Credentials mounter should create the correct credentials secret.") { - forAll(TEST_MATRIX_EXPECTED_CREDENTIALS_SECRET) { - case (credentialsMounter, expectedSecretNameAndData) => - val builtSecret = credentialsMounter.createCredentialsSecret() - val secretNameAndData = builtSecret.map { secret => - KubernetesSecretNameAndData(secret.getMetadata.getName, secret.getData.asScala.toMap) - } - assert(secretNameAndData === expectedSecretNameAndData) - } - } - - test("When credentials secret is provided, driver pod should mount the secret volume.") { - val credentialsSecret = new SecretBuilder() - .withNewMetadata().withName("secret").endMetadata() - .addToData("secretKey", 
"secretValue") - .build() - val originalPodSpec = new PodBuilder() - .withNewMetadata().withName("pod").endMetadata() - .withNewSpec() - .addNewContainer() - .withName("container") - .endContainer() - .endSpec() - val podSpecWithMountedDriverKubernetesCredentials = - credentialsMounterWithoutPreMountedFiles.mountDriverKubernetesCredentials( - originalPodSpec, "container", Some(credentialsSecret)).build() - val volumes = podSpecWithMountedDriverKubernetesCredentials.getSpec.getVolumes.asScala - assert(volumes.exists(_.getName == DRIVER_CREDENTIALS_SECRET_VOLUME_NAME)) - volumes.find(_.getName == DRIVER_CREDENTIALS_SECRET_VOLUME_NAME).foreach { secretVolume => - assert(secretVolume.getSecret != null && secretVolume.getSecret.getSecretName == "secret") - } - } - - test("When credentials secret is absent, driver pod should not be changed.") { - val originalPodSpec = new PodBuilder() - val nonAdjustedPodSpec = - credentialsMounterWithoutAnyDriverCredentials.mountDriverKubernetesCredentials( - originalPodSpec, "driver", None) - assert(nonAdjustedPodSpec === originalPodSpec) - } -} - -private case class KubernetesSecretNameAndData(name: String, data: Map[String, String]) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfigurationSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfigurationSuite.scala deleted file mode 100644 index ead1d49b8a37c..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfigurationSuite.scala +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit - -import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.kubernetes.config._ - -class ExecutorInitContainerConfigurationSuite extends SparkFunSuite { - - private val SECRET_NAME = "init-container-secret" - private val SECRET_MOUNT_DIR = "/mnt/secrets/spark" - private val CONFIG_MAP_NAME = "spark-config-map" - private val CONFIG_MAP_KEY = "spark-config-map-key" - - test("Not passing a secret name should not set the secret value.") { - val baseSparkConf = new SparkConf(false) - val configurationUnderTest = new ExecutorInitContainerConfigurationImpl( - None, - SECRET_MOUNT_DIR, - CONFIG_MAP_NAME, - CONFIG_MAP_KEY) - val resolvedSparkConf = configurationUnderTest - .configureSparkConfForExecutorInitContainer(baseSparkConf) - assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_CONFIG_MAP).contains(CONFIG_MAP_NAME)) - assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY).contains(CONFIG_MAP_KEY)) - assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR) - .contains(SECRET_MOUNT_DIR)) - assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET).isEmpty) - } - - test("Passing a secret name should set the secret value.") { - val baseSparkConf = new SparkConf(false) - val configurationUnderTest = new ExecutorInitContainerConfigurationImpl( - Some(SECRET_NAME), - SECRET_MOUNT_DIR, - CONFIG_MAP_NAME, - CONFIG_MAP_KEY) - val resolvedSparkConf = configurationUnderTest - .configureSparkConfForExecutorInitContainer(baseSparkConf) - assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET).contains(SECRET_NAME)) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala deleted file mode 100644 index 9b60b7ef2b786..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit - -import org.apache.spark.{SSLOptions, SparkConf, SparkFunSuite} -import org.apache.spark.deploy.kubernetes.config._ - -import scala.collection.JavaConverters._ -import io.fabric8.kubernetes.api.model.{ContainerBuilder, Pod, PodBuilder} -import org.mockito.{Mock, MockitoAnnotations} -import org.mockito.Mockito.when -import org.scalatest.BeforeAndAfter - -private[spark] class PythonSubmissionResourcesSuite extends SparkFunSuite with BeforeAndAfter { - private val PYSPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.py", - "file:///app/files/file2.py", - "local:///app/files/file3.py", - "http://app/files/file4.py", - "file:///app/files/file5.py") - private val RESOLVED_PYSPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.py", - "/var/spark-data/spark-files/file2.py", - "local:///app/file`s/file3.py", - "http://app/files/file4.py") - private val PYSPARK_PRIMARY_FILE = "file:///app/files/file5.py" - private val RESOLVED_PYSPARK_PRIMARY_FILE = "/var/data/spark-files/file5.py" - - private val pyFilesResource = new PythonSubmissionResourcesImpl( - PYSPARK_PRIMARY_FILE, Array(PYSPARK_FILES.mkString(","), "500") - ) - private val pyResource = new PythonSubmissionResourcesImpl( - PYSPARK_PRIMARY_FILE, Array(null, "500") - ) - private val DRIVER_CONTAINER_NAME = "pyspark_container" - private val driverContainer = new ContainerBuilder() - .withName(DRIVER_CONTAINER_NAME) - .build() - private val basePodBuilder = new PodBuilder() - .withNewMetadata() - .withName("base_pod") - .endMetadata() - .withNewSpec() - .addToContainers(driverContainer) - .endSpec() - - @Mock - private var driverInitContainer: DriverInitContainerComponentsProviderImpl = _ - @Mock - private var localizedFileResolver: ContainerLocalizedFilesResolverImpl = _ - before { - MockitoAnnotations.initMocks(this) - when(driverInitContainer.provideDriverPodFileMounter()).thenReturn( - new DriverPodKubernetesFileMounterImpl() - ) - when(localizedFileResolver.resolvePrimaryResourceFile()).thenReturn( - RESOLVED_PYSPARK_PRIMARY_FILE) - } - test("Test with --py-files included") { - assert(pyFilesResource.sparkJars === Seq.empty[String]) - assert(pyFilesResource.pySparkFiles === - PYSPARK_PRIMARY_FILE +: PYSPARK_FILES) - assert(pyFilesResource.primaryPySparkResource(localizedFileResolver) === - RESOLVED_PYSPARK_PRIMARY_FILE) - val driverPod: Pod = pyFilesResource.driverPodWithPySparkEnvs( - driverInitContainer.provideDriverPodFileMounter(), - RESOLVED_PYSPARK_PRIMARY_FILE, - RESOLVED_PYSPARK_FILES.mkString(","), - DRIVER_CONTAINER_NAME, - basePodBuilder - ) - val driverContainer = driverPod.getSpec.getContainers.asScala.head - val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)).toMap - envs.get("PYSPARK_PRIMARY") foreach{ a => assert (a === RESOLVED_PYSPARK_PRIMARY_FILE) } - envs.get("PYSPARK_FILES") foreach{ a => assert (a === RESOLVED_PYSPARK_FILES.mkString(",")) } - } - - test("Test without --py-files") { - assert(pyResource.sparkJars === Seq.empty[String]) - assert(pyResource.pySparkFiles === Array(PYSPARK_PRIMARY_FILE)) - assert(pyResource.primaryPySparkResource(localizedFileResolver) === - RESOLVED_PYSPARK_PRIMARY_FILE) - val driverPod: Pod = pyResource.driverPodWithPySparkEnvs( - driverInitContainer.provideDriverPodFileMounter(), - RESOLVED_PYSPARK_PRIMARY_FILE, - "", - DRIVER_CONTAINER_NAME, - basePodBuilder - ) - val driverContainer = driverPod.getSpec.getContainers.asScala.head - val envs = driverContainer.getEnv.asScala.map(env => 
(env.getName, env.getValue)).toMap - envs.get("PYSPARK_PRIMARY") foreach{ a => assert (a === RESOLVED_PYSPARK_PRIMARY_FILE) } - envs.get("PYSPARK_FILES") foreach{ a => assert (a === "") } - } -} \ No newline at end of file diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilderSuite.scala deleted file mode 100644 index f1e1ff7013496..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilderSuite.scala +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import java.io.StringReader -import java.util.Properties - -import com.google.common.collect.Maps -import org.mockito.Mockito.{verify, when} -import org.scalatest.BeforeAndAfter -import org.scalatest.mock.MockitoSugar._ -import scala.collection.JavaConverters._ - -import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.kubernetes.config._ - -class SparkInitContainerConfigMapBuilderSuite extends SparkFunSuite with BeforeAndAfter { - - private val JARS = Seq( - "hdfs://localhost:9000/app/jars/jar1.jar", - "file:///app/jars/jar2.jar", - "http://localhost:9000/app/jars/jar3.jar", - "local:///app/jars/jar4.jar") - private val FILES = Seq( - "hdfs://localhost:9000/app/files/file1.txt", - "file:///app/files/file2.txt", - "http://localhost:9000/app/files/file3.txt", - "local:///app/files/file4.txt") - private val JARS_DOWNLOAD_PATH = "/var/data/jars" - private val FILES_DOWNLOAD_PATH = "/var/data/files" - private val CONFIG_MAP_NAME = "config-map" - private val CONFIG_MAP_KEY = "config-map-key" - - test("Config map without submitted dependencies sets remote download configurations") { - val configMap = new SparkInitContainerConfigMapBuilderImpl( - JARS, - FILES, - JARS_DOWNLOAD_PATH, - FILES_DOWNLOAD_PATH, - CONFIG_MAP_NAME, - CONFIG_MAP_KEY, - None).build() - assert(configMap.getMetadata.getName === CONFIG_MAP_NAME) - val maybeConfigValue = configMap.getData.asScala.get(CONFIG_MAP_KEY) - assert(maybeConfigValue.isDefined) - maybeConfigValue.foreach { configValue => - val propertiesStringReader = new StringReader(configValue) - val properties = new Properties() - properties.load(propertiesStringReader) - val propertiesMap = Maps.fromProperties(properties).asScala - val remoteJarsString = propertiesMap.get(INIT_CONTAINER_REMOTE_JARS.key) - assert(remoteJarsString.isDefined) - val remoteJars = remoteJarsString.map(_.split(",")).toSet.flatten - assert(remoteJars === - 
Set("hdfs://localhost:9000/app/jars/jar1.jar", "http://localhost:9000/app/jars/jar3.jar")) - val remoteFilesString = propertiesMap.get(INIT_CONTAINER_REMOTE_FILES.key) - assert(remoteFilesString.isDefined) - val remoteFiles = remoteFilesString.map(_.split(",")).toSet.flatten - assert(remoteFiles === - Set("hdfs://localhost:9000/app/files/file1.txt", - "http://localhost:9000/app/files/file3.txt")) - assert(propertiesMap(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION.key) === JARS_DOWNLOAD_PATH) - assert(propertiesMap(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION.key) === FILES_DOWNLOAD_PATH) - } - } - - test("Config map with submitted dependencies adds configurations from plugin") { - val submittedDependenciesPlugin = mock[SubmittedDependencyInitContainerConfigPlugin] - when(submittedDependenciesPlugin.configurationsToFetchSubmittedDependencies()) - .thenReturn(Map("customConf" -> "customConfValue")) - val configMap = new SparkInitContainerConfigMapBuilderImpl( - JARS, - FILES, - JARS_DOWNLOAD_PATH, - FILES_DOWNLOAD_PATH, - CONFIG_MAP_NAME, - CONFIG_MAP_KEY, - Some(submittedDependenciesPlugin)).build() - val configValue = configMap.getData.asScala(CONFIG_MAP_KEY) - val propertiesStringReader = new StringReader(configValue) - val properties = new Properties() - properties.load(propertiesStringReader) - val propertiesMap = Maps.fromProperties(properties).asScala - assert(propertiesMap("customConf") === "customConfValue") - verify(submittedDependenciesPlugin).configurationsToFetchSubmittedDependencies() - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPluginSuite.scala deleted file mode 100644 index 8431b77c9e85f..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPluginSuite.scala +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit - -import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.kubernetes.config._ - -class SubmittedDependencyInitContainerConfigPluginSuite extends SparkFunSuite { - private val STAGING_SERVER_URI = "http://localhost:9000" - private val STAGING_SERVER_INTERNAL_URI = "http://internalHost:9000" - private val JARS_RESOURCE_ID = "jars-id" - private val FILES_RESOURCE_ID = "files-id" - private val JARS_SECRET_KEY = "jars" - private val FILES_SECRET_KEY = "files" - private val TRUSTSTORE_SECRET_KEY = "trustStore" - private val CLIENT_CERT_SECRET_KEY = "client-cert" - private val SECRETS_VOLUME_MOUNT_PATH = "/var/data" - private val TRUSTSTORE_PASSWORD = "trustStore" - private val TRUSTSTORE_FILE = "/mnt/secrets/trustStore.jks" - private val CLIENT_CERT_URI = "local:///mnt/secrets/client-cert.pem" - private val TRUSTSTORE_TYPE = "jks" - - test("Plugin should provide configuration for fetching uploaded dependencies") { - val configPluginUnderTest = new SubmittedDependencyInitContainerConfigPluginImpl( - STAGING_SERVER_URI, - JARS_RESOURCE_ID, - FILES_RESOURCE_ID, - JARS_SECRET_KEY, - FILES_SECRET_KEY, - TRUSTSTORE_SECRET_KEY, - CLIENT_CERT_SECRET_KEY, - false, - None, - None, - None, - None, - SECRETS_VOLUME_MOUNT_PATH) - val addedConfigurations = configPluginUnderTest.configurationsToFetchSubmittedDependencies() - val expectedConfigurations = Map( - RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, - INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> JARS_RESOURCE_ID, - INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> FILES_RESOURCE_ID, - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> - s"$SECRETS_VOLUME_MOUNT_PATH/$JARS_SECRET_KEY", - INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> - s"$SECRETS_VOLUME_MOUNT_PATH/$FILES_SECRET_KEY", - RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> "false") - assert(addedConfigurations === expectedConfigurations) - } - - test("Plugin should set up SSL with the appropriate trustStore if it's provided.") { - val configPluginUnderTest = new SubmittedDependencyInitContainerConfigPluginImpl( - STAGING_SERVER_URI, - JARS_RESOURCE_ID, - FILES_RESOURCE_ID, JARS_SECRET_KEY, - FILES_SECRET_KEY, - TRUSTSTORE_SECRET_KEY, - CLIENT_CERT_SECRET_KEY, - true, - Some(TRUSTSTORE_FILE), - Some(CLIENT_CERT_URI), - Some(TRUSTSTORE_PASSWORD), - Some(TRUSTSTORE_TYPE), - SECRETS_VOLUME_MOUNT_PATH) - val addedConfigurations = configPluginUnderTest.configurationsToFetchSubmittedDependencies() - val expectedSslConfigurations = Map( - RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> "true", - RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key -> - s"$SECRETS_VOLUME_MOUNT_PATH/$TRUSTSTORE_SECRET_KEY", - RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key -> TRUSTSTORE_PASSWORD, - RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE, - RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM.key -> "/mnt/secrets/client-cert.pem") - assert(expectedSslConfigurations.toSet.subsetOf(addedConfigurations.toSet)) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilderSuite.scala deleted file mode 100644 index 83fd568e7a3aa..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilderSuite.scala +++ /dev/null @@ -1,109 +0,0 @@ -/* - 
* Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import java.io.File - -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, Files} -import io.fabric8.kubernetes.api.model.Secret -import scala.collection.JavaConverters._ -import scala.collection.Map - -import org.apache.spark.SparkFunSuite -import org.apache.spark.util.Utils - -class SubmittedDependencySecretBuilderSuite extends SparkFunSuite { - - private val SECRET_NAME = "submitted-dependency-secret" - private val JARS_SECRET = "jars-secret" - private val FILES_SECRET = "files-secret" - private val JARS_SECRET_KEY = "jars-secret-key" - private val FILES_SECRET_KEY = "files-secret-key" - private val TRUSTSTORE_SECRET_KEY = "truststore-secret-key" - private val CLIENT_CERT_SECRET_KEY = "client-cert" - private val TRUSTSTORE_STRING_CONTENTS = "trustStore-contents" - private val CLIENT_CERT_STRING_CONTENTS = "client-certificate-contents" - - test("Building the secret without a trustStore") { - val builder = new SubmittedDependencySecretBuilderImpl( - SECRET_NAME, - JARS_SECRET, - FILES_SECRET, - JARS_SECRET_KEY, - FILES_SECRET_KEY, - TRUSTSTORE_SECRET_KEY, - CLIENT_CERT_SECRET_KEY, - None, - None) - val secret = builder.build() - assert(secret.getMetadata.getName === SECRET_NAME) - val secretDecodedData = decodeSecretData(secret) - val expectedSecretData = Map(JARS_SECRET_KEY -> JARS_SECRET, FILES_SECRET_KEY -> FILES_SECRET) - assert(secretDecodedData === expectedSecretData) - } - - private def decodeSecretData(secret: Secret): Map[String, String] = { - val secretData = secret.getData.asScala - secretData.mapValues(encoded => - new String(BaseEncoding.base64().decode(encoded), Charsets.UTF_8)) - } - - test("Building the secret with a trustStore") { - val tempSslDir = Utils.createTempDir(namePrefix = "temp-ssl-tests") - try { - val trustStoreFile = new File(tempSslDir, "trustStore.jks") - Files.write(TRUSTSTORE_STRING_CONTENTS, trustStoreFile, Charsets.UTF_8) - val clientCertFile = new File(tempSslDir, "cert.pem") - Files.write(CLIENT_CERT_STRING_CONTENTS, clientCertFile, Charsets.UTF_8) - val builder = new SubmittedDependencySecretBuilderImpl( - SECRET_NAME, - JARS_SECRET, - FILES_SECRET, - JARS_SECRET_KEY, - FILES_SECRET_KEY, - TRUSTSTORE_SECRET_KEY, - CLIENT_CERT_SECRET_KEY, - Some(trustStoreFile.getAbsolutePath), - Some(clientCertFile.getAbsolutePath)) - val secret = builder.build() - val decodedSecretData = decodeSecretData(secret) - assert(decodedSecretData(TRUSTSTORE_SECRET_KEY) === TRUSTSTORE_STRING_CONTENTS) - assert(decodedSecretData(CLIENT_CERT_SECRET_KEY) === CLIENT_CERT_STRING_CONTENTS) - } finally { - tempSslDir.delete() - } - } - - test("If trustStore and certificate are container-local, don't add 
secret entries") { - val builder = new SubmittedDependencySecretBuilderImpl( - SECRET_NAME, - JARS_SECRET, - FILES_SECRET, - JARS_SECRET_KEY, - FILES_SECRET_KEY, - TRUSTSTORE_SECRET_KEY, - CLIENT_CERT_SECRET_KEY, - Some("local:///mnt/secrets/trustStore.jks"), - Some("local:///mnt/secrets/cert.pem")) - val secret = builder.build() - val decodedSecretData = decodeSecretData(secret) - assert(!decodedSecretData.contains(TRUSTSTORE_SECRET_KEY)) - assert(!decodedSecretData.contains(CLIENT_CERT_SECRET_KEY)) - } - -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStepSuite.scala new file mode 100644 index 0000000000000..c7d80a16a1532 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStepSuite.scala @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.spark.deploy.kubernetes.submit.submitsteps
+
+import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder}
+import scala.collection.JavaConverters._
+
+import org.apache.spark.{SparkConf, SparkFunSuite}
+import org.apache.spark.deploy.kubernetes.config._
+import org.apache.spark.deploy.kubernetes.constants._
+
+private[spark] class BaseDriverConfigurationStepSuite extends SparkFunSuite {
+
+  private val APP_ID = "spark-app-id"
+  private val RESOURCE_NAME_PREFIX = "spark"
+  private val DRIVER_LABELS = Map("labelkey" -> "labelvalue")
+  private val DOCKER_IMAGE_PULL_POLICY = "IfNotPresent"
+  private val APP_NAME = "spark-test"
+  private val MAIN_CLASS = "org.apache.spark.examples.SparkPi"
+  private val APP_ARGS = Array("arg1", "arg2")
+  private val CUSTOM_ANNOTATION_KEY = "customAnnotation"
+  private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue"
+  private val DEPRECATED_CUSTOM_ANNOTATION_KEY = "customAnnotationDeprecated"
+  private val DEPRECATED_CUSTOM_ANNOTATION_VALUE = "customAnnotationDeprecatedValue"
+
+  test("Set all possible configurations from the user.") {
+    val sparkConf = new SparkConf()
+      .set(KUBERNETES_DRIVER_POD_NAME, "spark-driver-pod")
+      .set(org.apache.spark.internal.config.DRIVER_CLASS_PATH, "/opt/spark/spark-examples.jar")
+      .set("spark.driver.cores", "2")
+      .set(KUBERNETES_DRIVER_LIMIT_CORES, "4")
+      .set(org.apache.spark.internal.config.DRIVER_MEMORY, 256L)
+      .set(KUBERNETES_DRIVER_MEMORY_OVERHEAD, 200L)
+      .set(DRIVER_DOCKER_IMAGE, "spark-driver:latest")
+      .set(s"spark.kubernetes.driver.annotation.$CUSTOM_ANNOTATION_KEY", CUSTOM_ANNOTATION_VALUE)
+      .set("spark.kubernetes.driver.annotations",
+        s"$DEPRECATED_CUSTOM_ANNOTATION_KEY=$DEPRECATED_CUSTOM_ANNOTATION_VALUE")
+    val submissionStep = new BaseDriverConfigurationStep(
+      APP_ID,
+      RESOURCE_NAME_PREFIX,
+      DRIVER_LABELS,
+      DOCKER_IMAGE_PULL_POLICY,
+      APP_NAME,
+      MAIN_CLASS,
+      APP_ARGS,
+      sparkConf)
+    val basePod = new PodBuilder().withNewMetadata().endMetadata().withNewSpec().endSpec().build()
+    val baseDriverSpec = KubernetesDriverSpec(
+      driverPod = basePod,
+      driverContainer = new ContainerBuilder().build(),
+      driverSparkConf = new SparkConf(false),
+      otherKubernetesResources = Seq.empty[HasMetadata])
+
+    val preparedDriverSpec = submissionStep.configureDriver(baseDriverSpec)
+    assert(preparedDriverSpec.driverContainer.getName === DRIVER_CONTAINER_NAME)
+    assert(preparedDriverSpec.driverContainer.getImage === "spark-driver:latest")
+    assert(preparedDriverSpec.driverContainer.getImagePullPolicy === DOCKER_IMAGE_PULL_POLICY)
+    val envs = preparedDriverSpec.driverContainer
+      .getEnv
+      .asScala
+      .map(env => (env.getName, env.getValue))
+      .toMap
+    assert(envs.size === 4)
+    assert(envs(ENV_SUBMIT_EXTRA_CLASSPATH) === "/opt/spark/spark-examples.jar")
+    assert(envs(ENV_DRIVER_MEMORY) === "456m")
+    assert(envs(ENV_DRIVER_MAIN_CLASS) === MAIN_CLASS)
+    assert(envs(ENV_DRIVER_ARGS) === "arg1 arg2")
+    val resourceRequirements = preparedDriverSpec.driverContainer.getResources
+    val requests = resourceRequirements.getRequests.asScala
+    assert(requests("cpu").getAmount === "2")
+    assert(requests("memory").getAmount === "256M")
+    val limits = resourceRequirements.getLimits.asScala
+    assert(limits("memory").getAmount === "456M")
+    assert(limits("cpu").getAmount === "4")
+    val driverPodMetadata = preparedDriverSpec.driverPod.getMetadata
+    assert(driverPodMetadata.getName === "spark-driver-pod")
+    assert(driverPodMetadata.getLabels.asScala === DRIVER_LABELS)
+    val expectedAnnotations = Map(
+      CUSTOM_ANNOTATION_KEY -> CUSTOM_ANNOTATION_VALUE,
+      DEPRECATED_CUSTOM_ANNOTATION_KEY -> DEPRECATED_CUSTOM_ANNOTATION_VALUE,
+      SPARK_APP_NAME_ANNOTATION -> APP_NAME)
+    assert(driverPodMetadata.getAnnotations.asScala === expectedAnnotations)
+    assert(preparedDriverSpec.driverPod.getSpec.getRestartPolicy === "Never")
+    val resolvedSparkConf = preparedDriverSpec.driverSparkConf.getAll.toMap
+    val expectedSparkConf = Map(
+      KUBERNETES_DRIVER_POD_NAME.key -> "spark-driver-pod",
+      "spark.app.id" -> APP_ID,
+      KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> RESOURCE_NAME_PREFIX)
+    assert(resolvedSparkConf === expectedSparkConf)
+
+  }
+}
diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStepSuite.scala
new file mode 100644
index 0000000000000..3f7ec61074b0c
--- /dev/null
+++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStepSuite.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import java.io.File + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.constants._ + +private[spark] class DependencyResolutionStepSuite extends SparkFunSuite { + + private val SPARK_JARS = Seq( + "hdfs://localhost:9000/apps/jars/jar1.jar", + "file:///home/user/apps/jars/jar2.jar", + "local:///var/apps/jars/jar3.jar") + + private val SPARK_FILES = Seq( + "file:///home/user/apps/files/file1.txt", + "hdfs://localhost:9000/apps/files/file2.txt", + "local:///var/apps/files/file3.txt") + + private val JARS_DOWNLOAD_PATH = "/mnt/spark-data/jars" + private val FILES_DOWNLOAD_PATH = "/mnt/spark-data/files" + + test("Added dependencies should be resolved in Spark configuration and environment") { + val dependencyResolutionStep = new DependencyResolutionStep( + SPARK_JARS, + SPARK_FILES, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH) + val driverPod = new PodBuilder().build() + val baseDriverSpec = KubernetesDriverSpec( + driverPod = driverPod, + driverContainer = new ContainerBuilder().build(), + driverSparkConf = new SparkConf(false), + otherKubernetesResources = Seq.empty[HasMetadata]) + val preparedDriverSpec = dependencyResolutionStep.configureDriver(baseDriverSpec) + assert(preparedDriverSpec.driverPod === driverPod) + assert(preparedDriverSpec.otherKubernetesResources.isEmpty) + val resolvedSparkJars = preparedDriverSpec.driverSparkConf.get("spark.jars").split(",").toSet + val expectedResolvedSparkJars = Set( + "hdfs://localhost:9000/apps/jars/jar1.jar", + s"$JARS_DOWNLOAD_PATH/jar2.jar", + "/var/apps/jars/jar3.jar") + assert(resolvedSparkJars === expectedResolvedSparkJars) + val resolvedSparkFiles = preparedDriverSpec.driverSparkConf.get("spark.files").split(",").toSet + val expectedResolvedSparkFiles = Set( + s"$FILES_DOWNLOAD_PATH/file1.txt", + s"hdfs://localhost:9000/apps/files/file2.txt", + s"/var/apps/files/file3.txt") + assert(resolvedSparkFiles === expectedResolvedSparkFiles) + val driverEnv = preparedDriverSpec.driverContainer.getEnv.asScala + assert(driverEnv.size === 1) + assert(driverEnv.head.getName === ENV_MOUNTED_CLASSPATH) + val resolvedDriverClasspath = driverEnv.head.getValue.split(File.pathSeparator).toSet + val expectedResolvedDriverClasspath = Set( + s"$JARS_DOWNLOAD_PATH/jar1.jar", + s"$JARS_DOWNLOAD_PATH/jar2.jar", + "/var/apps/jars/jar3.jar") + assert(resolvedDriverClasspath === expectedResolvedDriverClasspath) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStepSuite.scala new file mode 100644 index 0000000000000..3d5664713a2b8 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStepSuite.scala @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import java.io.File + +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder, Secret} +import org.scalatest.BeforeAndAfter +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.util.Utils + +private[spark] class DriverKubernetesCredentialsStepSuite + extends SparkFunSuite with BeforeAndAfter { + + private val KUBERNETES_RESOURCE_NAME_PREFIX = "spark" + private var credentialsTempDirectory: File = _ + private val BASE_DRIVER_SPEC = new KubernetesDriverSpec( + driverPod = new PodBuilder().build(), + driverContainer = new ContainerBuilder().build(), + driverSparkConf = new SparkConf(false), + otherKubernetesResources = Seq.empty[HasMetadata]) + + before { + credentialsTempDirectory = Utils.createTempDir() + } + + after { + credentialsTempDirectory.delete() + } + + test("Don't set any credentials") { + val kubernetesCredentialsStep = new DriverKubernetesCredentialsStep( + new SparkConf(false), KUBERNETES_RESOURCE_NAME_PREFIX) + val preparedDriverSpec = kubernetesCredentialsStep.configureDriver(BASE_DRIVER_SPEC) + assert(preparedDriverSpec.driverPod === BASE_DRIVER_SPEC.driverPod) + assert(preparedDriverSpec.driverContainer === BASE_DRIVER_SPEC.driverContainer) + assert(preparedDriverSpec.otherKubernetesResources.isEmpty) + assert(preparedDriverSpec.driverSparkConf.getAll.isEmpty) + } + + test("Only set credentials that are manually mounted.") { + val submissionSparkConf = new SparkConf(false) + .set( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX", + "/mnt/secrets/my-token.txt") + .set( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", + "/mnt/secrets/my-key.pem") + .set( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", + "/mnt/secrets/my-cert.pem") + .set( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", + "/mnt/secrets/my-ca.pem") + + val kubernetesCredentialsStep = new DriverKubernetesCredentialsStep( + submissionSparkConf, KUBERNETES_RESOURCE_NAME_PREFIX) + val preparedDriverSpec = kubernetesCredentialsStep.configureDriver(BASE_DRIVER_SPEC) + assert(preparedDriverSpec.driverPod === BASE_DRIVER_SPEC.driverPod) + assert(preparedDriverSpec.driverContainer === BASE_DRIVER_SPEC.driverContainer) + assert(preparedDriverSpec.otherKubernetesResources.isEmpty) + assert(preparedDriverSpec.driverSparkConf.getAll.toMap === submissionSparkConf.getAll.toMap) + } + + test("Mount credentials from the submission client as a secret.") { + val caCertFile = writeCredentials("ca.pem", "ca-cert") + val clientKeyFile = writeCredentials("key.pem", "key") + val 
clientCertFile = writeCredentials("cert.pem", "cert") + val submissionSparkConf = new SparkConf(false) + .set( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$OAUTH_TOKEN_CONF_SUFFIX", + "token") + .set( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", + clientKeyFile.getAbsolutePath) + .set( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", + clientCertFile.getAbsolutePath) + .set( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", + caCertFile.getAbsolutePath) + val kubernetesCredentialsStep = new DriverKubernetesCredentialsStep( + submissionSparkConf, KUBERNETES_RESOURCE_NAME_PREFIX) + val preparedDriverSpec = kubernetesCredentialsStep.configureDriver( + BASE_DRIVER_SPEC.copy(driverSparkConf = submissionSparkConf)) + val expectedSparkConf = Map( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$OAUTH_TOKEN_CONF_SUFFIX" -> "", + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX" -> + DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH, + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX" -> + DRIVER_CREDENTIALS_CLIENT_KEY_PATH, + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX" -> + DRIVER_CREDENTIALS_CLIENT_CERT_PATH, + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX" -> + DRIVER_CREDENTIALS_CA_CERT_PATH, + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX" -> + clientKeyFile.getAbsolutePath, + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX" -> + clientCertFile.getAbsolutePath, + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX" -> + caCertFile.getAbsolutePath) + assert(preparedDriverSpec.driverSparkConf.getAll.toMap === expectedSparkConf) + assert(preparedDriverSpec.otherKubernetesResources.size === 1) + val credentialsSecret = preparedDriverSpec.otherKubernetesResources.head.asInstanceOf[Secret] + assert(credentialsSecret.getMetadata.getName === + s"$KUBERNETES_RESOURCE_NAME_PREFIX-kubernetes-credentials") + val decodedSecretData = credentialsSecret.getData.asScala.map { data => + (data._1, new String(BaseEncoding.base64().decode(data._2), Charsets.UTF_8)) + } + val expectedSecretData = Map( + DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME -> "ca-cert", + DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME -> "token", + DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME -> "key", + DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME -> "cert") + assert(decodedSecretData === expectedSecretData) + val driverPodVolumes = preparedDriverSpec.driverPod.getSpec.getVolumes.asScala + assert(driverPodVolumes.size === 1) + assert(driverPodVolumes.head.getName === DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) + assert(driverPodVolumes.head.getSecret != null) + assert(driverPodVolumes.head.getSecret.getSecretName === credentialsSecret.getMetadata.getName) + val driverContainerVolumeMount = preparedDriverSpec.driverContainer.getVolumeMounts.asScala + assert(driverContainerVolumeMount.size === 1) + assert(driverContainerVolumeMount.head.getName === DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) + assert(driverContainerVolumeMount.head.getMountPath === DRIVER_CREDENTIALS_SECRETS_BASE_DIR) + } + + private def writeCredentials(credentialsFileName: String, credentialsContents: String): File = { + val credentialsFile = new File(credentialsTempDirectory, credentialsFileName) + Files.write(credentialsContents, credentialsFile, Charsets.UTF_8) + credentialsFile + } +} diff --git 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStepSuite.scala new file mode 100644 index 0000000000000..ce0dcee6acc46 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStepSuite.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import io.fabric8.kubernetes.api.model._ +import org.scalatest.BeforeAndAfter +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} + +class PythonStepSuite extends SparkFunSuite with BeforeAndAfter { + private val FILE_DOWNLOAD_PATH = "/var/data/spark-files" + private val PYSPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.py", + "file:///app/files/file2.py", + "local:///app/files/file3.py", + "http://app/files/file4.py") + private val RESOLVED_PYSPARK_FILES = Seq( + FILE_DOWNLOAD_PATH + "/file1.py", + FILE_DOWNLOAD_PATH + "/file2.py", + "/app/files/file3.py", + FILE_DOWNLOAD_PATH + "/file4.py").mkString(",") + private val PYSPARK_PRIMARY_FILE = "file:///app/files/file5.py" + private val RESOLVED_PYSPARK_PRIMARY_FILE = FILE_DOWNLOAD_PATH + "/file5.py" + + test("testing PySpark with --py-files both local and remote files") { + val pyStep = new PythonStep( + PYSPARK_PRIMARY_FILE, + PYSPARK_FILES, + FILE_DOWNLOAD_PATH) + val returnedDriverContainer = pyStep.configureDriver( + KubernetesDriverSpec( + new Pod(), + new Container(), + Seq.empty[HasMetadata], + new SparkConf)) + assert(returnedDriverContainer.driverContainer.getEnv + .asScala.map(env => (env.getName, env.getValue)).toMap === + Map( + "PYSPARK_PRIMARY" -> RESOLVED_PYSPARK_PRIMARY_FILE, + "PYSPARK_FILES" -> RESOLVED_PYSPARK_FILES)) + } + + test("testing PySpark with empty --py-files ") { + val pyStep = new PythonStep( + PYSPARK_PRIMARY_FILE, + Seq.empty[String], + FILE_DOWNLOAD_PATH) + val returnedDriverContainer = pyStep.configureDriver( + KubernetesDriverSpec( + new Pod(), + new Container(), + Seq.empty[HasMetadata], + new SparkConf)) + assert(returnedDriverContainer.driverContainer.getEnv + .asScala.map(env => (env.getName, env.getValue)).toMap === + Map( + "PYSPARK_PRIMARY" -> RESOLVED_PYSPARK_PRIMARY_FILE, + "PYSPARK_FILES" -> "null")) + } + +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initContainerBootstrapStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initContainerBootstrapStepSuite.scala new file mode 
100644 index 0000000000000..b11b487111496 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initContainerBootstrapStepSuite.scala @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import java.io.StringReader +import java.util.Properties + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.google.common.collect.Maps +import io.fabric8.kubernetes.api.model.{ConfigMap, Container, ContainerBuilder, HasMetadata, PodBuilder, SecretBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer.{InitContainerConfigurationStep, InitContainerSpec} +import org.apache.spark.util.Utils + +private[spark] class initContainerBootstrapStepSuite extends SparkFunSuite { + + private val OBJECT_MAPPER = new ObjectMapper().registerModule(DefaultScalaModule) + private val CONFIG_MAP_NAME = "spark-init-config-map" + private val CONFIG_MAP_KEY = "spark-init-config-map-key" + + test("The init container bootstrap step should use all of the init container steps") { + val baseDriverSpec = KubernetesDriverSpec( + driverPod = new PodBuilder().build(), + driverContainer = new ContainerBuilder().build(), + driverSparkConf = new SparkConf(false), + otherKubernetesResources = Seq.empty[HasMetadata]) + val initContainerSteps = Seq( + FirstTestInitContainerConfigurationStep$, + SecondTestInitContainerConfigurationStep$) + val bootstrapStep = new InitContainerBootstrapStep( + initContainerSteps, + CONFIG_MAP_NAME, + CONFIG_MAP_KEY) + val preparedDriverSpec = bootstrapStep.configureDriver(baseDriverSpec) + assert(preparedDriverSpec.driverPod.getMetadata.getLabels.asScala === + FirstTestInitContainerConfigurationStep$.additionalLabels) + val additionalDriverEnv = preparedDriverSpec.driverContainer.getEnv.asScala + assert(additionalDriverEnv.size === 1) + assert(additionalDriverEnv.head.getName === + FirstTestInitContainerConfigurationStep$.additionalMainContainerEnvKey) + assert(additionalDriverEnv.head.getValue === + FirstTestInitContainerConfigurationStep$.additionalMainContainerEnvValue) + val driverAnnotations = preparedDriverSpec.driverPod.getMetadata.getAnnotations.asScala + assert(driverAnnotations.size === 1) + val initContainers = OBJECT_MAPPER.readValue( + driverAnnotations(INIT_CONTAINER_ANNOTATION), classOf[Array[Container]]) + assert(initContainers.length === 1) + val initContainerEnv = initContainers.head.getEnv.asScala 
+ assert(initContainerEnv.size === 1) + assert(initContainerEnv.head.getName === + SecondTestInitContainerConfigurationStep$.additionalInitContainerEnvKey) + assert(initContainerEnv.head.getValue === + SecondTestInitContainerConfigurationStep$.additionalInitContainerEnvValue) + val expectedSparkConf = Map( + EXECUTOR_INIT_CONTAINER_CONFIG_MAP.key -> CONFIG_MAP_NAME, + EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY.key -> CONFIG_MAP_KEY, + SecondTestInitContainerConfigurationStep$.additionalDriverSparkConfKey -> + SecondTestInitContainerConfigurationStep$.additionalDriverSparkConfValue) + assert(preparedDriverSpec.driverSparkConf.getAll.toMap === expectedSparkConf) + assert(preparedDriverSpec.otherKubernetesResources.size === 2) + assert(preparedDriverSpec.otherKubernetesResources.contains( + FirstTestInitContainerConfigurationStep$.additionalKubernetesResource)) + assert(preparedDriverSpec.otherKubernetesResources.exists { + case configMap: ConfigMap => + val hasMatchingName = configMap.getMetadata.getName == CONFIG_MAP_NAME + val configMapData = configMap.getData.asScala + val hasCorrectNumberOfEntries = configMapData.size == 1 + val initContainerPropertiesRaw = configMapData(CONFIG_MAP_KEY) + val initContainerProperties = new Properties() + Utils.tryWithResource(new StringReader(initContainerPropertiesRaw)) { + initContainerProperties.load(_) + } + val initContainerPropertiesMap = Maps.fromProperties(initContainerProperties).asScala + val expectedInitContainerProperties = Map( + SecondTestInitContainerConfigurationStep$.additionalInitContainerPropertyKey -> + SecondTestInitContainerConfigurationStep$.additionalInitContainerPropertyValue) + val hasMatchingProperties = initContainerPropertiesMap == expectedInitContainerProperties + hasMatchingName && hasCorrectNumberOfEntries && hasMatchingProperties + case _ => false + }) + } +} + +private object FirstTestInitContainerConfigurationStep$ extends InitContainerConfigurationStep { + + val additionalLabels = Map("additionalLabelkey" -> "additionalLabelValue") + val additionalMainContainerEnvKey = "TEST_ENV_MAIN_KEY" + val additionalMainContainerEnvValue = "TEST_ENV_MAIN_VALUE" + val additionalKubernetesResource = new SecretBuilder() + .withNewMetadata() + .withName("test-secret") + .endMetadata() + .addToData("secret-key", "secret-value") + .build() + + override def configureInitContainer(initContainerSpec: InitContainerSpec): InitContainerSpec = { + val driverPod = new PodBuilder(initContainerSpec.podToInitialize) + .editOrNewMetadata() + .addToLabels(additionalLabels.asJava) + .endMetadata() + .build() + val mainContainer = new ContainerBuilder(initContainerSpec.driverContainer) + .addNewEnv() + .withName(additionalMainContainerEnvKey) + .withValue(additionalMainContainerEnvValue) + .endEnv() + .build() + initContainerSpec.copy( + podToInitialize = driverPod, + driverContainer = mainContainer, + initContainerDependentResources = initContainerSpec.initContainerDependentResources ++ + Seq(additionalKubernetesResource)) + } +} + +private object SecondTestInitContainerConfigurationStep$ extends InitContainerConfigurationStep { + val additionalInitContainerEnvKey = "TEST_ENV_INIT_KEY" + val additionalInitContainerEnvValue = "TEST_ENV_INIT_VALUE" + val additionalInitContainerPropertyKey = "spark.initcontainer.testkey" + val additionalInitContainerPropertyValue = "testvalue" + val additionalDriverSparkConfKey = "spark.driver.testkey" + val additionalDriverSparkConfValue = "spark.driver.testvalue" + + override def configureInitContainer(initContainerSpec: 
InitContainerSpec): InitContainerSpec = { + val initContainer = new ContainerBuilder(initContainerSpec.initContainer) + .addNewEnv() + .withName(additionalInitContainerEnvKey) + .withValue(additionalInitContainerEnvValue) + .endEnv() + .build() + val initContainerProperties = initContainerSpec.initContainerProperties ++ + Map(additionalInitContainerPropertyKey -> additionalInitContainerPropertyValue) + val driverSparkConf = initContainerSpec.additionalDriverSparkConf ++ + Map(additionalDriverSparkConfKey -> additionalDriverSparkConfValue) + initContainerSpec.copy( + initContainer = initContainer, + initContainerProperties = initContainerProperties, + additionalDriverSparkConf = driverSparkConf) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/BaseInitContainerConfigurationStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/BaseInitContainerConfigurationStepSuite.scala new file mode 100644 index 0000000000000..fe1af4bc5be2a --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/BaseInitContainerConfigurationStepSuite.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer + +import io.fabric8.kubernetes.api.model._ +import org.mockito.{Mock, MockitoAnnotations} +import org.mockito.Matchers.any +import org.mockito.Mockito.when +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.BeforeAndAfter +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.kubernetes.{PodWithDetachedInitContainer, SparkPodInitContainerBootstrap} +import org.apache.spark.deploy.kubernetes.config._ + +class BaseInitContainerConfigurationStepSuite extends SparkFunSuite with BeforeAndAfter{ + private val SPARK_JARS = Seq( + "hdfs://localhost:9000/app/jars/jar1.jar", "file:///app/jars/jar2.jar") + private val SPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.txt", "file:///app/files/file2.txt") + private val JARS_DOWNLOAD_PATH = "/var/data/jars" + private val FILES_DOWNLOAD_PATH = "/var/data/files" + private val CONFIG_MAP_NAME = "config-map" + private val CONFIG_MAP_KEY = "config-map-key" + private val POD_LABEL = Map("bootstrap" -> "true") + private val INIT_CONTAINER_NAME = "init-container" + private val DRIVER_CONTAINER_NAME = "driver-container" + + @Mock + private var podAndInitContainerBootstrap : SparkPodInitContainerBootstrap = _ + + before { + MockitoAnnotations.initMocks(this) + when(podAndInitContainerBootstrap.bootstrapInitContainerAndVolumes( + any[PodWithDetachedInitContainer])).thenAnswer(new Answer[PodWithDetachedInitContainer] { + override def answer(invocation: InvocationOnMock) : PodWithDetachedInitContainer = { + val pod = invocation.getArgumentAt(0, classOf[PodWithDetachedInitContainer]) + pod.copy( + pod = + new PodBuilder(pod.pod) + .withNewMetadata() + .addToLabels("bootstrap", "true") + .endMetadata() + .withNewSpec().endSpec() + .build(), + initContainer = + new ContainerBuilder() + .withName(INIT_CONTAINER_NAME).build(), + mainContainer = + new ContainerBuilder() + .withName(DRIVER_CONTAINER_NAME).build() + )}}) + } + + test("Test of additionalDriverSparkConf with mix of remote files and jars") { + val baseInitStep = new BaseInitContainerConfigurationStep( + SPARK_JARS, + SPARK_FILES, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + CONFIG_MAP_NAME, + CONFIG_MAP_KEY, + podAndInitContainerBootstrap) + val expectedDriverSparkConf = Map( + INIT_CONTAINER_JARS_DOWNLOAD_LOCATION.key -> JARS_DOWNLOAD_PATH, + INIT_CONTAINER_FILES_DOWNLOAD_LOCATION.key -> FILES_DOWNLOAD_PATH, + INIT_CONTAINER_REMOTE_JARS.key -> "hdfs://localhost:9000/app/jars/jar1.jar", + INIT_CONTAINER_REMOTE_FILES.key -> "hdfs://localhost:9000/app/files/file1.txt") + val initContainerSpec = InitContainerSpec( + Map.empty[String, String], + Map.empty[String, String], + new Container(), + new Container(), + new Pod, + Seq.empty[HasMetadata]) + val returnContainerSpec = baseInitStep.configureInitContainer(initContainerSpec) + assert(expectedDriverSparkConf === returnContainerSpec.initContainerProperties) + assert(returnContainerSpec.initContainer.getName == INIT_CONTAINER_NAME) + assert(returnContainerSpec.driverContainer.getName == DRIVER_CONTAINER_NAME) + assert(returnContainerSpec.podToInitialize.getMetadata.getLabels.asScala === POD_LABEL) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestratorSuite.scala 
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestratorSuite.scala new file mode 100644 index 0000000000000..1cc8007803457 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestratorSuite.scala @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ + +class InitContainerConfigurationStepsOrchestratorSuite extends SparkFunSuite { + private val NAMESPACE = "namespace" + private val APP_RESOURCE_PREFIX = "spark-prefix" + private val SPARK_JARS = Seq( + "hdfs://localhost:9000/app/jars/jar1.jar", "file:///app/jars/jar2.jar") + private val SPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.txt", "file:///app/files/file2.txt") + private val JARS_DOWNLOAD_PATH = "/var/data/jars" + private val FILES_DOWNLOAD_PATH = "/var/data/files" + private val DOCKER_IMAGE_PULL_POLICY: String = "IfNotPresent" + private val APP_ID = "spark-id" + private val CUSTOM_LABEL_KEY = "customLabel" + private val CUSTOM_LABEL_VALUE = "customLabelValue" + private val DEPRECATED_CUSTOM_LABEL_KEY = "deprecatedCustomLabel" + private val DEPRECATED_CUSTOM_LABEL_VALUE = "deprecatedCustomLabelValue" + private val DRIVER_LABELS = Map( + CUSTOM_LABEL_KEY -> CUSTOM_LABEL_VALUE, + DEPRECATED_CUSTOM_LABEL_KEY -> DEPRECATED_CUSTOM_LABEL_VALUE, + SPARK_APP_ID_LABEL -> APP_ID, + SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) + private val INIT_CONTAINER_CONFIG_MAP_NAME = "spark-init-config-map" + private val INIT_CONTAINER_CONFIG_MAP_KEY = "spark-init-config-map-key" + private val STAGING_SERVER_URI = "http://localhost:8000" + + test ("including step to contact resource staging server") { + val sparkConf = new SparkConf(true) + .set(KUBERNETES_DRIVER_LABELS, s"$DEPRECATED_CUSTOM_LABEL_KEY=$DEPRECATED_CUSTOM_LABEL_VALUE") + .set(s"$KUBERNETES_DRIVER_LABEL_PREFIX$CUSTOM_LABEL_KEY", CUSTOM_LABEL_VALUE) + .set(RESOURCE_STAGING_SERVER_URI, STAGING_SERVER_URI) + + val orchestrator = new InitContainerConfigurationStepsOrchestrator( + NAMESPACE, + APP_RESOURCE_PREFIX, + SPARK_JARS, + SPARK_FILES, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + DOCKER_IMAGE_PULL_POLICY, + DRIVER_LABELS, + INIT_CONTAINER_CONFIG_MAP_NAME, + INIT_CONTAINER_CONFIG_MAP_KEY, + sparkConf) + val initSteps : Seq[InitContainerConfigurationStep] = + orchestrator.getAllConfigurationSteps() + assert(initSteps.length == 2) + 
assert(initSteps.head.isInstanceOf[BaseInitContainerConfigurationStep]) + assert(initSteps(1).isInstanceOf[SubmittedResourcesInitContainerConfigurationStep]) + } + + test ("not including steps because no contact to resource staging server") { + val sparkConf = new SparkConf(true) + .set(KUBERNETES_DRIVER_LABELS, s"$DEPRECATED_CUSTOM_LABEL_KEY=$DEPRECATED_CUSTOM_LABEL_VALUE") + .set(s"$KUBERNETES_DRIVER_LABEL_PREFIX$CUSTOM_LABEL_KEY", CUSTOM_LABEL_VALUE) + + val orchestrator = new InitContainerConfigurationStepsOrchestrator( + NAMESPACE, + APP_RESOURCE_PREFIX, + SPARK_JARS, + SPARK_FILES, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + DOCKER_IMAGE_PULL_POLICY, + DRIVER_LABELS, + INIT_CONTAINER_CONFIG_MAP_NAME, + INIT_CONTAINER_CONFIG_MAP_KEY, + sparkConf) + val initSteps : Seq[InitContainerConfigurationStep] = + orchestrator.getAllConfigurationSteps() + assert(initSteps.length === 1) + assert(initSteps.head.isInstanceOf[BaseInitContainerConfigurationStep]) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerStepSuite.scala new file mode 100644 index 0000000000000..2edaba93fe07f --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerStepSuite.scala @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer + +import java.io.File +import java.util.UUID + +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model._ +import org.mockito.{Mock, MockitoAnnotations} +import org.mockito.Matchers.any +import org.mockito.Mockito.when +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.BeforeAndAfter +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.kubernetes.InitContainerResourceStagingServerSecretPlugin +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.{SubmittedDependencyUploader, SubmittedResourceIdAndSecret} +import org.apache.spark.util.Utils + +class SubmittedResourcesInitContainerStepSuite extends SparkFunSuite with BeforeAndAfter { + private val RESOURCE_SECRET_NAME = "secret" + private val JARS_RESOURCE_ID = "jarsID" + private val JARS_SECRET = "jarsSecret" + private val FILES_RESOURCE_ID = "filesID" + private val FILES_SECRET = "filesSecret" + private val STAGING_SERVER_URI = "http://localhost:8000" + private val SECRET_MOUNT_PATH = "/tmp" + private val RSS_SECRET = Map( + INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY -> + BaseEncoding.base64().encode(JARS_SECRET.getBytes(Charsets.UTF_8)), + INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY -> + BaseEncoding.base64().encode(FILES_SECRET.getBytes(Charsets.UTF_8)) + ).asJava + private var RSS_WITH_SSL_SECRET: java.util.Map[String, String] = _ + private var TRUSTSTORE_FILENAME: String = "" + private var TRUSTSTORE_FILE: File = _ + private var TRUSTSTORE_URI: Option[String] = None + private val TRUSTSTORE_PASS = "trustStorePassword" + private val TRUSTSTORE_TYPE = "jks" + private var CERT_FILENAME: String = "" + private var CERT_FILE: File = _ + private var CERT_URI: Option[String] = None + + @Mock + private var submittedDependencyUploader: SubmittedDependencyUploader = _ + @Mock + private var submittedResourcesSecretPlugin: InitContainerResourceStagingServerSecretPlugin = _ + + before { + MockitoAnnotations.initMocks(this) + TRUSTSTORE_FILENAME = createTempFile(".jks") + TRUSTSTORE_FILE = new File(TRUSTSTORE_FILENAME) + TRUSTSTORE_URI = Some(TRUSTSTORE_FILENAME) + CERT_FILENAME = createTempFile("pem") + CERT_FILE = new File(CERT_FILENAME) + CERT_URI = Some(CERT_FILENAME) + RSS_WITH_SSL_SECRET = + (RSS_SECRET.asScala ++ Map( + INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY -> + BaseEncoding.base64().encode(Files.toByteArray(TRUSTSTORE_FILE)), + INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY -> + BaseEncoding.base64().encode(Files.toByteArray(CERT_FILE)) + )).asJava + when(submittedDependencyUploader.uploadJars()).thenReturn( + SubmittedResourceIdAndSecret(JARS_RESOURCE_ID, JARS_SECRET) + ) + when(submittedDependencyUploader.uploadFiles()).thenReturn( + SubmittedResourceIdAndSecret(FILES_RESOURCE_ID, FILES_SECRET) + ) + when(submittedResourcesSecretPlugin.addResourceStagingServerSecretVolumeToPod( + any[Pod])).thenAnswer(new Answer[Pod] { + override def answer(invocation: InvocationOnMock) : Pod = { + val pod = invocation.getArgumentAt(0, classOf[Pod]) + new PodBuilder(pod) + .withNewMetadata() + .addToLabels("mountedSecret", "true") + .endMetadata() + .withNewSpec().endSpec() + .build() + }}) + 
when(submittedResourcesSecretPlugin.mountResourceStagingServerSecretIntoInitContainer( + any[Container])).thenAnswer(new Answer[Container] { + override def answer(invocation: InvocationOnMock) : Container = { + val con = invocation.getArgumentAt(0, classOf[Container]) + new ContainerBuilder(con).withName("mountedSecret").build() + }}) + } + after { + TRUSTSTORE_FILE.delete() + CERT_FILE.delete() + } + test ("testing vanilla prepareInitContainer on resources and properties") { + val submittedResourceStep = new SubmittedResourcesInitContainerConfigurationStep( + RESOURCE_SECRET_NAME, + STAGING_SERVER_URI, + SECRET_MOUNT_PATH, + false, + None, + None, + None, + None, + submittedDependencyUploader, + submittedResourcesSecretPlugin + ) + val returnedInitContainer = + submittedResourceStep.configureInitContainer(InitContainerSpec( + Map.empty[String, String], + Map.empty[String, String], + new Container(), + new Container(), + new Pod(), + Seq.empty[HasMetadata])) + assert(returnedInitContainer.initContainer.getName === "mountedSecret") + assert(returnedInitContainer.podToInitialize.getMetadata.getLabels.asScala + === Map("mountedSecret" -> "true")) + assert(returnedInitContainer.initContainerDependentResources.length == 1) + val secret = returnedInitContainer.initContainerDependentResources.head.asInstanceOf[Secret] + assert(secret.getData === RSS_SECRET) + assert(secret.getMetadata.getName == RESOURCE_SECRET_NAME) + val expectedinitContainerProperties = Map( + RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> JARS_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY", + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> FILES_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY", + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> false.toString) + assert(returnedInitContainer.initContainerProperties === expectedinitContainerProperties) + assert(returnedInitContainer.additionalDriverSparkConf === + Map( + EXECUTOR_INIT_CONTAINER_SECRET.key -> RESOURCE_SECRET_NAME, + EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR.key -> SECRET_MOUNT_PATH)) + } + + test ("testing prepareInitContainer w/ CERT and TrustStore Files w/o SSL") { + val submittedResourceStep = new SubmittedResourcesInitContainerConfigurationStep( + RESOURCE_SECRET_NAME, + STAGING_SERVER_URI, + SECRET_MOUNT_PATH, + false, + TRUSTSTORE_URI, + CERT_URI, + Some(TRUSTSTORE_PASS), + Some(TRUSTSTORE_TYPE), + submittedDependencyUploader, + submittedResourcesSecretPlugin + ) + val returnedInitContainer = + submittedResourceStep.configureInitContainer(InitContainerSpec( + Map.empty[String, String], + Map.empty[String, String], + new Container(), + new Container(), + new Pod(), + Seq.empty[HasMetadata])) + val expectedinitContainerProperties = Map( + RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> JARS_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY", + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> FILES_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY", + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> false.toString, + RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key -> TRUSTSTORE_PASS, + 
RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE, + RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY", + RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY" + ) + assert(returnedInitContainer.initContainerProperties === expectedinitContainerProperties) + assert(returnedInitContainer.initContainerDependentResources.length == 1) + val secret = returnedInitContainer.initContainerDependentResources.head.asInstanceOf[Secret] + assert(secret.getData === RSS_WITH_SSL_SECRET) + assert(secret.getMetadata.getName == RESOURCE_SECRET_NAME) + + } + + test ("testing prepareInitContainer w/ local CERT and TrustStore Files w/o SSL") { + val LOCAL_TRUST_FILE = "local:///tmp/trust.jsk" + val LOCAL_CERT_FILE = "local:///tmp/cert.pem" + val submittedResourceStep = new SubmittedResourcesInitContainerConfigurationStep( + RESOURCE_SECRET_NAME, + STAGING_SERVER_URI, + SECRET_MOUNT_PATH, + false, + Some(LOCAL_TRUST_FILE), + Some(LOCAL_CERT_FILE), + Some(TRUSTSTORE_PASS), + Some(TRUSTSTORE_TYPE), + submittedDependencyUploader, + submittedResourcesSecretPlugin + ) + val returnedInitContainer = + submittedResourceStep.configureInitContainer(InitContainerSpec( + Map.empty[String, String], + Map.empty[String, String], + new Container(), + new Container(), + new Pod(), + Seq.empty[HasMetadata])) + val expectedinitContainerProperties = Map( + RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> JARS_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY", + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> FILES_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY", + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> false.toString, + RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key -> TRUSTSTORE_PASS, + RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE, + RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key -> + "/tmp/trust.jsk", + RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM.key -> + "/tmp/cert.pem" + ) + assert(returnedInitContainer.initContainerProperties === expectedinitContainerProperties) + assert(returnedInitContainer.initContainerDependentResources.length == 1) + val secret = returnedInitContainer.initContainerDependentResources.head.asInstanceOf[Secret] + assert(secret.getData === RSS_SECRET) + assert(secret.getMetadata.getName == RESOURCE_SECRET_NAME) + } + private def createTempFile(extension: String): String = { + val dir = Utils.createTempDir() + val file = new File(dir, s"${UUID.randomUUID().toString}.$extension") + Files.write(UUID.randomUUID().toString, file, Charsets.UTF_8) + file.getAbsolutePath + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index d2082291eba22..c6cd6a74c88d1 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -34,7 +34,7 @@ import 
org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackendFactory import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND -import org.apache.spark.deploy.kubernetes.submit.{Client, KeyAndCertPem} +import org.apache.spark.deploy.kubernetes.submit.{Client, ClientArguments, JavaMainAppResource, KeyAndCertPem, MainAppResource, PythonMainAppResource} import org.apache.spark.launcher.SparkLauncher import org.apache.spark.util.Utils @@ -72,7 +72,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { kubernetesTestComponents.deleteNamespace() } - test("Run PySpark Job on file from SUBMITTER") { + test("Run PySpark Job on file from SUBMITTER with --py-files") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) launchStagingServer(SSLOptions(), None) @@ -83,7 +83,9 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { System.getProperty("spark.docker.test.executorImage", "spark-executor-py:latest")) runPySparkPiAndVerifyCompletion( - PYSPARK_PI_SUBMITTER_LOCAL_FILE_LOCATION) + PYSPARK_PI_SUBMITTER_LOCAL_FILE_LOCATION, + Seq(PYSPARK_SORT_CONTAINER_LOCAL_FILE_LOCATION) + ) } test("Run PySpark Job on file from CONTAINER with spark.jar defined") { @@ -96,8 +98,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .set(EXECUTOR_DOCKER_IMAGE, System.getProperty("spark.docker.test.executorImage", "spark-executor-py:latest")) - runPySparkPiAndVerifyCompletion( - PYSPARK_PI_CONTAINER_LOCAL_FILE_LOCATION) + runPySparkPiAndVerifyCompletion(PYSPARK_PI_CONTAINER_LOCAL_FILE_LOCATION, Seq.empty[String]) } test("Simple submission test with the resource staging server.") { @@ -154,10 +155,11 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { sparkConf.set("spark.kubernetes.shuffle.namespace", kubernetesTestComponents.namespace) sparkConf.set("spark.app.name", "group-by-test") runSparkApplicationAndVerifyCompletion( - SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + JavaMainAppResource(SUBMITTER_LOCAL_MAIN_APP_RESOURCE), GROUP_BY_MAIN_CLASS, - "The Result is", - Array.empty[String]) + Seq("The Result is"), + Array.empty[String], + Seq.empty[String]) } test("Use remote resources without the resource staging server.") { @@ -217,10 +219,11 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { launchStagingServer(SSLOptions(), None) sparkConf.set("spark.files", testExistenceFile.getAbsolutePath) runSparkApplicationAndVerifyCompletion( - SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + JavaMainAppResource(SUBMITTER_LOCAL_MAIN_APP_RESOURCE), FILE_EXISTENCE_MAIN_CLASS, - s"File found at /opt/spark/${testExistenceFile.getName} with correct contents.", - Array(testExistenceFile.getName, TEST_EXISTENCE_FILE_CONTENTS)) + Seq(s"File found at /opt/spark/${testExistenceFile.getName} with correct contents."), + Array(testExistenceFile.getName, TEST_EXISTENCE_FILE_CONTENTS), + Seq.empty[String]) } test("Use a very long application name.") { @@ -248,26 +251,35 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { private def runSparkPiAndVerifyCompletion(appResource: String): Unit = { runSparkApplicationAndVerifyCompletion( - appResource, SPARK_PI_MAIN_CLASS, "Pi is roughly 3", Array.empty[String]) + JavaMainAppResource(appResource), + SPARK_PI_MAIN_CLASS, + Seq("Pi is roughly 3"), + 
Array.empty[String], + Seq.empty[String]) } private def runPySparkPiAndVerifyCompletion( - appResource: String): Unit = { + appResource: String, otherPyFiles: Seq[String]): Unit = { runSparkApplicationAndVerifyCompletion( - appResource, PYSPARK_PI_MAIN_CLASS, "Pi is roughly 3", - Array(null, "5")) + PythonMainAppResource(appResource), + PYSPARK_PI_MAIN_CLASS, + Seq("Submitting 5 missing tasks from ResultStage", "Pi is roughly 3"), + Array("5"), + otherPyFiles) } private def runSparkApplicationAndVerifyCompletion( - appResource: String, + appResource: MainAppResource, mainClass: String, - expectedLogOnCompletion: String, - appArgs: Array[String]): Unit = { - Client.run( - sparkConf = sparkConf, - appArgs = appArgs, + expectedLogOnCompletion: Seq[String], + appArgs: Array[String], + otherPyFiles: Seq[String]): Unit = { + val clientArguments = ClientArguments( + mainAppResource = appResource, mainClass = mainClass, - mainAppResource = appResource) + driverArgs = appArgs, + otherPyFiles = otherPyFiles) + Client.run(sparkConf, clientArguments) val driverPod = kubernetesTestComponents.kubernetesClient .pods() .withLabel("spark-app-locator", APP_LOCATOR_LABEL) @@ -275,11 +287,13 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .getItems .get(0) Eventually.eventually(TIMEOUT, INTERVAL) { - assert(kubernetesTestComponents.kubernetesClient - .pods() - .withName(driverPod.getMetadata.getName) - .getLog - .contains(expectedLogOnCompletion), "The application did not complete.") + expectedLogOnCompletion.foreach { e => + assert(kubernetesTestComponents.kubernetesClient + .pods() + .withName(driverPod.getMetadata.getName) + .getLog + .contains(e), "The application did not complete.") + } } } @@ -347,6 +361,8 @@ private[spark] object KubernetesSuite { val PYSPARK_PI_MAIN_CLASS = "org.apache.spark.deploy.PythonRunner" val PYSPARK_PI_CONTAINER_LOCAL_FILE_LOCATION = "local:///opt/spark/examples/src/main/python/pi.py" + val PYSPARK_SORT_CONTAINER_LOCAL_FILE_LOCATION = + "local:///opt/spark/examples/src/main/python/sort.py" val PYSPARK_PI_SUBMITTER_LOCAL_FILE_LOCATION = "src/test/python/pi.py" val FILE_EXISTENCE_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + ".integrationtest.jobs.FileExistenceTest" From 8c35d81824336ca7b35afbb0387d1be88ccd9293 Mon Sep 17 00:00:00 2001 From: mccheah Date: Mon, 17 Jul 2017 13:09:41 -0700 Subject: [PATCH 145/156] Add implicit conversions to imports. (#374) Otherwise we can get a Scalastyle error when building from SBT. 
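(For context: Scala treats implicit conversions as an opt-in language feature, so a file that defines or uses an implicit def conversion is expected to import scala.language.implicitConversions, and strict build settings can escalate the resulting feature warning into an error. The sketch below is illustrative only and not part of the patch; the object and method names are made up.)

    import scala.language.implicitConversions

    object ImplicitConversionSketch {
      // Without the language import above, defining this implicit def produces a
      // compiler feature warning, which strict builds may escalate to an error.
      implicit def stringToSeq(s: String): Seq[String] = Seq(s)

      def main(args: Array[String]): Unit = {
        val values: Seq[String] = "single-value"  // resolved via the implicit conversion
        println(values)
      }
    }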
--- .../submit/submitsteps/DriverKubernetesCredentialsStep.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala index 0c58006130659..70a108edc8678 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala @@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets import com.google.common.io.{BaseEncoding, Files} import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder, Secret, SecretBuilder} import scala.collection.JavaConverters._ +import scala.language.implicitConversions import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ From db5f5be54e250c8863bf00e429e71d5acb9b496d Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Mon, 17 Jul 2017 16:56:13 -0700 Subject: [PATCH 146/156] Fix import order and scalastyle (#375) Test with ./dev/scalastyle --- ...nitContainerResourceStagingServerSecretPluginSuite.scala | 6 +++--- .../kubernetes/SparkPodInitContainerBootstrapSuite.scala | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala index f5b2db36aff8f..597bcdb416fc0 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala @@ -16,11 +16,11 @@ */ package org.apache.spark.deploy.kubernetes -import org.scalatest.BeforeAndAfter import io.fabric8.kubernetes.api.model._ -import org.apache.spark.deploy.kubernetes.constants._ - +import org.scalatest.BeforeAndAfter import scala.collection.JavaConverters._ + +import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.SparkFunSuite class InitContainerResourceStagingServerSecretPluginSuite extends SparkFunSuite with BeforeAndAfter{ diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala index 0557b5677b919..d5f25983f5080 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala @@ -16,11 +16,11 @@ */ package org.apache.spark.deploy.kubernetes -import org.scalatest.BeforeAndAfter import io.fabric8.kubernetes.api.model._ -import org.apache.spark.deploy.kubernetes.constants._ - +import org.scalatest.BeforeAndAfter import scala.collection.JavaConverters._ + +import org.apache.spark.deploy.kubernetes.constants._ import 
org.apache.spark.SparkFunSuite class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAfter { From 8751a9aaf1255d71397d49c15ee8aa1d8e854925 Mon Sep 17 00:00:00 2001 From: sandflee Date: Wed, 19 Jul 2017 03:00:36 +0800 Subject: [PATCH 147/156] fix submit job errors (#376) --- .../org/apache/spark/deploy/kubernetes/submit/Client.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index 98cd7afcd204d..2fa9b416330e5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -42,7 +42,7 @@ private[spark] object ClientArguments { var otherPyFiles = Seq.empty[String] var mainClass: Option[String] = None val driverArgs = mutable.Buffer.empty[String] - args.sliding(2).toList.collect { + args.sliding(2, 2).toList.collect { case Array("--primary-py-file", mainPyFile: String) => mainAppResource = Some(PythonMainAppResource(mainPyFile)) case Array("--primary-java-resource", primaryJavaResource: String) => @@ -54,7 +54,8 @@ private[spark] object ClientArguments { case Array("--arg", arg: String) => driverArgs += arg case other => - throw new RuntimeException(s"Unknown arguments: $other") + val invalid = other.mkString(" ") + throw new RuntimeException(s"Unknown arguments: $invalid") } require(mainAppResource.isDefined, "Main app resource must be defined by either --primary-py-file or --primary-java-resource.") From 6dbd32e0d68d9ee68bf4fbe806300bf3cdfd6849 Mon Sep 17 00:00:00 2001 From: sandflee Date: Wed, 19 Jul 2017 06:43:10 +0800 Subject: [PATCH 148/156] Add node selectors for driver and executor pods (#355) --- docs/running-on-kubernetes.md | 10 ++++++++++ .../deploy/kubernetes/ConfigurationUtils.scala | 14 ++++++++++++++ .../apache/spark/deploy/kubernetes/config.scala | 2 ++ .../submitsteps/BaseDriverConfigurationStep.scala | 3 +++ .../KubernetesClusterSchedulerBackend.scala | 6 ++++++ 5 files changed, 35 insertions(+) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 2b4e9a6f96af1..5e23801e15b10 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -758,6 +758,16 @@ from the other deployment modes. See the [configuration page](configuration.html Specify the hard cpu limit for a single executor pod + + spark.kubernetes.node.selector.[labelKey] + (none) + + Adds to the node selector of the driver pod and executor pods, with key labelKey and the value as the + configuration's value. For example, setting spark.kubernetes.node.selector.identifier to myIdentifier + will result in the driver pod and executors having a node selector with key identifier and value + myIdentifier. Multiple node selector keys can be added by setting multiple configurations with this prefix. 
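(As a minimal illustration of the prefix described above, and not part of the patch: the sketch below sets the documented identifier/myIdentifier example plus a second, hypothetical disktype key on a SparkConf and reads them back with getAllWithPrefix, the same SparkConf method the ConfigurationUtils change below uses.)

    import org.apache.spark.SparkConf

    object NodeSelectorConfSketch {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf(false)
          .set("spark.kubernetes.node.selector.identifier", "myIdentifier") // example from the docs above
          .set("spark.kubernetes.node.selector.disktype", "ssd")            // hypothetical second selector key
        // Per the documentation above, each key under the prefix becomes one entry
        // in the nodeSelector of the driver and executor pods.
        println(conf.getAllWithPrefix("spark.kubernetes.node.selector.").toMap)
      }
    }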
+ + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala index f461da4809b4d..1a008c236d00f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala @@ -65,4 +65,18 @@ object ConfigurationUtils extends Logging { } combined.toMap } + + def parsePrefixedKeyValuePairs( + sparkConf: SparkConf, + prefix: String, + configType: String): Map[String, String] = { + val fromPrefix = sparkConf.getAllWithPrefix(prefix) + fromPrefix.groupBy(_._1).foreach { + case (key, values) => + require(values.size == 1, + s"Cannot have multiple values for a given $configType key, got key $key with" + + s" values $values") + } + fromPrefix.toMap + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index e1c1ab9d459fc..c6772c1cb5ae4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -497,6 +497,8 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val KUBERNETES_NODE_SELECTOR_PREFIX = "spark.kubernetes.node.selector." + private[spark] def resolveK8sMaster(rawMasterString: String): String = { if (!rawMasterString.startsWith("k8s://")) { throw new IllegalArgumentException("Master URL should start with k8s:// in Kubernetes mode.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala index 022b5fccdc5e1..b3f509b44054e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala @@ -73,6 +73,8 @@ private[spark] class BaseDriverConfigurationStep( s"Annotation with key $SPARK_APP_NAME_ANNOTATION is not allowed as it is reserved for" + s" Spark bookkeeping operations.") val allDriverAnnotations = driverCustomAnnotations ++ Map(SPARK_APP_NAME_ANNOTATION -> appName) + val nodeSelector = ConfigurationUtils.parsePrefixedKeyValuePairs( + submissionSparkConf, KUBERNETES_NODE_SELECTOR_PREFIX, "node selector") val driverCpuQuantity = new QuantityBuilder(false) .withAmount(driverCpuCores) .build() @@ -117,6 +119,7 @@ private[spark] class BaseDriverConfigurationStep( .endMetadata() .withNewSpec() .withRestartPolicy("Never") + .withNodeSelector(nodeSelector.asJava) .endSpec() .build() val resolvedSparkConf = driverSpec.driverSparkConf.clone() diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index e5f980ad1f366..6dbe918f966e4 100644 --- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -84,6 +84,11 @@ private[spark] class KubernetesClusterSchedulerBackend( KUBERNETES_EXECUTOR_ANNOTATION_PREFIX, KUBERNETES_EXECUTOR_ANNOTATIONS, "executor annotation") + private val nodeSelector = + ConfigurationUtils.parsePrefixedKeyValuePairs( + conf, + KUBERNETES_NODE_SELECTOR_PREFIX, + "node-selector") private var shufflePodCache: Option[ShufflePodCache] = None private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val dockerImagePullPolicy = conf.get(DOCKER_IMAGE_PULL_POLICY) @@ -449,6 +454,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .endMetadata() .withNewSpec() .withHostname(hostname) + .withNodeSelector(nodeSelector.asJava) .endSpec() .build() From 3ec941020d980e2577c48f75a50512b5e8824830 Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 18 Jul 2017 23:16:36 -0700 Subject: [PATCH 149/156] Retry binding server to random port in the resource staging server test. (#378) * Retry binding server to random port in the resource staging server test. * Break if successful start * Start server in try block. * FIx scalastyle * More rigorous cleanup logic. Increment port numbers. * Move around more exception logic. * More exception refactoring. * Remove whitespace * Fix test * Rename variable --- .../ResourceStagingServerSuite.scala | 69 ++++++++++++++++--- 1 file changed, 58 insertions(+), 11 deletions(-) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala index 0c0908da20d89..1bcd85a611e00 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala @@ -23,12 +23,14 @@ import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.common.io.ByteStreams import okhttp3.{RequestBody, ResponseBody} +import org.eclipse.jetty.server.Server import org.scalatest.BeforeAndAfter import org.scalatest.mock.MockitoSugar.mock import retrofit2.Call import org.apache.spark.{SparkFunSuite, SSLOptions} import org.apache.spark.deploy.kubernetes.SSLUtils +import org.apache.spark.internal.Logging import org.apache.spark.util.Utils /** @@ -40,30 +42,37 @@ import org.apache.spark.util.Utils * we've configured the Jetty server correctly and that the endpoints reached over HTTP can * receive streamed uploads and can stream downloads. 
*/ -class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { +class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter with Logging { + + private val MAX_SERVER_START_ATTEMPTS = 5 private var serviceImpl: ResourceStagingService = _ private var stagedResourcesCleaner: StagedResourcesCleaner = _ - private var server: ResourceStagingServer = _ + private var server: Option[ResourceStagingServer] = None private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) - private val serverPort = new ServerSocket(0).getLocalPort - private val sslOptionsProvider = new SettableReferenceSslOptionsProvider() before { stagedResourcesCleaner = mock[StagedResourcesCleaner] serviceImpl = new ResourceStagingServiceImpl( new StagedResourcesStoreImpl(Utils.createTempDir()), stagedResourcesCleaner) - server = new ResourceStagingServer(serverPort, serviceImpl, sslOptionsProvider) } after { - server.stop() + server.foreach { s => + try { + s.stop() + } catch { + case e: Throwable => + log.warn("Failed to stop the resource staging server.", e) + } + } + server = None } test("Accept file and jar uploads and downloads") { - server.start() - runUploadAndDownload(SSLOptions()) + val serverPort = startServer() + runUploadAndDownload(SSLOptions(), serverPort) } test("Enable SSL on the server") { @@ -80,11 +89,11 @@ class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { trustStore = Some(keyStoreAndTrustStore.trustStore), trustStorePassword = Some("trustStore")) sslOptionsProvider.setOptions(sslOptions) - server.start() - runUploadAndDownload(sslOptions) + val serverPort = startServer() + runUploadAndDownload(sslOptions, serverPort) } - private def runUploadAndDownload(sslOptions: SSLOptions): Unit = { + private def runUploadAndDownload(sslOptions: SSLOptions, serverPort: Int): Unit = { val scheme = if (sslOptions.enabled) "https" else "http" val retrofitService = RetrofitClientFactoryImpl.createRetrofitClient( s"$scheme://127.0.0.1:$serverPort/", @@ -125,6 +134,44 @@ class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { val downloadedBytes = ByteStreams.toByteArray(responseBody.byteStream()) assert(downloadedBytes.toSeq === bytes) } + + private def startServer(): Int = { + var currentAttempt = 0 + var successfulStart = false + var latestServerPort = new ServerSocket(0).getLocalPort + while (currentAttempt < MAX_SERVER_START_ATTEMPTS && !successfulStart) { + val newServer = new ResourceStagingServer(latestServerPort, serviceImpl, sslOptionsProvider) + try { + newServer.start() + successfulStart = true + server = Some(newServer) + } catch { + case e: Throwable => + try { + newServer.stop() + } catch { + case e1: Throwable => + log.warn("Failed to stop a resource staging server that failed to start.", e1) + } + + if (Utils.isBindCollision(e)) { + currentAttempt += 1 + latestServerPort = latestServerPort + 1 + if (currentAttempt == MAX_SERVER_START_ATTEMPTS) { + throw new RuntimeException(s"Failed to bind to a random port" + + s" $MAX_SERVER_START_ATTEMPTS times. 
Last attempted port: $latestServerPort", e) + } else { + logWarning(s"Attempt $currentAttempt/$MAX_SERVER_START_ATTEMPTS failed to start" + + s" server on port $latestServerPort.", e) + } + } else { + throw e + } + } + } + logInfo(s"Started resource staging server on port $latestServerPort.") + latestServerPort + } } private class SettableReferenceSslOptionsProvider extends ResourceStagingServerSslOptionsProvider { From e1ff2f06ee537431440a020d613a9be278f57287 Mon Sep 17 00:00:00 2001 From: Hong Zhiguo Date: Thu, 20 Jul 2017 04:44:47 +0800 Subject: [PATCH 150/156] set RestartPolicy=Never for executor (#367) * set RestartPolicy=Never for executor In the current implementation the RestartPolicy of the executor pod is not set, so the default value "OnFailure" is in effect. But this causes a problem. If an executor is terminated unexpectedly, for example by exiting with java.lang.OutOfMemoryError, it'll be restarted by k8s with the same executor ID. When the new executor tries to fetch a block held by the previous executor, ShuffleBlockFetcherIterator.splitLocalRemoteBlocks() thinks it's a **local** block and tries to read it from its local dir. But the executor's local dir has changed, because a randomly generated ID is part of the local dir. A FetchFailedException is raised and the stage fails. The resulting error message: 17/06/29 01:54:56 WARN KubernetesTaskSetManager: Lost task 0.1 in stage 2.0 (TID 7, 172.16.75.92, executor 1): FetchFailed(BlockManagerId(1, 172.16.75.92, 40539, None), shuffleId=2, mapId=0, reduceId=0, message= org.apache.spark.shuffle.FetchFailedException: /data2/spark/blockmgr-0e228d3c-8727-422e-aa97-2841a877c42a/32/shuffle_2_0_0.index (No such file or directory) at org.apache.spark.storage.ShuffleBlockFetcherIterator.throwFetchFailedException(ShuffleBlockFetcherIterator.scala:357) at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:332) at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:54) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) * Update KubernetesClusterSchedulerBackend.scala --- .../cluster/kubernetes/KubernetesClusterSchedulerBackend.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 6dbe918f966e4..a0753728f8cfd 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -454,6 +454,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .endMetadata() .withNewSpec() .withHostname(hostname) + .withRestartPolicy("Never") .withNodeSelector(nodeSelector.asJava) .endSpec() .build() From b1c48f98bba5d2522fb4f348361b256626557aa1 Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 19 Jul 2017 22:27:16 -0700 Subject: [PATCH 151/156] Read classpath entries from SPARK_EXTRA_CLASSPATH on executors. (#383) This makes executors consistent with the driver. Note that SPARK_EXTRA_CLASSPATH isn't set anywhere by Spark itself, but it's primarily meant to be set by images that inherit from the base driver/executor images.
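As an illustration of that intent, a derived image might look like the following sketch. This is not part of the patch: the base image tag and the jar path are hypothetical placeholders, and only the SPARK_EXTRA_CLASSPATH variable itself comes from the change below.

```dockerfile
# Hypothetical image layered on top of the base executor image built by
# docker-minimal-bundle; the tag and jar path are examples only.
FROM spark-executor:latest

# Ship an extra library inside the image.
COPY my-library.jar /opt/spark/extra/my-library.jar

# Picked up by the base image's CMD and prepended to SPARK_CLASSPATH at container startup.
ENV SPARK_EXTRA_CLASSPATH /opt/spark/extra/my-library.jar
```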
--- .../docker-minimal-bundle/src/main/docker/executor/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index 9c9efb23d7e95..b3b0acc3b64b8 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -27,5 +27,6 @@ COPY examples /opt/spark/examples CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_MOUNTED_CLASSPATH}+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_EXECUTOR_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXECUTOR_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." .; fi && \ exec /sbin/tini -- ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP From 4dfb18485fbb423611b5a68e0c33192635375aaa Mon Sep 17 00:00:00 2001 From: Varun Date: Fri, 21 Jul 2017 16:36:08 -0700 Subject: [PATCH 152/156] Changes to support executor recovery behavior during static allocation. (#244) * Changes to support executor recovery behavior during static allocation. * addressed review comments * Style changes and removed incorrectly merged code * addressed latest review comments * changed import order * Minor changes to avoid exceptions when exit code is missing * fixed style check * Addressed review comments from Yinan Li. * Addressed comments and got rid of an explicit lock object. * Fixed imports order.
* Addressed review comments from Matt * Couple of style fixes --- .../KubernetesClusterSchedulerBackend.scala | 206 +++++++++++++++--- 1 file changed, 173 insertions(+), 33 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index a0753728f8cfd..c993bff8df962 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -18,18 +18,20 @@ package org.apache.spark.scheduler.cluster.kubernetes import java.io.Closeable import java.net.InetAddress -import java.util.concurrent.TimeUnit +import java.util.Collections +import java.util.concurrent.{ConcurrentHashMap, TimeUnit} import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference} +import scala.collection.{concurrent, mutable} +import scala.collection.JavaConverters._ +import scala.concurrent.{ExecutionContext, Future} + import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule -import io.fabric8.kubernetes.api.model.{ContainerBuilder, ContainerPortBuilder, EnvVarBuilder, EnvVarSourceBuilder, Pod, PodBuilder, QuantityBuilder} +import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import org.apache.commons.io.FilenameUtils -import scala.collection.JavaConverters._ -import scala.collection.mutable -import scala.concurrent.{ExecutionContext, Future} import org.apache.spark.{SparkContext, SparkEnv, SparkException} import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, InitContainerResourceStagingServerSecretPlugin, PodWithDetachedInitContainer, SparkPodInitContainerBootstrap} @@ -38,8 +40,8 @@ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.kubernetes.submit.InitContainerUtil import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.shuffle.kubernetes.KubernetesExternalShuffleClient -import org.apache.spark.rpc.{RpcCallContext, RpcEndpointAddress, RpcEnv} -import org.apache.spark.scheduler.TaskSchedulerImpl +import org.apache.spark.rpc.{RpcAddress, RpcCallContext, RpcEndpointAddress, RpcEnv} +import org.apache.spark.scheduler.{ExecutorExited, SlaveLost, TaskSchedulerImpl} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.{RetrieveSparkAppConfig, SparkAppConfig} import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.{ThreadUtils, Utils} @@ -55,10 +57,18 @@ private[spark] class KubernetesClusterSchedulerBackend( import KubernetesClusterSchedulerBackend._ private val RUNNING_EXECUTOR_PODS_LOCK = new Object - private val runningExecutorPods = new mutable.HashMap[String, Pod] // Indexed by executor IDs. - + // Indexed by executor IDs and guarded by RUNNING_EXECUTOR_PODS_LOCK. + private val runningExecutorsToPods = new mutable.HashMap[String, Pod] + // Indexed by executor pod names and guarded by RUNNING_EXECUTOR_PODS_LOCK. 
+ private val runningPodsToExecutors = new mutable.HashMap[String, String] + // TODO(varun): Get rid of this lock object by my making the underlying map a concurrent hash map. private val EXECUTOR_PODS_BY_IPS_LOCK = new Object - private val executorPodsByIPs = new mutable.HashMap[String, Pod] // Indexed by executor IP addrs. + // Indexed by executor IP addrs and guarded by EXECUTOR_PODS_BY_IPS_LOCK + private val executorPodsByIPs = new mutable.HashMap[String, Pod] + private val failedPods: concurrent.Map[String, ExecutorExited] = new + ConcurrentHashMap[String, ExecutorExited]().asScala + private val executorsToRemove = Collections.newSetFromMap[String]( + new ConcurrentHashMap[String, java.lang.Boolean]()).asScala private val executorExtraClasspath = conf.get( org.apache.spark.internal.config.EXECUTOR_CLASS_PATH) @@ -135,7 +145,7 @@ private[spark] class KubernetesClusterSchedulerBackend( val parsedShuffleLabels = ConfigurationUtils.parseKeyValuePairs( conf.get(KUBERNETES_SHUFFLE_LABELS), KUBERNETES_SHUFFLE_LABELS.key, "shuffle-labels") - if (parsedShuffleLabels.size == 0) { + if (parsedShuffleLabels.isEmpty) { throw new SparkException(s"Dynamic allocation enabled " + s"but no ${KUBERNETES_SHUFFLE_LABELS.key} specified") } @@ -170,12 +180,13 @@ private[spark] class KubernetesClusterSchedulerBackend( private val executorWatchResource = new AtomicReference[Closeable] protected var totalExpectedExecutors = new AtomicInteger(0) + private val driverUrl = RpcEndpointAddress( sc.getConf.get("spark.driver.host"), sc.getConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT), CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString - private val initialExecutors = getInitialTargetExecutorNumber(1) + private val initialExecutors = getInitialTargetExecutorNumber() private val podAllocationInterval = conf.get(KUBERNETES_ALLOCATION_BATCH_DELAY) require(podAllocationInterval > 0, s"Allocation batch delay " + @@ -192,23 +203,74 @@ private[spark] class KubernetesClusterSchedulerBackend( private val allocatorRunnable: Runnable = new Runnable { + // Number of times we are allowed check for the loss reason for an executor before we give up + // and assume the executor failed for good, and attribute it to a framework fault. + private val MAX_EXECUTOR_LOST_REASON_CHECKS = 10 + private val executorsToRecover = new mutable.HashSet[String] + // Maintains a map of executor id to count of checks performed to learn the loss reason + // for an executor. + private val executorReasonChecks = new mutable.HashMap[String, Int] + override def run(): Unit = { - if (totalRegisteredExecutors.get() < runningExecutorPods.size) { - logDebug("Waiting for pending executors before scaling") - } else if (totalExpectedExecutors.get() <= runningExecutorPods.size) { - logDebug("Maximum allowed executor limit reached. Not scaling up further.") - } else { - val nodeToLocalTaskCount = getNodesWithLocalTaskCounts - RUNNING_EXECUTOR_PODS_LOCK.synchronized { + removeFailedExecutors() + RUNNING_EXECUTOR_PODS_LOCK.synchronized { + if (totalRegisteredExecutors.get() < runningExecutorsToPods.size) { + logDebug("Waiting for pending executors before scaling") + } else if (totalExpectedExecutors.get() <= runningExecutorsToPods.size) { + logDebug("Maximum allowed executor limit reached. 
Not scaling up further.") + } else { + val nodeToLocalTaskCount = getNodesWithLocalTaskCounts for (i <- 0 until math.min( - totalExpectedExecutors.get - runningExecutorPods.size, podAllocationSize)) { - runningExecutorPods += allocateNewExecutorPod(nodeToLocalTaskCount) + totalExpectedExecutors.get - runningExecutorsToPods.size, podAllocationSize)) { + val (executorId, pod) = allocateNewExecutorPod(nodeToLocalTaskCount) + runningExecutorsToPods.put(executorId, pod) + runningPodsToExecutors.put(pod.getMetadata.getName, executorId) logInfo( - s"Requesting a new executor, total executors is now ${runningExecutorPods.size}") + s"Requesting a new executor, total executors is now ${runningExecutorsToPods.size}") } } } } + + def removeFailedExecutors(): Unit = { + val localRunningExecutorsToPods = RUNNING_EXECUTOR_PODS_LOCK.synchronized { + runningExecutorsToPods.toMap + } + executorsToRemove.foreach { case (executorId) => + localRunningExecutorsToPods.get(executorId).map { pod: Pod => + failedPods.get(pod.getMetadata.getName).map { executorExited: ExecutorExited => + logDebug(s"Removing executor $executorId with loss reason " + executorExited.message) + removeExecutor(executorId, executorExited) + if (!executorExited.exitCausedByApp) { + executorsToRecover.add(executorId) + } + }.getOrElse(removeExecutorOrIncrementLossReasonCheckCount(executorId)) + }.getOrElse(removeExecutorOrIncrementLossReasonCheckCount(executorId)) + + executorsToRecover.foreach(executorId => { + executorsToRemove -= executorId + executorReasonChecks -= executorId + RUNNING_EXECUTOR_PODS_LOCK.synchronized { + runningExecutorsToPods.remove(executorId).map { pod: Pod => + kubernetesClient.pods().delete(pod) + runningPodsToExecutors.remove(pod.getMetadata.getName) + }.getOrElse(logWarning(s"Unable to remove pod for unknown executor $executorId")) + } + }) + executorsToRecover.clear() + } + } + + def removeExecutorOrIncrementLossReasonCheckCount(executorId: String): Unit = { + val reasonCheckCount = executorReasonChecks.getOrElse(executorId, 0) + if (reasonCheckCount > MAX_EXECUTOR_LOST_REASON_CHECKS) { + removeExecutor(executorId, SlaveLost("Executor lost for unknown reasons")) + executorsToRecover.add(executorId) + executorReasonChecks -= executorId + } else { + executorReasonChecks.put(executorId, reasonCheckCount + 1) + } + } } private val objectMapper = new ObjectMapper().registerModule(DefaultScalaModule) @@ -280,8 +342,9 @@ private[spark] class KubernetesClusterSchedulerBackend( // indication as to why. 
try { RUNNING_EXECUTOR_PODS_LOCK.synchronized { - runningExecutorPods.values.foreach(kubernetesClient.pods().delete(_)) - runningExecutorPods.clear() + runningExecutorsToPods.values.foreach(kubernetesClient.pods().delete(_)) + runningExecutorsToPods.clear() + runningPodsToExecutors.clear() } EXECUTOR_PODS_BY_IPS_LOCK.synchronized { executorPodsByIPs.clear() @@ -534,11 +597,6 @@ private[spark] class KubernetesClusterSchedulerBackend( } } - override def createDriverEndpoint( - properties: Seq[(String, String)]): DriverEndpoint = { - new KubernetesDriverEndpoint(rpcEnv, properties) - } - override def doRequestTotalExecutors(requestedTotal: Int): Future[Boolean] = Future[Boolean] { totalExpectedExecutors.set(requestedTotal) true @@ -547,8 +605,10 @@ private[spark] class KubernetesClusterSchedulerBackend( override def doKillExecutors(executorIds: Seq[String]): Future[Boolean] = Future[Boolean] { RUNNING_EXECUTOR_PODS_LOCK.synchronized { for (executor <- executorIds) { - runningExecutorPods.remove(executor) match { - case Some(pod) => kubernetesClient.pods().delete(pod) + runningExecutorsToPods.remove(executor) match { + case Some(pod) => + kubernetesClient.pods().delete(pod) + runningPodsToExecutors.remove(pod.getMetadata.getName) case None => logWarning(s"Unable to remove pod for unknown executor $executor") } } @@ -564,6 +624,8 @@ private[spark] class KubernetesClusterSchedulerBackend( private class ExecutorPodsWatcher extends Watcher[Pod] { + private val DEFAULT_CONTAINER_FAILURE_EXIT_STATUS = -1 + override def eventReceived(action: Action, pod: Pod): Unit = { if (action == Action.MODIFIED && pod.getStatus.getPhase == "Running" && pod.getMetadata.getDeletionTimestamp == null) { @@ -583,12 +645,75 @@ private[spark] class KubernetesClusterSchedulerBackend( executorPodsByIPs -= podIP } } + if (action == Action.ERROR) { + logInfo(s"Received pod $podName exited event. Reason: " + pod.getStatus.getReason) + handleErroredPod(pod) + } else if (action == Action.DELETED) { + logInfo(s"Received delete pod $podName event. Reason: " + pod.getStatus.getReason) + handleDeletedPod(pod) + } } } override def onClose(cause: KubernetesClientException): Unit = { logDebug("Executor pod watch closed.", cause) } + + def getExecutorExitStatus(pod: Pod): Int = { + val containerStatuses = pod.getStatus.getContainerStatuses + if (!containerStatuses.isEmpty) { + // we assume the first container represents the pod status. This assumption may not hold + // true in the future. Revisit this if side-car containers start running inside executor + // pods. + getExecutorExitStatus(containerStatuses.get(0)) + } else DEFAULT_CONTAINER_FAILURE_EXIT_STATUS + } + + def getExecutorExitStatus(containerStatus: ContainerStatus): Int = { + Option(containerStatus.getState).map(containerState => + Option(containerState.getTerminated).map(containerStateTerminated => + containerStateTerminated.getExitCode.intValue()).getOrElse(UNKNOWN_EXIT_CODE) + ).getOrElse(UNKNOWN_EXIT_CODE) + } + + def isPodAlreadyReleased(pod: Pod): Boolean = { + RUNNING_EXECUTOR_PODS_LOCK.synchronized { + !runningPodsToExecutors.contains(pod.getMetadata.getName) + } + } + + def handleErroredPod(pod: Pod): Unit = { + val alreadyReleased = isPodAlreadyReleased(pod) + val containerExitStatus = getExecutorExitStatus(pod) + // container was probably actively killed by the driver. 
+ val exitReason = if (alreadyReleased) { + ExecutorExited(containerExitStatus, exitCausedByApp = false, + s"Container in pod " + pod.getMetadata.getName + + " exited from explicit termination request.") + } else { + val containerExitReason = containerExitStatus match { + case VMEM_EXCEEDED_EXIT_CODE | PMEM_EXCEEDED_EXIT_CODE => + memLimitExceededLogMessage(pod.getStatus.getReason) + case _ => + // Here we can't be sure that that exit was caused by the application but this seems + // to be the right default since we know the pod was not explicitly deleted by + // the user. + "Pod exited with following container exit status code " + containerExitStatus + } + ExecutorExited(containerExitStatus, exitCausedByApp = true, containerExitReason) + } + failedPods.put(pod.getMetadata.getName, exitReason) + } + + def handleDeletedPod(pod: Pod): Unit = { + val exitReason = ExecutorExited(getExecutorExitStatus(pod), exitCausedByApp = false, + "Pod " + pod.getMetadata.getName + " deleted or lost.") + failedPods.put(pod.getMetadata.getName, exitReason) + } + } + + override def createDriverEndpoint(properties: Seq[(String, String)]): DriverEndpoint = { + new KubernetesDriverEndpoint(rpcEnv, properties) } private class KubernetesDriverEndpoint( @@ -597,6 +722,14 @@ private[spark] class KubernetesClusterSchedulerBackend( extends DriverEndpoint(rpcEnv, sparkProperties) { private val externalShufflePort = conf.getInt("spark.shuffle.service.port", 7337) + override def onDisconnected(rpcAddress: RpcAddress): Unit = { + addressToExecutorId.get(rpcAddress).foreach { executorId => + if (disableExecutor(executorId)) { + executorsToRemove.add(executorId) + } + } + } + override def receiveAndReply( context: RpcCallContext): PartialFunction[Any, Unit] = { new PartialFunction[Any, Unit]() { @@ -615,7 +748,7 @@ private[spark] class KubernetesClusterSchedulerBackend( var resolvedProperties = sparkProperties val runningExecutorPod = kubernetesClient .pods() - .withName(runningExecutorPods(executorId).getMetadata.getName) + .withName(runningExecutorsToPods(executorId).getMetadata.getName) .get() val nodeName = runningExecutorPod.getSpec.getNodeName val shufflePodIp = shufflePodCache.get.getShufflePodForExecutor(nodeName) @@ -637,7 +770,6 @@ private[spark] class KubernetesClusterSchedulerBackend( }.orElse(super.receiveAndReply(context)) } } - } case class ShuffleServiceConfig( shuffleNamespace: String, @@ -647,6 +779,14 @@ case class ShuffleServiceConfig( private object KubernetesClusterSchedulerBackend { private val DEFAULT_STATIC_PORT = 10000 private val EXECUTOR_ID_COUNTER = new AtomicLong(0L) + private val VMEM_EXCEEDED_EXIT_CODE = -103 + private val PMEM_EXCEEDED_EXIT_CODE = -104 + private val UNKNOWN_EXIT_CODE = -111 + + def memLimitExceededLogMessage(diagnostics: String): String = { + s"Pod/Container killed for exceeding memory limits. $diagnostics" + + " Consider boosting spark executor memory overhead." 
+ } } /** From 37f99430caef57740ceedf4e9a47ce9f509e0b66 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Fri, 21 Jul 2017 20:07:12 -0600 Subject: [PATCH 153/156] Update pom to v0.3.0 of spark-kubernetes (#385) --- assembly/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/java8-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml | 2 +- mesos/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/docker-minimal-bundle/pom.xml | 2 +- .../kubernetes/integration-tests-spark-jobs-helpers/pom.xml | 2 +- .../kubernetes/integration-tests-spark-jobs/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- 41 files changed, 41 insertions(+), 41 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index a4f695e790ce3..8bbf8c4eb5058 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 58889a55cf651..02b44e15fc510 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 2daacc14d42b5..a994ebec4ef0c 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index e14b4748efca9..94dfe2842967f 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 24fd97315ef4e..2a0535c72e3a8 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index e07e51c34ec93..9ffe6aabf7bb1 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 0bf7005b32eeb..1384e0c4a3d17 100644 --- a/common/unsafe/pom.xml +++ 
b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 9cac063dc62e7..d539d5736ad1d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/examples/pom.xml b/examples/pom.xml index 0e91ae2a14dab..b4f079e182217 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index e4da21cb9b4be..ca091c6a41f35 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 41cf53acdb38c..926104458e098 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 75c6f0596eae6..b8be42be5af50 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/flume/pom.xml b/external/flume/pom.xml index ac410bd46c403..44f19296d1e4d 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/java8-tests/pom.xml b/external/java8-tests/pom.xml index 884660d7dffdf..1bbdd774fd43b 100644 --- a/external/java8-tests/pom.xml +++ b/external/java8-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 7d0bd87bdcb93..07dfe3727205c 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index c2dafb03bcb8f..503f20361fd7d 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index d147aef12b9cc..3c1e90b7b9e95 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8-assembly/pom.xml b/external/kafka-0-8-assembly/pom.xml index e11c2d0937517..e92964c8d101f 100644 --- a/external/kafka-0-8-assembly/pom.xml +++ b/external/kafka-0-8-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8/pom.xml 
b/external/kafka-0-8/pom.xml index 9955cd65f6475..9884c0e5ab7aa 100644 --- a/external/kafka-0-8/pom.xml +++ b/external/kafka-0-8/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 284425d4f43f2..aaa95e5d632bb 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 6d7fa95aec967..cbc468e67c2c7 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 379526b682c85..5ea9c5e9cff75 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index ec4cfbad99d01..5226c506a3038 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 6a536039b3975..bf09f5dc0fa1a 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/mesos/pom.xml b/mesos/pom.xml index ab3744863a5c6..8fd8aa030d636 100644 --- a/mesos/pom.xml +++ b/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 073b6482ce930..aca6430887725 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 6d8fe24d4e185..02817657c20ed 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 7035938515a58..952f75b0776d3 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT pom Spark Project Parent POM http://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index 94ecb10dfa6e4..0f4d64342a9a9 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index c90a824b1b8b1..d6d6cb0699e34 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index 51ca26c0134fa..4b15d0ed54b4a 100644 --- 
a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml index 206059bd8e5b1..7283b2bb373c4 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml index 555398aa3e6d9..0f838d991358f 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index cd3ccad0a2b22..4776d2e5d4f0c 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 2b5faf37ddd0b..9559c58ef9525 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index f98deb0893af7..6fe6ac25e2d38 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index ece565e607315..d2b407f7c4ae6 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 2d12eb50905b6..070dd02cc737b 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index f55bbc61df071..2f3a682f1b990 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index eafb3f283c619..10ca3a5d7d0d6 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/yarn/pom.xml b/yarn/pom.xml index 00812174cdf0c..169637d595757 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml From af446e6cd12a5b37404ede46d460e89d4c726e26 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: 
Fri, 4 Aug 2017 15:16:06 -0600 Subject: [PATCH 154/156] Fix bug with null arguments (#415) --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index df50af13f71a3..7c93a958fb4f9 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -623,7 +623,9 @@ object SparkSubmit { if (args.isPython) { childArgs ++= Array("--primary-py-file", args.primaryResource) childArgs ++= Array("--main-class", "org.apache.spark.deploy.PythonRunner") - childArgs ++= Array("--other-py-files", args.pyFiles) + if (args.pyFiles != null) { + childArgs ++= Array("--other-py-files", args.pyFiles) + } } else { childArgs ++= Array("--primary-java-resource", args.primaryResource) childArgs ++= Array("--main-class", args.mainClass) From 96a1d8c87598a554b2d769f9843ae9306f2abd1c Mon Sep 17 00:00:00 2001 From: Yinan Li Date: Tue, 8 Aug 2017 08:09:36 -0700 Subject: [PATCH 155/156] Flag-guard expensive DNS lookup of cluster node full names, part of HDFS locality support (#412) (#421) * Flag-guard expensive DNS lookup of cluster node full names, part of HDFS locality support * Clean up a bit * Improve unit tests --- .../spark/deploy/kubernetes/config.scala | 13 +++++++ .../kubernetes/KubernetesTaskSetManager.scala | 21 ++++++++---- .../KubernetesTaskSetManagerSuite.scala | 34 ++++++++++++++++++- 3 files changed, 61 insertions(+), 7 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index c6772c1cb5ae4..f9c4c9c6a1e18 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -491,6 +491,19 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val KUBERNETES_DRIVER_CLUSTER_NODENAME_DNS_LOOKUP_ENABLED = + ConfigBuilder("spark.kubernetes.driver.hdfslocality.clusterNodeNameDNSLookup.enabled") + .doc("Whether or not HDFS locality support code should look up DNS for full hostnames of" + + " cluster nodes. In some K8s clusters, notably GKE, cluster node names are short" + + " hostnames, and so comparing them against HDFS datanode hostnames always fail. To fix," + + " enable this flag. This is disabled by default because DNS lookup can be expensive." + + " The driver can slow down and fail to respond to executor heartbeats in time." 
+ + " If enabling this flag, make sure your DNS server has enough capacity" + + " for the workload.") + .internal() + .booleanConf + .createWithDefault(false) + private[spark] val KUBERNETES_EXECUTOR_LIMIT_CORES = ConfigBuilder("spark.kubernetes.executor.limit.cores") .doc("Specify the hard cpu limit for a single executor pod") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala index 51566d03a7a6c..17710fada2876 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala @@ -20,6 +20,7 @@ import java.net.InetAddress import scala.collection.mutable.ArrayBuffer +import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.scheduler.{TaskSchedulerImpl, TaskSet, TaskSetManager} private[spark] class KubernetesTaskSetManager( @@ -29,6 +30,8 @@ private[spark] class KubernetesTaskSetManager( inetAddressUtil: InetAddressUtil = new InetAddressUtil) extends TaskSetManager(sched, taskSet, maxTaskFailures) { + private val conf = sched.sc.conf + /** * Overrides the lookup to use not only the executor pod IP, but also the cluster node * name and host IP address that the pod is running on. The base class may have populated @@ -58,13 +61,19 @@ private[spark] class KubernetesTaskSetManager( s"$executorIP using cluster node IP $clusterNodeIP") pendingTasksClusterNodeIP } else { - val clusterNodeFullName = inetAddressUtil.getFullHostName(clusterNodeIP) - val pendingTasksClusterNodeFullName = super.getPendingTasksForHost(clusterNodeFullName) - if (pendingTasksClusterNodeFullName.nonEmpty) { - logDebug(s"Got preferred task list $pendingTasksClusterNodeFullName " + - s"for executor host $executorIP using cluster node full name $clusterNodeFullName") + if (conf.get(KUBERNETES_DRIVER_CLUSTER_NODENAME_DNS_LOOKUP_ENABLED)) { + val clusterNodeFullName = inetAddressUtil.getFullHostName(clusterNodeIP) + val pendingTasksClusterNodeFullName = super.getPendingTasksForHost( + clusterNodeFullName) + if (pendingTasksClusterNodeFullName.nonEmpty) { + logDebug(s"Got preferred task list $pendingTasksClusterNodeFullName " + + s"for executor host $executorIP using cluster node full name " + + s"$clusterNodeFullName") + } + pendingTasksClusterNodeFullName + } else { + pendingTasksExecutorIP // Empty } - pendingTasksClusterNodeFullName } } } else { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala index 7618c137ab22b..864ff40d88c5c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala @@ -20,11 +20,13 @@ import scala.collection.mutable.ArrayBuffer import io.fabric8.kubernetes.api.model.{Pod, PodSpec, PodStatus} import org.mockito.Mockito._ +import org.scalatest.BeforeAndAfter import org.apache.spark.{SparkContext, SparkFunSuite} +import 
org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.scheduler.{FakeTask, FakeTaskScheduler, HostTaskLocation, TaskLocation} -class KubernetesTaskSetManagerSuite extends SparkFunSuite { +class KubernetesTaskSetManagerSuite extends SparkFunSuite with BeforeAndAfter { val sc = new SparkContext("local", "test") val sched = new FakeTaskScheduler(sc, @@ -32,6 +34,10 @@ class KubernetesTaskSetManagerSuite extends SparkFunSuite { val backend = mock(classOf[KubernetesClusterSchedulerBackend]) sched.backend = backend + before { + sc.conf.remove(KUBERNETES_DRIVER_CLUSTER_NODENAME_DNS_LOOKUP_ENABLED) + } + test("Find pending tasks for executors using executor pod IP addresses") { val taskSet = FakeTask.createTaskSet(3, Seq(TaskLocation("10.0.0.1", "execA")), // Task 0 runs on executor pod 10.0.0.1. @@ -76,7 +82,33 @@ class KubernetesTaskSetManagerSuite extends SparkFunSuite { assert(manager.getPendingTasksForHost("10.0.0.1") == ArrayBuffer(1, 0)) } + test("Test DNS lookup is disabled by default for cluster node full hostnames") { + assert(!sc.conf.get(KUBERNETES_DRIVER_CLUSTER_NODENAME_DNS_LOOKUP_ENABLED)) + } + + test("Find pending tasks for executors, but avoid looking up cluster node FQDNs from DNS") { + sc.conf.set(KUBERNETES_DRIVER_CLUSTER_NODENAME_DNS_LOOKUP_ENABLED, false) + val taskSet = FakeTask.createTaskSet(2, + Seq(HostTaskLocation("kube-node1.domain1")), // Task 0's partition belongs to datanode here. + Seq(HostTaskLocation("kube-node1.domain1")) // task 1's partition belongs to datanode here. + ) + val spec1 = mock(classOf[PodSpec]) + when(spec1.getNodeName).thenReturn("kube-node1") + val pod1 = mock(classOf[Pod]) + when(pod1.getSpec).thenReturn(spec1) + val status1 = mock(classOf[PodStatus]) + when(status1.getHostIP).thenReturn("196.0.0.5") + when(pod1.getStatus).thenReturn(status1) + val inetAddressUtil = mock(classOf[InetAddressUtil]) + when(inetAddressUtil.getFullHostName("196.0.0.5")).thenReturn("kube-node1.domain1") + when(backend.getExecutorPodByIP("10.0.0.1")).thenReturn(Some(pod1)) + + val manager = new KubernetesTaskSetManager(sched, taskSet, maxTaskFailures = 2, inetAddressUtil) + assert(manager.getPendingTasksForHost("10.0.0.1") == ArrayBuffer()) + } + test("Find pending tasks for executors using cluster node FQDNs that executor pods run on") { + sc.conf.set(KUBERNETES_DRIVER_CLUSTER_NODENAME_DNS_LOOKUP_ENABLED, true) val taskSet = FakeTask.createTaskSet(2, Seq(HostTaskLocation("kube-node1.domain1")), // Task 0's partition belongs to datanode here. Seq(HostTaskLocation("kube-node1.domain1")) // task 1's partition belongs to datanode here. 
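To make the opt-in concrete, here is a minimal sketch of enabling the flag from application code. The property key is the one defined by KUBERNETES_DRIVER_CLUSTER_NODENAME_DNS_LOOKUP_ENABLED above; the surrounding SparkConf setup (master URL, app name, image settings) is omitted and would be supplied as usual.

```scala
import org.apache.spark.SparkConf

// Sketch only: opt in to resolving short cluster node names (e.g. on GKE) to FQDNs
// so they can be matched against HDFS datanode hostnames. Disabled by default because
// the DNS lookups can be expensive for the driver.
val conf = new SparkConf()
  .set("spark.kubernetes.driver.hdfslocality.clusterNodeNameDNSLookup.enabled", "true")
```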
From 84d43368075f453296c33e3c9fe5466da931e60e Mon Sep 17 00:00:00 2001
From: Yinan Li
Date: Tue, 8 Aug 2017 11:49:12 -0700
Subject: [PATCH 156/156] Updated pom version to 0.3.1 for the new bug fix 2.1 release

---
 assembly/pom.xml | 2 +-
 common/network-common/pom.xml | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml | 2 +-
 common/sketch/pom.xml | 2 +-
 common/tags/pom.xml | 2 +-
 common/unsafe/pom.xml | 2 +-
 core/pom.xml | 2 +-
 examples/pom.xml | 2 +-
 external/docker-integration-tests/pom.xml | 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml | 2 +-
 external/java8-tests/pom.xml | 2 +-
 external/kafka-0-10-assembly/pom.xml | 2 +-
 external/kafka-0-10-sql/pom.xml | 2 +-
 external/kafka-0-10/pom.xml | 2 +-
 external/kafka-0-8-assembly/pom.xml | 2 +-
 external/kafka-0-8/pom.xml | 2 +-
 external/kinesis-asl-assembly/pom.xml | 2 +-
 external/kinesis-asl/pom.xml | 2 +-
 external/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml | 2 +-
 launcher/pom.xml | 2 +-
 mesos/pom.xml | 2 +-
 mllib-local/pom.xml | 2 +-
 mllib/pom.xml | 2 +-
 pom.xml | 2 +-
 repl/pom.xml | 2 +-
 resource-managers/kubernetes/core/pom.xml | 2 +-
 resource-managers/kubernetes/docker-minimal-bundle/pom.xml | 2 +-
 .../kubernetes/integration-tests-spark-jobs-helpers/pom.xml | 2 +-
 .../kubernetes/integration-tests-spark-jobs/pom.xml | 2 +-
 resource-managers/kubernetes/integration-tests/pom.xml | 2 +-
 sql/catalyst/pom.xml | 2 +-
 sql/core/pom.xml | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 yarn/pom.xml | 2 +-
 41 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/assembly/pom.xml b/assembly/pom.xml index 8bbf8c4eb5058..ab733e7a6aa35 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../pom.xml
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 02b44e15fc510..de83219cddf37 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index a994ebec4ef0c..ab418e30c74f8 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 94dfe2842967f..1ec41447511ae 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 2a0535c72e3a8..e143da1f9790f 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 9ffe6aabf7bb1..fa67d941cf600 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 1384e0c4a3d17..93ccd5ba4e8cd 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/core/pom.xml b/core/pom.xml index d539d5736ad1d..9b273984ce321 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml index b4f079e182217..f301aa595f256 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../pom.xml
diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index ca091c6a41f35..f00f67c145740 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 926104458e098..2fcaa114555d5 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index b8be42be5af50..5378d1e1dbdf2 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 44f19296d1e4d..2c96f56c17534 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/external/java8-tests/pom.xml b/external/java8-tests/pom.xml index 1bbdd774fd43b..013c95c7e691e 100644 --- a/external/java8-tests/pom.xml +++ b/external/java8-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 07dfe3727205c..c2634c5ca7e5d 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 503f20361fd7d..bc0fb963e2496 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 3c1e90b7b9e95..9ed8511d9cba4 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/external/kafka-0-8-assembly/pom.xml b/external/kafka-0-8-assembly/pom.xml index e92964c8d101f..69f5d81a97038 100644 --- a/external/kafka-0-8-assembly/pom.xml +++ b/external/kafka-0-8-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/external/kafka-0-8/pom.xml b/external/kafka-0-8/pom.xml index 9884c0e5ab7aa..cd2d489fa1e26 100644 --- a/external/kafka-0-8/pom.xml +++ b/external/kafka-0-8/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index aaa95e5d632bb..74a4f779c255a 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index cbc468e67c2c7..622d6bf366228 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 5ea9c5e9cff75..c72fed52c00f3 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml index 5226c506a3038..dc0f29ea2ca32 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../pom.xml
diff --git a/launcher/pom.xml b/launcher/pom.xml index bf09f5dc0fa1a..f6cac3262c756 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../pom.xml
diff --git a/mesos/pom.xml b/mesos/pom.xml index 8fd8aa030d636..36b1d08bb4516 100644 --- a/mesos/pom.xml +++ b/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../pom.xml
diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index aca6430887725..4b0712df44367 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml index 02817657c20ed..bbe0af191952f 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../pom.xml
diff --git a/pom.xml b/pom.xml index 952f75b0776d3..6ec3d7e0bcf80 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT pom Spark Project Parent POM http://spark.apache.org/
diff --git a/repl/pom.xml b/repl/pom.xml index 0f4d64342a9a9..a3b2010eb6fb9 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../pom.xml
diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index d6d6cb0699e34..53d40a1a6bf1d 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../../pom.xml
diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index 4b15d0ed54b4a..31184794972db 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../../pom.xml
diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml index 7283b2bb373c4..94c982d5e1108 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../../pom.xml
diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml index 0f838d991358f..9183bbc4f72e8 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../../pom.xml
diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 4776d2e5d4f0c..32db53c670337 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../../pom.xml
diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 9559c58ef9525..160f0aedd2293 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 6fe6ac25e2d38..80adae9015935 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index d2b407f7c4ae6..5ae2130732055 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 070dd02cc737b..d1bbaea15f557 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml index 2f3a682f1b990..72e558d5aae77 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml index 10ca3a5d7d0d6..dc3f922ba0ccb 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../pom.xml
diff --git a/yarn/pom.xml b/yarn/pom.xml index 169637d595757..f4d5f05a810be 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.1.0-k8s-0.3.1-SNAPSHOT ../pom.xml
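Note: the rendered hunks above show only the text content of each changed line; the surrounding pom.xml element tags are not displayed. For reference, the change in every module is the same one-line edit to the <parent> block, sketched here for assembly/pom.xml with the XML tags and indentation reconstructed (approximate, not verbatim from the patch):

@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.11</artifactId>
-    <version>2.1.0-k8s-0.3.0-SNAPSHOT</version>
+    <version>2.1.0-k8s-0.3.1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

The other 40 modules differ only in the hunk's starting line number and in the relativePath depth (../pom.xml, ../../pom.xml, or ../../../pom.xml); the root pom.xml has no relativePath and instead carries the project name and URL in its context lines.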