-
Notifications
You must be signed in to change notification settings - Fork 117
[WIP] Use spark.jars and spark.files instead of k8s-specific config #104
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,9 +16,14 @@ | |
| */ | ||
| package org.apache.spark.deploy.rest | ||
|
|
||
| import java.io.File | ||
|
|
||
| import com.fasterxml.jackson.annotation.{JsonSubTypes, JsonTypeInfo} | ||
| import com.google.common.io.Files | ||
| import org.apache.commons.codec.binary.Base64 | ||
|
|
||
| import org.apache.spark.SPARK_VERSION | ||
| import org.apache.spark.util.Utils | ||
|
|
||
| case class KubernetesCreateSubmissionRequest( | ||
| appResource: AppResource, | ||
|
|
@@ -63,3 +68,21 @@ class PingResponse extends SubmitRestProtocolResponse { | |
| serverSparkVersion = SPARK_VERSION | ||
| } | ||
|
|
||
| object AppResource { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Mark the object as |
||
| def assemble(appResource: String): AppResource = { | ||
| val appResourceUri = Utils.resolveURI(appResource) | ||
| appResourceUri.getScheme match { | ||
| case "file" | null => | ||
| val appFile = new File(appResourceUri.getPath) | ||
| if (!appFile.isFile) { | ||
| throw new IllegalStateException("Provided local file path does not exist" + | ||
| s" or is not a file: ${appFile.getAbsolutePath}") | ||
| } | ||
| val fileBytes = Files.toByteArray(appFile) | ||
| val fileBase64 = Base64.encodeBase64String(fileBytes) | ||
| UploadedAppResource(resourceBase64Contents = fileBase64, name = appFile.getName) | ||
| case "container" | "local" => ContainerAppResource(appResourceUri.getPath) | ||
| case other => RemoteAppResource(other) | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -158,17 +158,21 @@ private[spark] class KubernetesSparkRestServer( | |
| handleError("Unauthorized to submit application.") | ||
| } else { | ||
| val tempDir = Utils.createTempDir() | ||
| val appResourcePath = resolvedAppResource(appResource, tempDir) | ||
| val appResourcePath = resolveAppResourceToLocalPath(appResource, tempDir) | ||
| val writtenJars = writeUploadedJars(uploadedJars, tempDir) | ||
| val writtenFiles = writeUploadedFiles(uploadedFiles) | ||
| val resolvedSparkProperties = new mutable.HashMap[String, String] | ||
| resolvedSparkProperties ++= sparkProperties | ||
|
|
||
| // Resolve driver classpath and jars | ||
| val originalJars = sparkProperties.get("spark.jars") | ||
| val nonUploadedJars = sparkProperties.get("spark.jars") | ||
| .map(_.split(",")) | ||
| .map(_.filter(Utils.resolveURI(_).getScheme match { | ||
| case "file" | null => false | ||
| case _ => true | ||
| })) | ||
| .getOrElse(Array.empty[String]) | ||
| val resolvedJars = writtenJars ++ originalJars ++ Array(appResourcePath) | ||
| val resolvedJars = writtenJars ++ nonUploadedJars ++ Array(appResourcePath) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here we're missing adding jars with the scheme |
||
| val sparkJars = new File(sparkHome, "jars").listFiles().map(_.getAbsolutePath) | ||
| val driverExtraClasspath = sparkProperties | ||
| .get("spark.driver.extraClassPath") | ||
|
|
@@ -180,10 +184,14 @@ private[spark] class KubernetesSparkRestServer( | |
| resolvedSparkProperties("spark.jars") = resolvedJars.mkString(",") | ||
|
|
||
| // Resolve spark.files | ||
| val originalFiles = sparkProperties.get("spark.files") | ||
| val nonUploadedFiles = sparkProperties.get("spark.files") | ||
| .map(_.split(",")) | ||
| .map(_.filter(Utils.resolveURI(_).getScheme match { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a common enough pattern that we could think about separating this out to a separate class. Also, some other places in core Spark have used this paradigm:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using the above we can shorthand a lot of things and avoid the case-match entirely. For example:
|
||
| case "file" | null => false | ||
| case _ => true | ||
| })) | ||
| .getOrElse(Array.empty[String]) | ||
| val resolvedFiles = originalFiles ++ writtenFiles | ||
| val resolvedFiles = nonUploadedFiles ++ writtenFiles | ||
| resolvedSparkProperties("spark.files") = resolvedFiles.mkString(",") | ||
|
|
||
| val command = new ArrayBuffer[String] | ||
|
|
@@ -250,7 +258,7 @@ private[spark] class KubernetesSparkRestServer( | |
| writeBase64ContentsToFiles(files, workingDir) | ||
| } | ||
|
|
||
| def resolvedAppResource(appResource: AppResource, tempDir: File): String = { | ||
| def resolveAppResourceToLocalPath(appResource: AppResource, tempDir: File): String = { | ||
| val appResourcePath = appResource match { | ||
| case UploadedAppResource(resourceContentsBase64, resourceName) => | ||
| val resourceFile = new File(tempDir, resourceName) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not entirely sure if it's worth keeping the
`TarGzippedData` fields as `Option`s anymore. Sure, we save a few bytes of extra data to upload if there aren't any jars, but it's an extra layer of indirection that has to be semantically understood.