
Commit 22a2e5a

mccheah authored and ash211 committed
Change the API contract for uploading local files (apache-spark-on-k8s#107)
* Change the API contract for uploading local jars, mirroring what YARN and Mesos expect.
* Address comments
* Fix test
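
In practice, this folds the Kubernetes-specific upload options into the standard submission flags. A minimal sketch of the mapping (the jar path is illustrative, not from this commit):

    # Old contract, removed by this commit:
    #   --upload-jars /local/deps/dep1.jar   (or spark.kubernetes.driver.uploads.jars)
    # New contract, shared with YARN and Mesos:
    #   --jars /local/deps/dep1.jar          (or spark.jars)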
1 parent 16c5620 commit 22a2e5a

File tree

14 files changed, +244 -368 lines changed

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 3 additions & 6 deletions
@@ -462,10 +462,6 @@ object SparkSubmit extends CommandLineUtils {
 
       OptionAssigner(args.kubernetesNamespace, KUBERNETES, ALL_DEPLOY_MODES,
         sysProp = "spark.kubernetes.namespace"),
-      OptionAssigner(args.kubernetesUploadJars, KUBERNETES, CLUSTER,
-        sysProp = "spark.kubernetes.driver.uploads.jars"),
-      OptionAssigner(args.kubernetesUploadFiles, KUBERNETES, CLUSTER,
-        sysProp = "spark.kubernetes.driver.uploads.files"),
 
       // Other options
       OptionAssigner(args.executorCores, STANDALONE | YARN, ALL_DEPLOY_MODES,
@@ -474,10 +470,11 @@ object SparkSubmit extends CommandLineUtils {
         sysProp = "spark.executor.memory"),
       OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES,
         sysProp = "spark.cores.max"),
-      OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, ALL_DEPLOY_MODES,
+      OptionAssigner(args.files, LOCAL | STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES,
         sysProp = "spark.files"),
       OptionAssigner(args.jars, LOCAL, CLIENT, sysProp = "spark.jars"),
-      OptionAssigner(args.jars, STANDALONE | MESOS, ALL_DEPLOY_MODES, sysProp = "spark.jars"),
+      OptionAssigner(args.jars, STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES,
+        sysProp = "spark.jars"),
       OptionAssigner(args.driverMemory, STANDALONE | MESOS | YARN, CLUSTER,
         sysProp = "spark.driver.memory"),
       OptionAssigner(args.driverCores, STANDALONE | MESOS | YARN, CLUSTER,
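
For orientation, each OptionAssigner row declares which cluster managers and deploy modes a submit argument applies to, and which system property it fills; this commit adds KUBERNETES to the masks for args.jars and args.files. A minimal sketch of the pattern, with assumed bitmask values (the real constants and case class are private to SparkSubmit.scala):

    object OptionAssignerSketch {
      // Cluster-manager and deploy-mode bitmasks; these values are
      // assumptions for illustration, not copied from SparkSubmit.scala.
      val YARN = 1; val STANDALONE = 2; val MESOS = 4; val LOCAL = 8
      val KUBERNETES = 16
      val CLIENT = 1; val CLUSTER = 2
      val ALL_DEPLOY_MODES = CLIENT | CLUSTER

      // Simplified shape of the real case class.
      case class OptionAssigner(
          value: String, clusterManager: Int, deployMode: Int, sysProp: String)

      // After this commit, --jars flows into spark.jars on Kubernetes too,
      // replacing the removed spark.kubernetes.driver.uploads.jars property.
      val jars = OptionAssigner("/local/deps/dep1.jar",
        STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, "spark.jars")

      // An assigner applies when both bitmasks match the current submission.
      def applies(a: OptionAssigner, cm: Int, mode: Int): Boolean =
        a.value != null && (a.clusterManager & cm) != 0 && (a.deployMode & mode) != 0
    }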

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

Lines changed: 0 additions & 14 deletions
@@ -73,8 +73,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, String])
 
   // Kubernetes only
   var kubernetesNamespace: String = null
-  var kubernetesUploadJars: String = null
-  var kubernetesUploadFiles: String = null
 
   // Standalone cluster mode only
   var supervise: Boolean = false
@@ -194,12 +192,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, String])
     kubernetesNamespace = Option(kubernetesNamespace)
       .orElse(sparkProperties.get("spark.kubernetes.namespace"))
       .orNull
-    kubernetesUploadJars = Option(kubernetesUploadJars)
-      .orElse(sparkProperties.get("spark.kubernetes.driver.uploads.jars"))
-      .orNull
-    kubernetesUploadFiles = Option(kubernetesUploadFiles)
-      .orElse(sparkProperties.get("spark.kubernetes.driver.uploads.files"))
-      .orNull
 
     // Try to set main class from JAR if no --class argument is given
     if (mainClass == null && !isPython && !isR && primaryResource != null) {
@@ -441,12 +433,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, String])
       case KUBERNETES_NAMESPACE =>
        kubernetesNamespace = value
 
-      case KUBERNETES_UPLOAD_JARS =>
-        kubernetesUploadJars = value
-
-      case KUBERNETES_UPLOAD_FILES =>
-        kubernetesUploadFiles = value
-
       case HELP =>
        printUsageAndExit(0)
 
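The surviving namespace resolution above follows the file's usual precedence: an explicit command-line value wins, then the value from the loaded Spark properties, else null. A self-contained sketch of that rule (the property map stands in for a loaded spark-defaults file; names are taken from the diff):

    object NamespaceResolution extends App {
      // Stand-in for properties loaded from --conf / spark-defaults.conf.
      val sparkProperties = Map("spark.kubernetes.namespace" -> "team-a")

      // Would be non-null if --kubernetes-namespace had been passed.
      var kubernetesNamespace: String = null

      // CLI value wins; otherwise fall back to the property; otherwise null.
      kubernetesNamespace = Option(kubernetesNamespace)
        .orElse(sparkProperties.get("spark.kubernetes.namespace"))
        .orNull

      println(kubernetesNamespace)  // prints "team-a"
    }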
docs/running-on-kubernetes.md

Lines changed: 9 additions & 99 deletions
@@ -51,87 +51,15 @@ connect without SSL on a different port, the master would be set to `k8s://http:
 
 Note that applications can currently only be executed in cluster mode, where the driver and its executors are running on
 the cluster.
-
-### Adding Other JARs
-
-Spark allows users to provide dependencies that are bundled into the driver's Docker image, or that are on the local
-disk of the submitter's machine. These two types of dependencies are specified via different configuration options to
-`spark-submit`:
 
-* Local jars provided by specifying the `--jars` command line argument to `spark-submit`, or by setting `spark.jars` in
-  the application's configuration, will be treated as jars that are located on the *disk of the driver container*. This
-  only applies to jar paths that do not specify a scheme or that have the scheme `file://`. Paths with other schemes are
-  fetched from their appropriate locations.
-* Local jars provided by specifying the `--upload-jars` command line argument to `spark-submit`, or by setting
-  `spark.kubernetes.driver.uploads.jars` in the application's configuration, will be treated as jars that are located on
-  the *disk of the submitting machine*. These jars are uploaded to the driver docker container before executing the
-  application.
-* A main application resource path that does not have a scheme or that has the scheme `file://` is assumed to be on the
-  *disk of the submitting machine*. This resource is uploaded to the driver docker container before executing the
-  application. A remote path can still be specified and the resource will be fetched from the appropriate location.
-* A main application resource path that has the scheme `container://` is assumed to be on the *disk of the driver
-  container*.
-
-In all of these cases, the jars are placed on the driver's classpath, and are also sent to the executors. Below are some
-examples of providing application dependencies.
-
-To submit an application with both the main resource and two other jars living on the submitting user's machine:
-
-    bin/spark-submit \
-      --deploy-mode cluster \
-      --class com.example.applications.SampleApplication \
-      --master k8s://192.168.99.100 \
-      --upload-jars /home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar \
-      --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \
-      --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \
-      /home/exampleuser/exampleapplication/main.jar
-
-Note that since passing the jars through the `--upload-jars` command line argument is equivalent to setting the
-`spark.kubernetes.driver.uploads.jars` Spark property, the above will behave identically to this command:
-
-    bin/spark-submit \
-      --deploy-mode cluster \
-      --class com.example.applications.SampleApplication \
-      --master k8s://192.168.99.100 \
-      --conf spark.kubernetes.driver.uploads.jars=/home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar \
-      --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \
-      --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \
-      /home/exampleuser/exampleapplication/main.jar
-
-To specify a main application resource that can be downloaded from an HTTP service, and if a plugin for that application
-is located in the jar `/opt/spark-plugins/app-plugin.jar` on the docker image's disk:
-
-    bin/spark-submit \
-      --deploy-mode cluster \
-      --class com.example.applications.PluggableApplication \
-      --master k8s://192.168.99.100 \
-      --jars /opt/spark-plugins/app-plugin.jar \
-      --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \
-      --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \
-      http://example.com:8080/applications/sparkpluggable/app.jar
-
-Note that since passing the jars through the `--jars` command line argument is equivalent to setting the `spark.jars`
-Spark property, the above will behave identically to this command:
-
-    bin/spark-submit \
-      --deploy-mode cluster \
-      --class com.example.applications.PluggableApplication \
-      --master k8s://192.168.99.100 \
-      --conf spark.jars=file:///opt/spark-plugins/app-plugin.jar \
-      --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \
-      --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \
-      http://example.com:8080/applications/sparkpluggable/app.jar
-
-To specify a main application resource that is in the Docker image, and if it has no other dependencies:
-
-    bin/spark-submit \
-      --deploy-mode cluster \
-      --class com.example.applications.PluggableApplication \
-      --master k8s://192.168.99.100:8443 \
-      --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \
-      --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \
-      container:///home/applications/examples/example.jar
+### Dependency Management and Docker Containers
 
+Spark supports specifying JAR paths that are either on the submitting host's disk, or are located on the disk of the
+driver and executors. Refer to the [application submission](submitting-applications.html#advanced-dependency-management)
+section for details. Note that files specified with the `local` scheme should be added to the container image of both
+the driver and the executors. Files without a scheme or with the scheme `file://` are treated as being on the disk of
+the submitting machine, and are uploaded to the driver running in Kubernetes before launching the application.
+
 ### Setting Up SSL For Submitting the Driver
 
 When submitting to Kubernetes, a pod is started for the driver, and the pod starts an HTTP server. This HTTP server
@@ -146,9 +74,9 @@ pod in starting the application, set `spark.ssl.kubernetes.submit.trustStore`.
 
 One note about the keyStore is that it can be specified as either a file on the client machine or a file in the
 container image's disk. Thus `spark.ssl.kubernetes.submit.keyStore` can be a URI with a scheme of either `file:`
-or `container:`. A scheme of `file:` corresponds to the keyStore being located on the client machine; it is mounted onto
+or `local:`. A scheme of `file:` corresponds to the keyStore being located on the client machine; it is mounted onto
 the driver container as a [secret volume](https://kubernetes.io/docs/user-guide/secrets/). When the URI has the scheme
-`container:`, the file is assumed to already be on the container's disk at the appropriate path.
+`local:`, the file is assumed to already be on the container's disk at the appropriate path.
 
 ### Kubernetes Clusters and the authenticated proxy endpoint
 
@@ -241,24 +169,6 @@ from the other deployment modes. See the [configuration page](configuration.html
     executor pods from the API server.
   </td>
 </tr>
-<tr>
-  <td><code>spark.kubernetes.driver.uploads.jars</code></td>
-  <td>(none)</td>
-  <td>
-    Comma-separated list of jars to send to the driver and all executors when submitting the application in cluster
-    mode. Refer to <a href="running-on-kubernetes.html#adding-other-jars">adding other jars</a> for more information.
-  </td>
-</tr>
-<tr>
-  <td><code>spark.kubernetes.driver.uploads.files</code></td>
-  <td>(none)</td>
-  <td>
-    Comma-separated list of files to send to the driver and all executors when submitting the application in cluster
-    mode. The files are added in a flat hierarchy to the current working directory of the driver, having the same
-    names as the names of the original files. Note that two files with the same name cannot be added, even if they
-    were in different source directories on the client disk.
-  </td>
-</tr>
 <tr>
   <td><code>spark.kubernetes.executor.memoryOverhead</code></td>
   <td>executorMemory * 0.10, with minimum of 384 </td>
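
Under the revised contract, a submission mixing a jar on the submitting machine with one baked into the images would look roughly like this (hosts, images, and paths are illustrative, adapted from the examples this commit removes). The schemeless jar is uploaded to the driver before launch, while the `local://` jar is expected to already exist in both the driver and executor images:

    bin/spark-submit \
      --deploy-mode cluster \
      --class com.example.applications.SampleApplication \
      --master k8s://192.168.99.100 \
      --jars /home/exampleuser/exampleapplication/dep1.jar,local:///opt/spark-plugins/app-plugin.jar \
      --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \
      --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \
      /home/exampleuser/exampleapplication/main.jar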

launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java

Lines changed: 1 addition & 7 deletions
@@ -77,10 +77,7 @@ class SparkSubmitOptionParser {
   protected final String QUEUE = "--queue";
 
   // Kubernetes-only options.
-  protected final String KUBERNETES_MASTER = "--kubernetes-master";
   protected final String KUBERNETES_NAMESPACE = "--kubernetes-namespace";
-  protected final String KUBERNETES_UPLOAD_JARS = "--upload-jars";
-  protected final String KUBERNETES_UPLOAD_FILES = "--upload-files";
 
   /**
    * This is the canonical list of spark-submit options. Each entry in the array contains the
@@ -121,10 +118,7 @@ class SparkSubmitOptionParser {
     { REPOSITORIES },
     { STATUS },
     { TOTAL_EXECUTOR_CORES },
-    { KUBERNETES_MASTER },
-    { KUBERNETES_NAMESPACE },
-    { KUBERNETES_UPLOAD_JARS },
-    { KUBERNETES_UPLOAD_FILES }
+    { KUBERNETES_NAMESPACE }
   };
 
  /**
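
The opts table these constants feed is the launcher's canonical flag list; each row holds the aliases for one option, and matched flags are dispatched to a handler, which on the Scala side lands in the SparkSubmitArguments.handle cases shown earlier. A simplified Scala sketch of that table-driven dispatch (the real parser is Java and also handles --name=value forms):

    object OptionTableSketch extends App {
      val KUBERNETES_NAMESPACE = "--kubernetes-namespace"
      // Each row lists the aliases of one option; the first is canonical.
      val opts: Array[Array[String]] = Array(Array(KUBERNETES_NAMESPACE))

      // Walk args, dispatching recognized flag/value pairs to a handler.
      def parse(args: List[String])(handle: (String, String) => Unit): Unit =
        args match {
          case flag :: value :: rest if opts.exists(_.contains(flag)) =>
            handle(flag, value)
            parse(rest)(handle)
          case _ :: rest => parse(rest)(handle)
          case Nil => ()
        }

      parse(List("--kubernetes-namespace", "default")) {
        (flag, value) => println(s"$flag -> $value")  // --kubernetes-namespace -> default
      }
    }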
