-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-12343][YARN] Simplify Yarn client and client argument #11603
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9311622
f99cb19
e27a5a0
75c5b36
7f41403
6d3c62d
f9b62a1
d152f9f
7feae6e
3bb44b4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -441,7 +441,6 @@ object SparkSubmit { | |
| OptionAssigner(args.deployMode, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, | ||
| sysProp = "spark.submit.deployMode"), | ||
| OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.app.name"), | ||
| OptionAssigner(args.jars, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.jars"), | ||
| OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.jars.ivy"), | ||
| OptionAssigner(args.driverMemory, ALL_CLUSTER_MGRS, CLIENT, | ||
| sysProp = "spark.driver.memory"), | ||
|
|
@@ -452,27 +451,15 @@ object SparkSubmit { | |
| OptionAssigner(args.driverExtraLibraryPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, | ||
| sysProp = "spark.driver.extraLibraryPath"), | ||
|
|
||
| // Yarn client only | ||
| OptionAssigner(args.queue, YARN, CLIENT, sysProp = "spark.yarn.queue"), | ||
| // Yarn only | ||
| OptionAssigner(args.queue, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.queue"), | ||
| OptionAssigner(args.numExecutors, YARN, ALL_DEPLOY_MODES, | ||
| sysProp = "spark.executor.instances"), | ||
| OptionAssigner(args.files, YARN, CLIENT, sysProp = "spark.yarn.dist.files"), | ||
| OptionAssigner(args.archives, YARN, CLIENT, sysProp = "spark.yarn.dist.archives"), | ||
| OptionAssigner(args.principal, YARN, CLIENT, sysProp = "spark.yarn.principal"), | ||
| OptionAssigner(args.keytab, YARN, CLIENT, sysProp = "spark.yarn.keytab"), | ||
|
|
||
| // Yarn cluster only | ||
| OptionAssigner(args.name, YARN, CLUSTER, clOption = "--name"), | ||
| OptionAssigner(args.driverMemory, YARN, CLUSTER, clOption = "--driver-memory"), | ||
| OptionAssigner(args.driverCores, YARN, CLUSTER, clOption = "--driver-cores"), | ||
| OptionAssigner(args.queue, YARN, CLUSTER, clOption = "--queue"), | ||
| OptionAssigner(args.executorMemory, YARN, CLUSTER, clOption = "--executor-memory"), | ||
| OptionAssigner(args.executorCores, YARN, CLUSTER, clOption = "--executor-cores"), | ||
| OptionAssigner(args.files, YARN, CLUSTER, clOption = "--files"), | ||
| OptionAssigner(args.archives, YARN, CLUSTER, clOption = "--archives"), | ||
| OptionAssigner(args.jars, YARN, CLUSTER, clOption = "--addJars"), | ||
| OptionAssigner(args.principal, YARN, CLUSTER, clOption = "--principal"), | ||
| OptionAssigner(args.keytab, YARN, CLUSTER, clOption = "--keytab"), | ||
| OptionAssigner(args.jars, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.jars"), | ||
| OptionAssigner(args.files, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.files"), | ||
| OptionAssigner(args.archives, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.archives"), | ||
| OptionAssigner(args.principal, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.principal"), | ||
| OptionAssigner(args.keytab, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.keytab"), | ||
|
|
||
| // Other options | ||
| OptionAssigner(args.executorCores, STANDALONE | YARN, ALL_DEPLOY_MODES, | ||
|
|
@@ -483,10 +470,11 @@ object SparkSubmit { | |
| sysProp = "spark.cores.max"), | ||
| OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, ALL_DEPLOY_MODES, | ||
| sysProp = "spark.files"), | ||
| OptionAssigner(args.jars, STANDALONE | MESOS, CLUSTER, sysProp = "spark.jars"), | ||
| OptionAssigner(args.driverMemory, STANDALONE | MESOS, CLUSTER, | ||
| OptionAssigner(args.jars, LOCAL, CLIENT, sysProp = "spark.jars"), | ||
|
||
| OptionAssigner(args.jars, STANDALONE | MESOS, ALL_DEPLOY_MODES, sysProp = "spark.jars"), | ||
| OptionAssigner(args.driverMemory, STANDALONE | MESOS | YARN, CLUSTER, | ||
| sysProp = "spark.driver.memory"), | ||
| OptionAssigner(args.driverCores, STANDALONE | MESOS, CLUSTER, | ||
| OptionAssigner(args.driverCores, STANDALONE | MESOS | YARN, CLUSTER, | ||
| sysProp = "spark.driver.cores"), | ||
| OptionAssigner(args.supervise.toString, STANDALONE | MESOS, CLUSTER, | ||
| sysProp = "spark.driver.supervise"), | ||
|
|
@@ -550,6 +538,10 @@ object SparkSubmit { | |
| if (args.isPython) { | ||
| sysProps.put("spark.yarn.isPython", "true") | ||
| } | ||
|
|
||
| if (args.pyFiles != null) { | ||
| sysProps("spark.submit.pyFiles") = args.pyFiles | ||
| } | ||
| } | ||
|
|
||
| // assure a keytab is available from any place in a JVM | ||
|
|
@@ -576,9 +568,6 @@ object SparkSubmit { | |
| childMainClass = "org.apache.spark.deploy.yarn.Client" | ||
| if (args.isPython) { | ||
| childArgs += ("--primary-py-file", args.primaryResource) | ||
| if (args.pyFiles != null) { | ||
| childArgs += ("--py-files", args.pyFiles) | ||
| } | ||
| childArgs += ("--class", "org.apache.spark.deploy.PythonRunner") | ||
| } else if (args.isR) { | ||
| val mainFile = new Path(args.primaryResource).getName | ||
|
|
@@ -627,7 +616,8 @@ object SparkSubmit { | |
| "spark.jars", | ||
| "spark.files", | ||
| "spark.yarn.dist.files", | ||
| "spark.yarn.dist.archives") | ||
| "spark.yarn.dist.archives", | ||
| "spark.yarn.dist.jars") | ||
| pathConfigs.foreach { config => | ||
| // Replace old URIs with resolved URIs, if they exist | ||
| sysProps.get(config).foreach { oldValue => | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here I put all the additional jars into a configuration
spark.yarn.dist.jars, this will be picked up by yarn/client and put into the distributed cache. So now, in both yarn client and cluster mode, additional jars will be put into the distributed cache. Another thing: do we also need to put the user jar into the distributed cache for yarn client mode? I think it is doable, but I'm not sure whether there is any special concern.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should just leave that as is for now. We can file separate jira if we want to change.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So dist.files and dist.archives are public and documented, seems like we should make dist.jars public and document it also in the yarn docs unless someone has reason not to.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, I will add it to the yarn doc.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like there's another config "spark.jars" to handle this property, so maybe we don't need to add another one, and for
dist.jars we could make it internal-use for yarn only.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
spark.jars is for distributing via Spark's internal mechanisms, whereas this one is distributed via the distributed cache. We should add it to the yarn-only section of the docs, similar to dist.files and dist.archives.