Skip to content

Commit 522e3e8

Browse files
committed
[SPARK-17605][SPARK_SUBMIT] Add option spark.usePython and spark.useR for applications that use both pyspark and sparkr
1 parent 8a6bbe0 commit 522e3e8

File tree

2 files changed

+20
-12
lines changed

2 files changed

+20
-12
lines changed

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -295,27 +295,31 @@ object SparkSubmit {
295295
Option(args.repositories), Option(args.ivyRepoPath), exclusions = exclusions)
296296
if (!StringUtils.isBlank(resolvedMavenCoordinates)) {
297297
args.jars = mergeFileLists(args.jars, resolvedMavenCoordinates)
298-
if (args.isPython) {
298+
if (args.usePython) {
299299
args.pyFiles = mergeFileLists(args.pyFiles, resolvedMavenCoordinates)
300300
}
301301
}
302302

303303
// install any R packages that may have been passed through --jars or --packages.
304304
// Spark Packages may contain R source code inside the jar.
305-
if (args.isR && !StringUtils.isBlank(args.jars)) {
305+
if (args.useR && !StringUtils.isBlank(args.jars)) {
306306
RPackageUtils.checkAndBuildRPackage(args.jars, printStream, args.verbose)
307307
}
308308

309309
// Require all python files to be local, so we can add them to the PYTHONPATH
310310
// In YARN cluster mode, python files are distributed as regular files, which can be non-local.
311311
// In Mesos cluster mode, non-local python files are automatically downloaded by Mesos.
312-
if (args.isPython && !isYarnCluster && !isMesosCluster) {
313-
if (Utils.nonLocalPaths(args.primaryResource).nonEmpty) {
314-
printErrorAndExit(s"Only local python files are supported: ${args.primaryResource}")
312+
if (!isYarnCluster && !isMesosCluster) {
313+
if (args.isPython) {
314+
if (Utils.nonLocalPaths(args.primaryResource).nonEmpty) {
315+
printErrorAndExit(s"Only local python files are supported: ${args.primaryResource}")
316+
}
315317
}
316-
val nonLocalPyFiles = Utils.nonLocalPaths(args.pyFiles).mkString(",")
317-
if (nonLocalPyFiles.nonEmpty) {
318-
printErrorAndExit(s"Only local additional python files are supported: $nonLocalPyFiles")
318+
if (args.usePython) {
319+
val nonLocalPyFiles = Utils.nonLocalPaths(args.pyFiles).mkString(",")
320+
if (nonLocalPyFiles.nonEmpty) {
321+
printErrorAndExit(s"Only local additional python files are supported: $nonLocalPyFiles")
322+
}
319323
}
320324
}
321325

@@ -374,7 +378,7 @@ object SparkSubmit {
374378
// In YARN mode for an R app, add the SparkR package archive and the R package
375379
// archive containing all of the built R libraries to archives so that they can
376380
// be distributed with the job
377-
if (args.isR && clusterManager == YARN) {
381+
if (args.useR && clusterManager == YARN) {
378382
val sparkRPackagePath = RUtils.localSparkRPackagePath
379383
if (sparkRPackagePath.isEmpty) {
380384
printErrorAndExit("SPARK_HOME does not exist for R application in YARN mode.")
@@ -404,7 +408,7 @@ object SparkSubmit {
404408
}
405409

406410
// TODO: Support distributing R packages with standalone cluster
407-
if (args.isR && clusterManager == STANDALONE && !RUtils.rPackages.isEmpty) {
411+
if (args.useR && clusterManager == STANDALONE && !RUtils.rPackages.isEmpty) {
408412
printErrorAndExit("Distributing R packages with standalone cluster is not supported.")
409413
}
410414

@@ -538,7 +542,7 @@ object SparkSubmit {
538542

539543
// Let YARN know it's a pyspark app, so it distributes needed libraries.
540544
if (clusterManager == YARN) {
541-
if (args.isPython) {
545+
if (args.usePython) {
542546
sysProps.put("spark.yarn.isPython", "true")
543547
}
544548

@@ -590,7 +594,7 @@ object SparkSubmit {
590594
if (isMesosCluster) {
591595
assert(args.useRest, "Mesos cluster mode is only supported through the REST submission API")
592596
childMainClass = "org.apache.spark.deploy.rest.RestSubmissionClient"
593-
if (args.isPython) {
597+
if (args.usePython) {
594598
// Second argument is main class
595599
childArgs += (args.primaryResource, "")
596600
if (args.pyFiles != null) {

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
7070
var proxyUser: String = null
7171
var principal: String = null
7272
var keytab: String = null
73+
var usePython: Boolean = false
74+
var useR: Boolean = false
7375

7476
// Standalone cluster mode only
7577
var supervise: Boolean = false
@@ -186,6 +188,8 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
186188
.getOrElse(sparkProperties.get("spark.executor.instances").orNull)
187189
keytab = Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull
188190
principal = Option(principal).orElse(sparkProperties.get("spark.yarn.principal")).orNull
191+
usePython = isPython || sparkProperties.getOrElse("spark.usePython", "false").toBoolean
192+
useR = isR || sparkProperties.getOrElse("spark.useR", "false").toBoolean
189193

190194
// Try to set main class from JAR if no --class argument is given
191195
if (mainClass == null && !isPython && !isR && primaryResource != null) {

0 commit comments

Comments (0)