Skip to content

Commit 8c3ee2b

Browse files
jerryshao authored and andrewor14 committed
[SPARK-17512][CORE] Avoid formatting to python path for yarn and mesos cluster mode
## What changes were proposed in this pull request? Yarn and mesos cluster mode support remote python path (HDFS/S3 scheme) by their own mechanism, it is not necessary to check and format the python when running on these modes. This is a potential regression compared to 1.6, so here propose to fix it. ## How was this patch tested? Unit test to verify SparkSubmit arguments, also with local cluster verification. Because of lack of `MiniDFSCluster` support in Spark unit test, there's no integration test added. Author: jerryshao <[email protected]> Closes #15137 from jerryshao/SPARK-17512.
1 parent 9fcf1c5 commit 8c3ee2b

File tree

2 files changed

+29
-3
lines changed

2 files changed

+29
-3
lines changed

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ object SparkSubmit {
311311
// In Mesos cluster mode, non-local python files are automatically downloaded by Mesos.
312312
if (args.isPython && !isYarnCluster && !isMesosCluster) {
313313
if (Utils.nonLocalPaths(args.primaryResource).nonEmpty) {
314-
printErrorAndExit(s"Only local python files are supported: $args.primaryResource")
314+
printErrorAndExit(s"Only local python files are supported: ${args.primaryResource}")
315315
}
316316
val nonLocalPyFiles = Utils.nonLocalPaths(args.pyFiles).mkString(",")
317317
if (nonLocalPyFiles.nonEmpty) {
@@ -322,7 +322,7 @@ object SparkSubmit {
322322
// Require all R files to be local
323323
if (args.isR && !isYarnCluster) {
324324
if (Utils.nonLocalPaths(args.primaryResource).nonEmpty) {
325-
printErrorAndExit(s"Only local R files are supported: $args.primaryResource")
325+
printErrorAndExit(s"Only local R files are supported: ${args.primaryResource}")
326326
}
327327
}
328328

@@ -633,7 +633,14 @@ object SparkSubmit {
633633
// explicitly sets `spark.submit.pyFiles` in his/her default properties file.
634634
sysProps.get("spark.submit.pyFiles").foreach { pyFiles =>
635635
val resolvedPyFiles = Utils.resolveURIs(pyFiles)
636-
val formattedPyFiles = PythonRunner.formatPaths(resolvedPyFiles).mkString(",")
636+
val formattedPyFiles = if (!isYarnCluster && !isMesosCluster) {
637+
PythonRunner.formatPaths(resolvedPyFiles).mkString(",")
638+
} else {
639+
// Ignoring formatting python path in yarn and mesos cluster mode, these two modes
640+
// support dealing with remote python files, they could distribute and add python files
641+
// locally.
642+
resolvedPyFiles
643+
}
637644
sysProps("spark.submit.pyFiles") = formattedPyFiles
638645
}
639646

core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,25 @@ class SparkSubmitSuite
582582
val sysProps3 = SparkSubmit.prepareSubmitEnvironment(appArgs3)._3
583583
sysProps3("spark.submit.pyFiles") should be(
584584
PythonRunner.formatPaths(Utils.resolveURIs(pyFiles)).mkString(","))
585+
586+
// Test remote python files
587+
val f4 = File.createTempFile("test-submit-remote-python-files", "", tmpDir)
588+
val writer4 = new PrintWriter(f4)
589+
val remotePyFiles = "hdfs:///tmp/file1.py,hdfs:///tmp/file2.py"
590+
writer4.println("spark.submit.pyFiles " + remotePyFiles)
591+
writer4.close()
592+
val clArgs4 = Seq(
593+
"--master", "yarn",
594+
"--deploy-mode", "cluster",
595+
"--properties-file", f4.getPath,
596+
"hdfs:///tmp/mister.py"
597+
)
598+
val appArgs4 = new SparkSubmitArguments(clArgs4)
599+
val sysProps4 = SparkSubmit.prepareSubmitEnvironment(appArgs4)._3
600+
// Should not format python path for yarn cluster mode
601+
sysProps4("spark.submit.pyFiles") should be(
602+
Utils.resolveURIs(remotePyFiles)
603+
)
585604
}
586605

587606
test("user classpath first in driver") {

0 commit comments

Comments
 (0)