From 208bb685cc899b705aadb7c5aba51334f2d340f0 Mon Sep 17 00:00:00 2001 From: Devaraj K Date: Tue, 5 Sep 2017 17:22:54 -0700 Subject: [PATCH 1/2] [SPARK-21384] [YARN] Spark 2.2 + YARN without spark.yarn.jars / spark.yarn.archive fails --- .../src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index d408ca90a5d1c..6ee7b8abe9bac 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -565,7 +565,6 @@ private[spark] class Client( distribute(jarsArchive.toURI.getPath, resType = LocalResourceType.ARCHIVE, destName = Some(LOCALIZED_LIB_DIR)) - jarsArchive.delete() } } From d2d13fe82aec1c0fc43d688dbd315385ef99be19 Mon Sep 17 00:00:00 2001 From: Devaraj K Date: Tue, 19 Sep 2017 14:55:59 -0700 Subject: [PATCH 2/2] Copying the files to remote always when the source scheme is 'file' --- .../main/scala/org/apache/spark/deploy/yarn/Client.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 6ee7b8abe9bac..64b2b4d4db549 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -340,8 +340,9 @@ private[spark] class Client( /** * Copy the given file to a remote file system (e.g. HDFS) if needed. - * The file is only copied if the source and destination file systems are different. This is used - * for preparing resources for launching the ApplicationMaster container. Exposed for testing. + * The file is only copied if the source and destination file systems are different or the source + * scheme is "file". This is used for preparing resources for launching the ApplicationMaster + * container. Exposed for testing. */ private[yarn] def copyFileToRemote( destDir: Path, @@ -353,7 +354,7 @@ private[spark] class Client( val destFs = destDir.getFileSystem(hadoopConf) val srcFs = srcPath.getFileSystem(hadoopConf) var destPath = srcPath - if (force || !compareFs(srcFs, destFs)) { + if (force || !compareFs(srcFs, destFs) || "file".equals(srcFs.getScheme)) { destPath = new Path(destDir, destName.getOrElse(srcPath.getName())) logInfo(s"Uploading resource $srcPath -> $destPath") FileUtil.copy(srcFs, srcPath, destFs, destPath, false, hadoopConf) @@ -565,6 +566,7 @@ private[spark] class Client( distribute(jarsArchive.toURI.getPath, resType = LocalResourceType.ARCHIVE, destName = Some(LOCALIZED_LIB_DIR)) + jarsArchive.delete() } }