From cc1ad11359567adafb6336db447620cfe046f359 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Wed, 11 May 2016 16:51:27 -0700 Subject: [PATCH 01/12] added option to load IvySettings from file instead of hard-coded settings --- .../org/apache/spark/deploy/SparkSubmit.scala | 90 +++++++++++++------ .../spark/deploy/SparkSubmitArguments.scala | 2 + .../spark/deploy/SparkSubmitUtilsSuite.scala | 43 ++++++--- .../hive/client/IsolatedClientLoader.scala | 5 +- 4 files changed, 101 insertions(+), 39 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 78606e06fbd3..f913ae33a4df 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -17,10 +17,11 @@ package org.apache.spark.deploy -import java.io.{File, PrintStream} +import java.io.{File, IOException, PrintStream} import java.lang.reflect.{InvocationTargetException, Modifier, UndeclaredThrowableException} import java.net.URL import java.security.PrivilegedExceptionAction +import java.text.ParseException import scala.annotation.tailrec import scala.collection.mutable.{ArrayBuffer, HashMap, Map} @@ -286,8 +287,12 @@ object SparkSubmit { } else { Nil } + + val ivySettings = Option(args.ivySettingsFile).map(SparkSubmitUtils.loadIvySettings).getOrElse( + SparkSubmitUtils.buildIvySettings(Option(args.repositories), Option(args.ivyRepoPath))) + val resolvedMavenCoordinates = SparkSubmitUtils.resolveMavenCoordinates(args.packages, - Option(args.repositories), Option(args.ivyRepoPath), exclusions = exclusions) + ivySettings, exclusions = exclusions) if (!StringUtils.isBlank(resolvedMavenCoordinates)) { args.jars = mergeFileLists(args.jars, resolvedMavenCoordinates) if (args.isPython) { @@ -440,6 +445,8 @@ object SparkSubmit { sysProp = "spark.submit.deployMode"), OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.app.name"), OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.jars.ivy"), + OptionAssigner(args.ivySettingsFile, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, + sysProp = "spark.ivy.settings"), OptionAssigner(args.driverMemory, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.driver.memory"), OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, @@ -967,6 +974,56 @@ private[spark] object SparkSubmitUtils { } } + /** + * Build Ivy Settings using options with default resolvers + * @param remoteRepos Comma-delimited string of remote repositories other than maven central + * @param ivyPath The path to the local ivy repository + * @return An IvySettings object + */ + def buildIvySettings( + remoteRepos: Option[String], + ivyPath: Option[String]): IvySettings = { + val ivySettings: IvySettings = new IvySettings + + // set ivy settings for location of cache + val alternateIvyCache = ivyPath.getOrElse("") + val packagesDirectory: File = + if (alternateIvyCache == null || alternateIvyCache.trim.isEmpty) { + new File(ivySettings.getDefaultIvyUserDir, "jars") + } else { + ivySettings.setDefaultIvyUserDir(new File(alternateIvyCache)) + ivySettings.setDefaultCache(new File(alternateIvyCache, "cache")) + new File(alternateIvyCache, "jars") + } + + // create a pattern matcher + ivySettings.addMatcher(new GlobPatternMatcher) + // create the dependency resolvers + val repoResolver = createRepoResolvers(remoteRepos, ivySettings) + ivySettings.addResolver(repoResolver) + 
ivySettings.setDefaultResolver(repoResolver.getName) + ivySettings + } + + /** + * Load Ivy settings from a given filename, using supplied resolvers + * @param settingsFile Path to Ivy settings file + * @return An IvySettings object + */ + def loadIvySettings(settingsFile: String): IvySettings = { + val file = new File(settingsFile) + require(file.exists(), s"Ivy settings file $file does not exist") + require(file.isFile, s"Ivy settings file $file is not a normal file") + val ivySettings: IvySettings = new IvySettings + try { + ivySettings.load(file) + } catch { + case e @ (_: IOException | _: ParseException) => + throw new SparkException(s"Failed when loading Ivy settings from $settingsFile", e) + } + ivySettings + } + /** A nice function to use in tests as well. Values are dummy strings. */ def getModuleDescriptor: DefaultModuleDescriptor = DefaultModuleDescriptor.newDefaultInstance( ModuleRevisionId.newInstance("org.apache.spark", "spark-submit-parent", "1.0")) @@ -974,16 +1031,14 @@ private[spark] object SparkSubmitUtils { /** * Resolves any dependencies that were supplied through maven coordinates * @param coordinates Comma-delimited string of maven coordinates - * @param remoteRepos Comma-delimited string of remote repositories other than maven central - * @param ivyPath The path to the local ivy repository + * @param ivySettings An IvySettings containing resolvers to use * @param exclusions Exclusions to apply when resolving transitive dependencies * @return The comma-delimited path to the jars of the given maven artifacts including their * transitive dependencies */ def resolveMavenCoordinates( coordinates: String, - remoteRepos: Option[String], - ivyPath: Option[String], + ivySettings: IvySettings, exclusions: Seq[String] = Nil, isTest: Boolean = false): String = { if (coordinates == null || coordinates.trim.isEmpty) { @@ -994,32 +1049,14 @@ private[spark] object SparkSubmitUtils { // To prevent ivy from logging to system out System.setOut(printStream) val artifacts = extractMavenCoordinates(coordinates) - // Default configuration name for ivy - val ivyConfName = "default" - // set ivy settings for location of cache - val ivySettings: IvySettings = new IvySettings // Directories for caching downloads through ivy and storing the jars when maven coordinates // are supplied to spark-submit - val alternateIvyCache = ivyPath.getOrElse("") - val packagesDirectory: File = - if (alternateIvyCache == null || alternateIvyCache.trim.isEmpty) { - new File(ivySettings.getDefaultIvyUserDir, "jars") - } else { - ivySettings.setDefaultIvyUserDir(new File(alternateIvyCache)) - ivySettings.setDefaultCache(new File(alternateIvyCache, "cache")) - new File(alternateIvyCache, "jars") - } + val packagesDirectory: File = new File(ivySettings.getDefaultIvyUserDir, "jars") // scalastyle:off println printStream.println( s"Ivy Default Cache set to: ${ivySettings.getDefaultCache.getAbsolutePath}") printStream.println(s"The jars for the packages stored in: $packagesDirectory") // scalastyle:on println - // create a pattern matcher - ivySettings.addMatcher(new GlobPatternMatcher) - // create the dependency resolvers - val repoResolver = createRepoResolvers(remoteRepos, ivySettings) - ivySettings.addResolver(repoResolver) - ivySettings.setDefaultResolver(repoResolver.getName) val ivy = Ivy.newInstance(ivySettings) // Set resolve options to download transitive dependencies as well @@ -1035,6 +1072,9 @@ private[spark] object SparkSubmitUtils { resolveOptions.setDownload(true) } + // Default configuration name for 
ivy + val ivyConfName = "default" + // A Module descriptor must be specified. Entries are dummy strings val md = getModuleDescriptor // clear ivy resolution from previous launches. The resolution file is usually at diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 206c130c7637..4b9fb4dcd5f1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -60,6 +60,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S var packages: String = null var repositories: String = null var ivyRepoPath: String = null + var ivySettingsFile: String = null var packagesExclusions: String = null var verbose: Boolean = false var isPython: Boolean = false @@ -175,6 +176,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S jars = Option(jars).orElse(sparkProperties.get("spark.jars")).orNull files = Option(files).orElse(sparkProperties.get("spark.files")).orNull ivyRepoPath = sparkProperties.get("spark.jars.ivy").orNull + ivySettingsFile = sparkProperties.get("spark.ivy.settings").orNull packages = Option(packages).orElse(sparkProperties.get("spark.jars.packages")).orNull packagesExclusions = Option(packagesExclusions) .orElse(sparkProperties.get("spark.jars.excludes")).orNull diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala index 4877710c1237..b6d822c8ce18 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala @@ -126,8 +126,10 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val main = MavenCoordinate("my.awesome.lib", "mylib", "0.1") IvyTestUtils.withRepository(main, None, None) { repo => // end to end - val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, Option(repo), - Option(tempIvyPath), isTest = true) + val jarPath = SparkSubmitUtils.resolveMavenCoordinates( + main.toString, + SparkSubmitUtils.buildIvySettings(Option(repo), Option(tempIvyPath)), + isTest = true) assert(jarPath.indexOf(tempIvyPath) >= 0, "should use non-default ivy path") } } @@ -137,7 +139,9 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val dep = "my.great.dep:mydep:0.5" // Local M2 repository IvyTestUtils.withRepository(main, Some(dep), Some(SparkSubmitUtils.m2Path)) { repo => - val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, None, None, + val jarPath = SparkSubmitUtils.resolveMavenCoordinates( + main.toString, + SparkSubmitUtils.buildIvySettings(None, None), isTest = true) assert(jarPath.indexOf("mylib") >= 0, "should find artifact") assert(jarPath.indexOf("mydep") >= 0, "should find dependency") @@ -146,7 +150,9 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val settings = new IvySettings val ivyLocal = new File(settings.getDefaultIvyUserDir, "local" + File.separator) IvyTestUtils.withRepository(main, Some(dep), Some(ivyLocal), useIvyLayout = true) { repo => - val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, None, None, + val jarPath = SparkSubmitUtils.resolveMavenCoordinates( + main.toString, + SparkSubmitUtils.buildIvySettings(None, None), isTest = true) assert(jarPath.indexOf("mylib") >= 0, 
"should find artifact") assert(jarPath.indexOf("mydep") >= 0, "should find dependency") @@ -156,8 +162,10 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { settings.setDefaultIvyUserDir(new File(tempIvyPath)) IvyTestUtils.withRepository(main, Some(dep), Some(dummyIvyLocal), useIvyLayout = true, ivySettings = settings) { repo => - val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, None, - Some(tempIvyPath), isTest = true) + val jarPath = SparkSubmitUtils.resolveMavenCoordinates( + main.toString, + SparkSubmitUtils.buildIvySettings(None, Some(tempIvyPath)), + isTest = true) assert(jarPath.indexOf("mylib") >= 0, "should find artifact") assert(jarPath.indexOf(tempIvyPath) >= 0, "should be in new ivy path") assert(jarPath.indexOf("mydep") >= 0, "should find dependency") @@ -166,7 +174,10 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { test("dependency not found throws RuntimeException") { intercept[RuntimeException] { - SparkSubmitUtils.resolveMavenCoordinates("a:b:c", None, None, isTest = true) + SparkSubmitUtils.resolveMavenCoordinates( + "a:b:c", + SparkSubmitUtils.buildIvySettings(None, None), + isTest = true) } } @@ -178,12 +189,17 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { components.map(comp => s"org.apache.spark:spark-${comp}2.10:1.2.0").mkString(",") + ",org.apache.spark:spark-core_fake:1.2.0" - val path = SparkSubmitUtils.resolveMavenCoordinates(coordinates, None, None, isTest = true) + val path = SparkSubmitUtils.resolveMavenCoordinates( + coordinates, + SparkSubmitUtils.buildIvySettings(None, None), + isTest = true) assert(path === "", "should return empty path") val main = MavenCoordinate("org.apache.spark", "spark-streaming-kafka-assembly_2.10", "1.2.0") IvyTestUtils.withRepository(main, None, None) { repo => - val files = SparkSubmitUtils.resolveMavenCoordinates(coordinates + "," + main.toString, - Some(repo), None, isTest = true) + val files = SparkSubmitUtils.resolveMavenCoordinates( + coordinates + "," + main.toString, + SparkSubmitUtils.buildIvySettings(Some(repo), None), + isTest = true) assert(files.indexOf(main.artifactId) >= 0, "Did not return artifact") } } @@ -192,8 +208,11 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val main = new MavenCoordinate("my.great.lib", "mylib", "0.1") val dep = "my.great.dep:mydep:0.5" IvyTestUtils.withRepository(main, Some(dep), None) { repo => - val files = SparkSubmitUtils.resolveMavenCoordinates(main.toString, - Some(repo), None, Seq("my.great.dep:mydep"), isTest = true) + val files = SparkSubmitUtils.resolveMavenCoordinates( + main.toString, + SparkSubmitUtils.buildIvySettings(Some(repo), None), + Seq("my.great.dep:mydep"), + isTest = true) assert(files.indexOf(main.artifactId) >= 0, "Did not return artifact") assert(files.indexOf("my.great.dep") < 0, "Returned excluded artifact") } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index e1950d181d10..06180c16560f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -112,8 +112,9 @@ private[hive] object IsolatedClientLoader extends Logging { val classpath = quietly { SparkSubmitUtils.resolveMavenCoordinates( hiveArtifacts.mkString(","), - 
Some("http://www.datanucleus.org/downloads/maven2"), - ivyPath, + SparkSubmitUtils.buildIvySettings( + Some("http://www.datanucleus.org/downloads/maven2"), + ivyPath), exclusions = version.exclusions) } val allFiles = classpath.split(",").map(new File(_)).toSet From 97ea3a4a072bbfcb1ece774df4fb7aa7d8c8c551 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Thu, 12 May 2016 10:54:39 -0700 Subject: [PATCH 02/12] added parenthesis to be consistent --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index f913ae33a4df..f8b5e586cb91 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1013,7 +1013,7 @@ private[spark] object SparkSubmitUtils { def loadIvySettings(settingsFile: String): IvySettings = { val file = new File(settingsFile) require(file.exists(), s"Ivy settings file $file does not exist") - require(file.isFile, s"Ivy settings file $file is not a normal file") + require(file.isFile(), s"Ivy settings file $file is not a normal file") val ivySettings: IvySettings = new IvySettings try { ivySettings.load(file) From 716454b9a923a79f9218ae9c5040ef4796df872a Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Tue, 3 Jan 2017 10:19:30 -0800 Subject: [PATCH 03/12] removed OptionAssigner, not needed --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 28b796b8c23d..e1cad809664d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -439,8 +439,6 @@ object SparkSubmit extends CommandLineUtils { sysProp = "spark.submit.deployMode"), OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.app.name"), OptionAssigner(args.ivyRepoPath, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.jars.ivy"), - OptionAssigner(args.ivySettingsFile, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, - sysProp = "spark.ivy.settings"), OptionAssigner(args.driverMemory, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.driver.memory"), OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, From a25170a1d412454224f0d59e8f26348b74f60dab Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Tue, 3 Jan 2017 11:10:48 -0800 Subject: [PATCH 04/12] cleaned up alternateIvyCache setting --- .../org/apache/spark/deploy/SparkSubmit.scala | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index e1cad809664d..bcb892ca58c8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -990,16 +990,11 @@ private[spark] object SparkSubmitUtils { ivyPath: Option[String]): IvySettings = { val ivySettings: IvySettings = new IvySettings - // set ivy settings for location of cache - val alternateIvyCache = ivyPath.getOrElse("") - val packagesDirectory: File = - if (alternateIvyCache == null || alternateIvyCache.trim.isEmpty) { - new File(ivySettings.getDefaultIvyUserDir, "jars") - } else { - 
ivySettings.setDefaultIvyUserDir(new File(alternateIvyCache)) - ivySettings.setDefaultCache(new File(alternateIvyCache, "cache")) - new File(alternateIvyCache, "jars") - } + // set ivy settings for location of cache, if option is supplied + ivyPath.foreach { alternateIvyCache => + ivySettings.setDefaultIvyUserDir(new File(alternateIvyCache)) + ivySettings.setDefaultCache(new File(alternateIvyCache, "cache")) + } // create a pattern matcher ivySettings.addMatcher(new GlobPatternMatcher) From 28f504a574538bfb4c3fc13943ab5b03b155e407 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Fri, 6 Jan 2017 11:31:23 -0800 Subject: [PATCH 05/12] added option for additional repos and ivy path when loading settings file too --- .../org/apache/spark/deploy/SparkSubmit.scala | 92 ++++++++++++------- .../spark/deploy/SparkSubmitUtilsSuite.scala | 19 ++-- 2 files changed, 69 insertions(+), 42 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index bcb892ca58c8..5ba4c5cdd9a0 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -285,8 +285,13 @@ object SparkSubmit extends CommandLineUtils { Nil } - val ivySettings = Option(args.ivySettingsFile).map(SparkSubmitUtils.loadIvySettings).getOrElse( - SparkSubmitUtils.buildIvySettings(Option(args.repositories), Option(args.ivyRepoPath))) + // Create the IvySettings, either load from file or build defaults + val ivySettings = if (Option(args.ivySettingsFile).isDefined) { + SparkSubmitUtils.loadIvySettings(args.ivySettingsFile, Option(args.repositories), + Option(args.ivyRepoPath)) + } else { + SparkSubmitUtils.buildIvySettings(Option(args.repositories), Option(args.ivyRepoPath)) + } val resolvedMavenCoordinates = SparkSubmitUtils.resolveMavenCoordinates(args.packages, ivySettings, exclusions = exclusions) @@ -865,30 +870,13 @@ private[spark] object SparkSubmitUtils { /** * Extracts maven coordinates from a comma-delimited string - * @param remoteRepos Comma-delimited string of remote repositories - * @param ivySettings The Ivy settings for this session + * @param defaultIvyUserDir The default user path for Ivy * @return A ChainResolver used by Ivy to search for and resolve dependencies. 
*/ - def createRepoResolvers(remoteRepos: Option[String], ivySettings: IvySettings): ChainResolver = { + def createRepoResolvers(defaultIvyUserDir: File): ChainResolver = { // We need a chain resolver if we want to check multiple repositories val cr = new ChainResolver - cr.setName("list") - - val repositoryList = remoteRepos.getOrElse("") - // add any other remote repositories other than maven central - if (repositoryList.trim.nonEmpty) { - repositoryList.split(",").zipWithIndex.foreach { case (repo, i) => - val brr: IBiblioResolver = new IBiblioResolver - brr.setM2compatible(true) - brr.setUsepoms(true) - brr.setRoot(repo) - brr.setName(s"repo-${i + 1}") - cr.add(brr) - // scalastyle:off println - printStream.println(s"$repo added as a remote repository with the name: ${brr.getName}") - // scalastyle:on println - } - } + cr.setName("spark-list") val localM2 = new IBiblioResolver localM2.setM2compatible(true) @@ -898,7 +886,7 @@ private[spark] object SparkSubmitUtils { cr.add(localM2) val localIvy = new FileSystemResolver - val localIvyRoot = new File(ivySettings.getDefaultIvyUserDir, "local") + val localIvyRoot = new File(defaultIvyUserDir, "local") localIvy.setLocal(true) localIvy.setRepository(new FileRepository(localIvyRoot)) val ivyPattern = Seq(localIvyRoot.getAbsolutePath, "[organisation]", "[module]", "[revision]", @@ -985,32 +973,31 @@ private[spark] object SparkSubmitUtils { * @param ivyPath The path to the local ivy repository * @return An IvySettings object */ - def buildIvySettings( - remoteRepos: Option[String], - ivyPath: Option[String]): IvySettings = { + def buildIvySettings(remoteRepos: Option[String], ivyPath: Option[String]): IvySettings = { val ivySettings: IvySettings = new IvySettings - - // set ivy settings for location of cache, if option is supplied - ivyPath.foreach { alternateIvyCache => - ivySettings.setDefaultIvyUserDir(new File(alternateIvyCache)) - ivySettings.setDefaultCache(new File(alternateIvyCache, "cache")) - } + processIvyPathArg(ivySettings, ivyPath) // create a pattern matcher ivySettings.addMatcher(new GlobPatternMatcher) // create the dependency resolvers - val repoResolver = createRepoResolvers(remoteRepos, ivySettings) + val repoResolver = createRepoResolvers(ivySettings.getDefaultIvyUserDir) ivySettings.addResolver(repoResolver) ivySettings.setDefaultResolver(repoResolver.getName) + processRemoteRepoArg(ivySettings, remoteRepos) ivySettings } /** * Load Ivy settings from a given filename, using supplied resolvers * @param settingsFile Path to Ivy settings file + * @param remoteRepos Comma-delimited string of remote repositories other than maven central + * @param ivyPath The path to the local ivy repository * @return An IvySettings object */ - def loadIvySettings(settingsFile: String): IvySettings = { + def loadIvySettings( + settingsFile: String, + remoteRepos: Option[String], + ivyPath: Option[String]): IvySettings = { val file = new File(settingsFile) require(file.exists(), s"Ivy settings file $file does not exist") require(file.isFile(), s"Ivy settings file $file is not a normal file") @@ -1021,9 +1008,46 @@ private[spark] object SparkSubmitUtils { case e @ (_: IOException | _: ParseException) => throw new SparkException(s"Failed when loading Ivy settings from $settingsFile", e) } + processIvyPathArg(ivySettings, ivyPath) + processRemoteRepoArg(ivySettings, remoteRepos) ivySettings } + /* Set ivy settings for location of cache, if option is supplied */ + private def processIvyPathArg(ivySettings: IvySettings, ivyPath: Option[String]): Unit 
= { + ivyPath.filterNot(_.trim.isEmpty).foreach { alternateIvyDir => + ivySettings.setDefaultIvyUserDir(new File(alternateIvyDir)) + ivySettings.setDefaultCache(new File(alternateIvyDir, "cache")) + } + } + + /* Add any optional additional remote repositories */ + private def processRemoteRepoArg(ivySettings: IvySettings, remoteRepos: Option[String]): Unit = { + remoteRepos.filterNot(_.trim.isEmpty).map(_.split(",")).foreach { repositoryList => + val cr = new ChainResolver + cr.setName("user-list") + + // add current default resolver, if any + Option(ivySettings.getDefaultResolver).foreach(cr.add) + + // add additional repositories, last resolution in chain takes precedence + repositoryList.zipWithIndex.foreach { case (repo, i) => + val brr: IBiblioResolver = new IBiblioResolver + brr.setM2compatible(true) + brr.setUsepoms(true) + brr.setRoot(repo) + brr.setName(s"repo-${i + 1}") + cr.add(brr) + // scalastyle:off println + printStream.println(s"$repo added as a remote repository with the name: ${brr.getName}") + // scalastyle:on println + } + + ivySettings.addResolver(cr) + ivySettings.setDefaultResolver(cr.getName) + } + } + /** A nice function to use in tests as well. Values are dummy strings. */ def getModuleDescriptor: DefaultModuleDescriptor = DefaultModuleDescriptor.newDefaultInstance( ModuleRevisionId.newInstance("org.apache.spark", "spark-submit-parent", "1.0")) diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala index b6d822c8ce18..fd0cb3b24c5c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala @@ -23,7 +23,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.ivy.core.module.descriptor.MDArtifact import org.apache.ivy.core.settings.IvySettings -import org.apache.ivy.plugins.resolver.{AbstractResolver, FileSystemResolver, IBiblioResolver} +import org.apache.ivy.plugins.resolver.{AbstractResolver, ChainResolver, FileSystemResolver, IBiblioResolver} import org.scalatest.BeforeAndAfterAll import org.apache.spark.SparkFunSuite @@ -66,22 +66,25 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { test("create repo resolvers") { val settings = new IvySettings - val res1 = SparkSubmitUtils.createRepoResolvers(None, settings) + val res1 = SparkSubmitUtils.createRepoResolvers(settings.getDefaultIvyUserDir) // should have central and spark-packages by default assert(res1.getResolvers.size() === 4) assert(res1.getResolvers.get(0).asInstanceOf[IBiblioResolver].getName === "local-m2-cache") assert(res1.getResolvers.get(1).asInstanceOf[FileSystemResolver].getName === "local-ivy-cache") assert(res1.getResolvers.get(2).asInstanceOf[IBiblioResolver].getName === "central") assert(res1.getResolvers.get(3).asInstanceOf[IBiblioResolver].getName === "spark-packages") + } + test("create additional resolvers") { val repos = "a/1,b/2,c/3" - val resolver2 = SparkSubmitUtils.createRepoResolvers(Option(repos), settings) - assert(resolver2.getResolvers.size() === 7) + val settings = SparkSubmitUtils.buildIvySettings(Option(repos), None) + val resolver = settings.getDefaultResolver.asInstanceOf[ChainResolver] + assert(resolver.getResolvers.size() === 4) val expected = repos.split(",").map(r => s"$r/") - resolver2.getResolvers.toArray.zipWithIndex.foreach { case (resolver: AbstractResolver, i) => - if (i < 3) { - assert(resolver.getName === s"repo-${i + 1}") 
- assert(resolver.asInstanceOf[IBiblioResolver].getRoot === expected(i)) + resolver.getResolvers.toArray.zipWithIndex.foreach { case (resolver: AbstractResolver, i) => + if (1 < i && i < 3) { + assert(resolver.getName === s"repo-$i") + assert(resolver.asInstanceOf[IBiblioResolver].getRoot === expected(i - 1)) } } } From 498a34f2e497d6033cf00df6209200ce0156340e Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Fri, 6 Jan 2017 15:28:45 -0800 Subject: [PATCH 06/12] added test for loading ivy settings from file --- .../spark/deploy/SparkSubmitUtilsSuite.scala | 40 ++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala index fd0cb3b24c5c..f29a32645ed6 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.deploy -import java.io.{File, OutputStream, PrintStream} +import java.io.{File, FileWriter, OutputStream, PrintStream} import scala.collection.mutable.ArrayBuffer @@ -220,4 +220,42 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { assert(files.indexOf("my.great.dep") < 0, "Returned excluded artifact") } } + + test("load ivy settings file") { + val main = new MavenCoordinate("my.great.lib", "mylib", "0.1") + val dep = "my.great.dep:mydep:0.5" + val dummyIvyLocal = new File(tempIvyPath, "local" + File.separator) + val settingsText = + s""" + | + | + | + | + | + | + | + | + | + | + |""".stripMargin + + val settingsFile = new File(tempIvyPath, "ivysettings.xml") + val settingsWriter = new FileWriter(settingsFile) + settingsWriter.write(settingsText) + settingsWriter.close() + val settings = SparkSubmitUtils.loadIvySettings(settingsFile.toString, None, None) + settings.setDefaultIvyUserDir(new File(tempIvyPath)) // NOTE - can't set this through file + + val testUtilSettings = new IvySettings + testUtilSettings.setDefaultIvyUserDir(new File(tempIvyPath)) + IvyTestUtils.withRepository(main, Some(dep), Some(dummyIvyLocal), useIvyLayout = true, + ivySettings = testUtilSettings) { repo => + val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, settings, isTest = true) + assert(jarPath.indexOf("mylib") >= 0, "should find artifact") + assert(jarPath.indexOf(tempIvyPath) >= 0, "should be in new ivy path") + assert(jarPath.indexOf("mydep") >= 0, "should find dependency") + } + } } From a9da2b4f1b0d73067dd0e60d1a8ae6328ab92b67 Mon Sep 17 00:00:00 2001 From: Ian Hummel Date: Thu, 22 Dec 2016 11:44:51 -0500 Subject: [PATCH 07/12] Adjusts documentation for Ivy-related settings. modified documentation for option changes closes #12 --- docs/configuration.md | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index bd67144007e9..0336d2158d75 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -433,9 +433,10 @@ Apart from these, the following properties are also available, and may be useful Comma-separated list of maven coordinates of jars to include on the driver and executor - classpaths. Will search the local maven repo, then maven central and any additional remote - repositories given by spark.jars.ivy. The format for the coordinates should be - groupId:artifactId:version. + classpaths. 
If spark.ivy.settings is given artifacts will be resolved according + to the configuration in the file, otherwise artifacts will be searched for in the local maven repo, + then maven central and finally any additional remote repositories given by spark.jars.ivy. + The format for the coordinates should be groupId:artifactId:version. @@ -454,6 +455,17 @@ Apart from these, the following properties are also available, and may be useful with spark.jars.packages. + + spark.ivy.settings + + + Path to an Ivy settings file to customize resolution of jars specified using spark.jars.packages + instead of the built-in defaults, such as maven central. Additional repositories given on the command-line + will also be included. Useful for allowing Spark to resolve artifacts from behind a firewall e.g. via an + in-house artifact server like Artifactory. Details on the settings file format can be found at + http://ant.apache.org/ivy/history/latest-milestone/settings.html + + spark.pyspark.driver.python From 7a7b6ba213e57a705642d42220037a7b9a18e3a6 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Fri, 6 Jan 2017 16:46:47 -0800 Subject: [PATCH 08/12] fixed up package related docs --- docs/configuration.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 0336d2158d75..183021c57c12 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -435,7 +435,8 @@ Apart from these, the following properties are also available, and may be useful Comma-separated list of maven coordinates of jars to include on the driver and executor classpaths. If spark.ivy.settings is given artifacts will be resolved according to the configuration in the file, otherwise artifacts will be searched for in the local maven repo, - then maven central and finally any additional remote repositories given by spark.jars.ivy. + then maven central and finally any additional remote repositories given by the command-line option + --repositories see Advanced Dependency Management. The format for the coordinates should be groupId:artifactId:version. @@ -451,8 +452,9 @@ Apart from these, the following properties are also available, and may be useful spark.jars.ivy - Comma-separated list of additional remote repositories to search for the coordinates given - with spark.jars.packages. + Path to specify the Ivy user directory, used for the local Ivy cache and package files from + spark.jars.packages. This will override the Ivy property ivy.default.ivy.user.dir + which defaults to ~/.ivy2. @@ -460,10 +462,10 @@ Apart from these, the following properties are also available, and may be useful Path to an Ivy settings file to customize resolution of jars specified using spark.jars.packages - instead of the built-in defaults, such as maven central. Additional repositories given on the command-line - will also be included. Useful for allowing Spark to resolve artifacts from behind a firewall e.g. via an - in-house artifact server like Artifactory. Details on the settings file format can be found at - http://ant.apache.org/ivy/history/latest-milestone/settings.html + instead of the built-in defaults, such as maven central. Additional repositories given by the command-line + option --repositories will also be included. Useful for allowing Spark to resolve artifacts from behind + a firewall e.g. via an in-house artifact server like Artifactory. 
Details on the settings file format can be + found at http://ant.apache.org/ivy/history/latest-milestone/settings.html From c855a17100f4ad02b4597393d1326271a4061c6b Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Mon, 9 Jan 2017 14:50:49 -0800 Subject: [PATCH 09/12] changed conf to spark.jars.ivySettings --- .../scala/org/apache/spark/deploy/SparkSubmitArguments.scala | 2 +- docs/configuration.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 4c81b7ade45a..db3150c785ca 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -176,7 +176,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S jars = Option(jars).orElse(sparkProperties.get("spark.jars")).orNull files = Option(files).orElse(sparkProperties.get("spark.files")).orNull ivyRepoPath = sparkProperties.get("spark.jars.ivy").orNull - ivySettingsFile = sparkProperties.get("spark.ivy.settings").orNull + ivySettingsFile = sparkProperties.get("spark.jars.ivySettings").orNull packages = Option(packages).orElse(sparkProperties.get("spark.jars.packages")).orNull packagesExclusions = Option(packagesExclusions) .orElse(sparkProperties.get("spark.jars.excludes")).orNull diff --git a/docs/configuration.md b/docs/configuration.md index 183021c57c12..caf32999ef4e 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -433,7 +433,7 @@ Apart from these, the following properties are also available, and may be useful Comma-separated list of maven coordinates of jars to include on the driver and executor - classpaths. If spark.ivy.settings is given artifacts will be resolved according + classpaths. If spark.jars.ivySettings is given artifacts will be resolved according to the configuration in the file, otherwise artifacts will be searched for in the local maven repo, then maven central and finally any additional remote repositories given by the command-line option --repositories see Advanced Dependency Management. 
@@ -458,7 +458,7 @@ Apart from these, the following properties are also available, and may be useful - spark.ivy.settings + spark.jars.ivySettings Path to an Ivy settings file to customize resolution of jars specified using spark.jars.packages From 9b6d67f9529d9b83aa5f7480461d3bbb691cb881 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Mon, 9 Jan 2017 15:41:12 -0800 Subject: [PATCH 10/12] removed ivySettings from SparkSubmitArguments --- .../main/scala/org/apache/spark/deploy/SparkSubmit.scala | 6 +++--- .../org/apache/spark/deploy/SparkSubmitArguments.scala | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 5ba4c5cdd9a0..a980144a7595 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -286,10 +286,10 @@ object SparkSubmit extends CommandLineUtils { } // Create the IvySettings, either load from file or build defaults - val ivySettings = if (Option(args.ivySettingsFile).isDefined) { - SparkSubmitUtils.loadIvySettings(args.ivySettingsFile, Option(args.repositories), + val ivySettings = args.sparkProperties.get("spark.jars.ivySettings").map { ivySettingsFile => + SparkSubmitUtils.loadIvySettings(ivySettingsFile, Option(args.repositories), Option(args.ivyRepoPath)) - } else { + }.getOrElse { SparkSubmitUtils.buildIvySettings(Option(args.repositories), Option(args.ivyRepoPath)) } diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index db3150c785ca..b1d36e1821cc 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -60,7 +60,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S var packages: String = null var repositories: String = null var ivyRepoPath: String = null - var ivySettingsFile: String = null var packagesExclusions: String = null var verbose: Boolean = false var isPython: Boolean = false @@ -176,7 +175,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S jars = Option(jars).orElse(sparkProperties.get("spark.jars")).orNull files = Option(files).orElse(sparkProperties.get("spark.files")).orNull ivyRepoPath = sparkProperties.get("spark.jars.ivy").orNull - ivySettingsFile = sparkProperties.get("spark.jars.ivySettings").orNull packages = Option(packages).orElse(sparkProperties.get("spark.jars.packages")).orNull packagesExclusions = Option(packagesExclusions) .orElse(sparkProperties.get("spark.jars.excludes")).orNull From ff34f994b1a11dcfb4e61c33f275cece44931fc7 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Tue, 10 Jan 2017 17:06:30 -0800 Subject: [PATCH 11/12] fixed doc punctuation and non-related tag error --- docs/configuration.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index caf32999ef4e..8b5980d60b12 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -433,11 +433,11 @@ Apart from these, the following properties are also available, and may be useful Comma-separated list of maven coordinates of jars to include on the driver and executor - classpaths. 
If spark.jars.ivySettings is given artifacts will be resolved according - to the configuration in the file, otherwise artifacts will be searched for in the local maven repo, - then maven central and finally any additional remote repositories given by the command-line option - --repositories see Advanced Dependency Management. - The format for the coordinates should be groupId:artifactId:version. + classpaths. The coordinates should be groupId:artifactId:version. If spark.jars.ivySettings + is given artifacts will be resolved according to the configuration in the file, otherwise artifacts + will be searched for in the local maven repo, then maven central and finally any additional remote + repositories given by the command-line option --repositories. For more details, see + Advanced Dependency Management. @@ -453,7 +453,7 @@ Apart from these, the following properties are also available, and may be useful Path to specify the Ivy user directory, used for the local Ivy cache and package files from - spark.jars.packages. This will override the Ivy property ivy.default.ivy.user.dir + spark.jars.packages. This will override the Ivy property ivy.default.ivy.user.dir which defaults to ~/.ivy2. @@ -463,8 +463,8 @@ Apart from these, the following properties are also available, and may be useful Path to an Ivy settings file to customize resolution of jars specified using spark.jars.packages instead of the built-in defaults, such as maven central. Additional repositories given by the command-line - option --repositories will also be included. Useful for allowing Spark to resolve artifacts from behind - a firewall e.g. via an in-house artifact server like Artifactory. Details on the settings file format can be + option --repositories will also be included. Useful for allowing Spark to resolve artifacts from behind + a firewall e.g. via an in-house artifact server like Artifactory. Details on the settings file format can be found at http://ant.apache.org/ivy/history/latest-milestone/settings.html @@ -1444,7 +1444,7 @@ Apart from these, the following properties are also available, and may be useful Enables monitoring of killed / interrupted tasks. When set to true, any task which is killed will be monitored by the executor until that task actually finishes executing. See the other spark.task.reaper.* configurations for details on how to control the exact behavior - of this monitoring. When set to false (the default), task killing will use an older code + of this monitoring. When set to false (the default), task killing will use an older code path which lacks such monitoring. 
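
Not part of the patch series, for readers who have not written an Ivy settings file before: the file referenced by spark.jars.ivySettings is a standard Ivy settings document (format described at the ant.apache.org link cited in the docs above). A minimal example that routes all resolution through a single in-house mirror could look roughly like the following; the resolver name and URL are placeholders, not values taken from the patches.

    <ivysettings>
      <settings defaultResolver="corp-mirror"/>
      <resolvers>
        <ibiblio name="corp-mirror" m2compatible="true"
                 root="https://repo.example.com/maven2/"/>
      </resolvers>
    </ivysettings>

With such a file in place, a job would typically be submitted with something like --conf spark.jars.ivySettings=/path/to/ivysettings.xml together with --packages groupId:artifactId:version, and, as the documentation change above states, any repositories given via the --repositories command-line option are added as additional resolvers alongside those defined in the file.
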
From 532e744eff04ce377ceae903c3d916091f551216 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Tue, 10 Jan 2017 17:14:38 -0800 Subject: [PATCH 12/12] used Files.write for writing test suite file, cleaner and specify encoding --- .../org/apache/spark/deploy/SparkSubmitUtilsSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala index f29a32645ed6..266c9d33b5a9 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala @@ -17,10 +17,12 @@ package org.apache.spark.deploy -import java.io.{File, FileWriter, OutputStream, PrintStream} +import java.io.{File, OutputStream, PrintStream} +import java.nio.charset.StandardCharsets import scala.collection.mutable.ArrayBuffer +import com.google.common.io.Files import org.apache.ivy.core.module.descriptor.MDArtifact import org.apache.ivy.core.settings.IvySettings import org.apache.ivy.plugins.resolver.{AbstractResolver, ChainResolver, FileSystemResolver, IBiblioResolver} @@ -242,9 +244,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { |""".stripMargin val settingsFile = new File(tempIvyPath, "ivysettings.xml") - val settingsWriter = new FileWriter(settingsFile) - settingsWriter.write(settingsText) - settingsWriter.close() + Files.write(settingsText, settingsFile, StandardCharsets.UTF_8) val settings = SparkSubmitUtils.loadIvySettings(settingsFile.toString, None, None) settings.setDefaultIvyUserDir(new File(tempIvyPath)) // NOTE - can't set this through file
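
Not part of the patch series, but to summarize how the new pieces fit together: the sketch below shows the final shape of the SparkSubmitUtils entry points as exercised by SparkSubmit and the test suite after all twelve patches. It is illustrative only; it has to live under the org.apache.spark.deploy package because SparkSubmitUtils is private[spark], and the settings path, repository URL, Ivy directory, and Maven coordinates are placeholders.

    package org.apache.spark.deploy

    object IvySettingsSketch {
      def main(args: Array[String]): Unit = {
        // Path a user would supply via --conf spark.jars.ivySettings=... (placeholder).
        val settingsPath = "/path/to/ivysettings.xml"

        // Settings loaded from a file; --repositories and spark.jars.ivy are still
        // layered on top through the two Option arguments (see loadIvySettings above).
        val fromFile = SparkSubmitUtils.loadIvySettings(
          settingsPath,
          remoteRepos = Some("https://repo.example.com/extra"),
          ivyPath = None)

        // Default settings built when no file is given: the local m2 cache, local
        // ivy cache, Maven Central and spark-packages resolvers, plus any extra
        // repositories passed in.
        val fromDefaults = SparkSubmitUtils.buildIvySettings(
          remoteRepos = None,
          ivyPath = Some("/tmp/alternate-ivy-dir"))

        // Either IvySettings object can then drive --packages resolution; the
        // result is a comma-delimited list of jar paths, including transitive
        // dependencies, with any excluded group:artifact pairs filtered out.
        val jarPaths = SparkSubmitUtils.resolveMavenCoordinates(
          "com.example:mylib:0.1",
          fromFile,
          exclusions = Seq("com.example:unwanted-dep"))
        println(jarPaths)
      }
    }

On the user-facing side the same flow is triggered simply by submitting with --packages while spark.jars.ivySettings is set; when the property is absent, buildIvySettings supplies the default resolver chain as before.
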