// Patch summary (unified git diff touching two Scala files).
//
// 1. SQLConf.scala
//    Adds the config entry ADDITIONAL_REMOTE_REPOSITORIES, keyed
//    spark.sql.additionalRemoteRepositories. It is a string config; its own doc text describes
//    it as a comma-delimited list of optional additional remote Maven mirror repositories that
//    is only used for downloading Hive jars in IsolatedClientLoader. The default value is the
//    same maven-central.storage-download.googleapis.com mirror URL that this patch removes as
//    a hard-coded literal from IsolatedClientLoader.downloadVersion.
//
// 2. IsolatedClientLoader.scala
//    - Imports org.apache.spark.sql.internal.SQLConf.
//    - Reads the new entry once with sparkConf.get(...) into remoteRepos, before the try block
//      that downloads the Hive jars.
//    - downloadVersion gains a fourth parameter, remoteRepos: String. Both call sites shown in
//      the hunks pass the same value: the first attempt with hadoopVersion, and the retry with
//      fallbackVersion inside the RuntimeException handler.
//    - Inside downloadVersion the value is wrapped as Some(remoteRepos) and passed as the first
//      argument of SparkSubmitUtils.buildIvySettings, replacing the literal URL.
//
// NOTE(review): how buildIvySettings treats an empty string or several comma-separated entries
// is not visible in this patch; confirm against SparkSubmitUtils before relying on the
// comma-delimited wording of the config doc.
// NOTE(review): the line breaks of the diff appear to have been collapsed into spaces in this
// copy; they presumably need to be restored before the patch can be applied.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 0ec661fc16c88..d4f09abcd8048 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1979,6 +1979,15 @@ object SQLConf { .doc("When true, the ArrayExists will follow the three-valued boolean logic.") .booleanConf .createWithDefault(true) + + val ADDITIONAL_REMOTE_REPOSITORIES = + buildConf("spark.sql.additionalRemoteRepositories") + .doc("A comma-delimited string config of the optional additional remote Maven mirror " + + "repositories. This is only used for downloading Hive jars in IsolatedClientLoader " + + "if the default Maven Central repo is unreachable.") + .stringConf + .createWithDefault( + "https://maven-central.storage-download.googleapis.com/repos/central/data/") } /** diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 1720af3b2b367..be50cb0ded5b5 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -36,6 +36,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.util.quietly import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.internal.NonClosableMutableURLClassLoader +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.{MutableURLClassLoader, Utils} /** Factory for `IsolatedClientLoader` with specific versions of hive. 
*/ @@ -60,9 +61,10 @@ private[hive] object IsolatedClientLoader extends Logging { val files = if (resolvedVersions.contains((resolvedVersion, hadoopVersion))) { resolvedVersions((resolvedVersion, hadoopVersion)) } else { + val remoteRepos = sparkConf.get(SQLConf.ADDITIONAL_REMOTE_REPOSITORIES) val (downloadedFiles, actualHadoopVersion) = try { - (downloadVersion(resolvedVersion, hadoopVersion, ivyPath), hadoopVersion) + (downloadVersion(resolvedVersion, hadoopVersion, ivyPath, remoteRepos), hadoopVersion) } catch { case e: RuntimeException if e.getMessage.contains("hadoop") => // If the error message contains hadoop, it is probably because the hadoop @@ -74,7 +76,8 @@ private[hive] object IsolatedClientLoader extends Logging { "It is recommended to set jars used by Hive metastore client through " + "spark.sql.hive.metastore.jars in the production environment.") _sharesHadoopClasses = false - (downloadVersion(resolvedVersion, fallbackVersion, ivyPath), fallbackVersion) + (downloadVersion( + resolvedVersion, fallbackVersion, ivyPath, remoteRepos), fallbackVersion) } resolvedVersions.put((resolvedVersion, actualHadoopVersion), downloadedFiles) resolvedVersions((resolvedVersion, actualHadoopVersion)) @@ -112,7 +115,8 @@ private[hive] object IsolatedClientLoader extends Logging { private def downloadVersion( version: HiveVersion, hadoopVersion: String, - ivyPath: Option[String]): Seq[URL] = { + ivyPath: Option[String], + remoteRepos: String): Seq[URL] = { val hiveArtifacts = version.extraDeps ++ Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") .map(a => s"org.apache.hive:$a:${version.fullVersion}") ++ @@ -123,7 +127,7 @@ private[hive] object IsolatedClientLoader extends Logging { SparkSubmitUtils.resolveMavenCoordinates( hiveArtifacts.mkString(","), SparkSubmitUtils.buildIvySettings( - Some("https://maven-central.storage-download.googleapis.com/repos/central/data/"), + Some(remoteRepos), ivyPath), exclusions = version.exclusions) }