From b225072111cfc91552e26138eac38c3b31e9b482 Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Wed, 22 Feb 2023 16:40:18 -0800 Subject: [PATCH 1/5] Add a test in HiveUtilsSuite reproducing the issue --- .../scala/org/apache/spark/TestUtils.scala | 5 ++- .../spark/sql/hive/HiveUtilsSuite.scala | 35 +++++++++++++++++-- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index bdf81d22efa4..13ae6aca38b8 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -193,12 +193,15 @@ private[spark] object TestUtils { baseClass: String = null, classpathUrls: Seq[URL] = Seq.empty, implementsClasses: Seq[String] = Seq.empty, - extraCodeBody: String = ""): File = { + extraCodeBody: String = "", + packageName: Option[String] = None): File = { val extendsText = Option(baseClass).map { c => s" extends ${c}" }.getOrElse("") val implementsText = "implements " + (implementsClasses :+ "java.io.Serializable").mkString(", ") + val packageText = packageName.map(p => s"package $p;\n").getOrElse("") val sourceFile = new JavaSourceFromString(className, s""" + |$packageText |public class $className $extendsText $implementsText { | @Override public String toString() { return "$toStringValue"; } | diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala index d8e1e0129282..1f1bf1e7afef 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala @@ -19,13 +19,16 @@ package org.apache.spark.sql.hive import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.conf.HiveConf.ConfVars - -import org.apache.spark.SparkConf +import org.apache.spark.{SparkConf, TestUtils} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.catalog.CatalogDatabase import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils -import org.apache.spark.util.ChildFirstURLClassLoader +import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader} + +import java.io.File +import java.net.URI class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { @@ -77,6 +80,32 @@ class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton } } + test("User-provided JARs should not take precedence over builtin Hive JARs") { + withTempDir { tmpDir => + val classFile = TestUtils.createCompiledClass( + "Hive", tmpDir, packageName = Some("org.apache.hadoop.hive.ql.metadata")) + + val jarFile = new File(tmpDir, "hive-fake.jar") + TestUtils.createJar(Seq(classFile), jarFile, Some("org/apache/hadoop/hive/ql/metadata")) + + val conf = new SparkConf + val contextClassLoader = Thread.currentThread().getContextClassLoader + val loader = new MutableURLClassLoader(Array(jarFile.toURI.toURL), contextClassLoader) + try { + Thread.currentThread().setContextClassLoader(loader) + val client = HiveUtils.newClientForMetadata( + conf, + SparkHadoopUtil.newConfiguration(conf), + HiveUtils.newTemporaryConfiguration(useInMemoryDerby = true)) + client.createDatabase( + CatalogDatabase("foo", "", URI.create(s"file://${tmpDir.getAbsolutePath}/foo.db"), Map()), + ignoreIfExists = true) + } finally { + Thread.currentThread().setContextClassLoader(contextClassLoader) + } + } + } + test("SPARK-27349: Dealing with TimeVars removed in Hive 2.x") { // Test default value val defaultConf = new Configuration From dc42da82e1b977ffe571f87c8c7f6d9eddfd5edd Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Wed, 22 Feb 2023 16:42:34 -0800 Subject: [PATCH 2/5] eliminate the conflict using the same approach as Java 9; just use the general spark user classloader when 'builtin' is used instead of reconstructing a new URL classloader --- .../org/apache/spark/sql/hive/HiveUtils.scala | 35 ++----------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index fe9bdef3d0e1..a19f5d8d4d12 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.hive import java.io.File -import java.net.{URL, URLClassLoader} +import java.net.URL import java.util.Locale import java.util.concurrent.TimeUnit @@ -26,7 +26,6 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.HashMap import scala.util.Try -import org.apache.commons.lang3.{JavaVersion, SystemUtils} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars @@ -46,7 +45,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf._ import org.apache.spark.sql.internal.StaticSQLConf.WAREHOUSE_PATH import org.apache.spark.sql.types._ -import org.apache.spark.util.{ChildFirstURLClassLoader, Utils} +import org.apache.spark.util.Utils private[spark] object HiveUtils extends Logging { @@ -409,41 +408,13 @@ private[spark] object HiveUtils extends Logging { s"or change ${HIVE_METASTORE_VERSION.key} to $builtinHiveVersion.") } - // We recursively find all jars in the class loader chain, - // starting from the given classLoader. - def allJars(classLoader: ClassLoader): Array[URL] = classLoader match { - case null => Array.empty[URL] - case childFirst: ChildFirstURLClassLoader => - childFirst.getURLs() ++ allJars(Utils.getSparkClassLoader) - case urlClassLoader: URLClassLoader => - urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent) - case other => allJars(other.getParent) - } - - val classLoader = Utils.getContextOrSparkClassLoader - val jars: Array[URL] = if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) { - // Do nothing. The system classloader is no longer a URLClassLoader in Java 9, - // so it won't match the case in allJars. It no longer exposes URLs of - // the system classpath - Array.empty[URL] - } else { - val loadedJars = allJars(classLoader) - // Verify at least one jar was found - if (loadedJars.length == 0) { - throw new IllegalArgumentException( - "Unable to locate hive jars to connect to metastore. " + - s"Please set ${HIVE_METASTORE_JARS.key}.") - } - loadedJars - } - logInfo( s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using Spark classes.") new IsolatedClientLoader( version = metaVersion, sparkConf = conf, hadoopConf = hadoopConf, - execJars = jars.toSeq, + execJars = Seq.empty, config = configurations, isolationOn = !isCliSessionState(), barrierPrefixes = hiveMetastoreBarrierPrefixes, From 686d9397c5580f2df17f61939d3a19ac40008c9c Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Thu, 23 Feb 2023 10:04:52 -0800 Subject: [PATCH 3/5] fix up some comments --- .../org/apache/spark/sql/hive/client/IsolatedClientLoader.scala | 2 +- .../test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index e65e6d42937c..20fde0a16718 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -233,7 +233,7 @@ private[hive] class IsolatedClientLoader( val isolatedClassLoader = if (isolationOn) { if (allJars.isEmpty) { - // See HiveUtils; this is the Java 9+ + builtin mode scenario + // See HiveUtils; this is the builtin mode scenario baseClassLoader } else { val rootClassLoader: ClassLoader = diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala index 1f1bf1e7afef..1438701de2c3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala @@ -80,7 +80,7 @@ class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton } } - test("User-provided JARs should not take precedence over builtin Hive JARs") { + test("SPARK-42539 User-provided JARs should not take precedence over builtin Hive JARs") { withTempDir { tmpDir => val classFile = TestUtils.createCompiledClass( "Hive", tmpDir, packageName = Some("org.apache.hadoop.hive.ql.metadata")) From e596598dbdc3b670f170e88d27e1e260f17414d9 Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Thu, 23 Feb 2023 10:13:24 -0800 Subject: [PATCH 4/5] simplify / clean up unnecessary code --- .../org/apache/spark/sql/hive/HiveUtils.scala | 20 +---- .../hive/client/IsolatedClientLoader.scala | 83 +++++++++---------- 2 files changed, 40 insertions(+), 63 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index a19f5d8d4d12..1a0cac42fa79 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -29,7 +29,6 @@ import scala.util.Try import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.util.VersionInfo import org.apache.hive.common.util.HiveVersionInfo @@ -320,22 +319,6 @@ private[spark] object HiveUtils extends Logging { (commonTimeVars ++ hardcodingTimeVars).toMap } - /** - * Check current Thread's SessionState type - * @return true when SessionState.get returns an instance of CliSessionState, - * false when it gets non-CliSessionState instance or null - */ - def isCliSessionState(): Boolean = { - val state = SessionState.get - var temp: Class[_] = if (state != null) state.getClass else null - var found = false - while (temp != null && !found) { - found = temp.getName == "org.apache.hadoop.hive.cli.CliSessionState" - temp = temp.getSuperclass - } - found - } - /** * Create a [[HiveClient]] used for execution. * @@ -414,9 +397,8 @@ private[spark] object HiveUtils extends Logging { version = metaVersion, sparkConf = conf, hadoopConf = hadoopConf, - execJars = Seq.empty, config = configurations, - isolationOn = !isCliSessionState(), + isolationOn = false, barrierPrefixes = hiveMetastoreBarrierPrefixes, sharedPrefixes = hiveMetastoreSharedPrefixes) } else if (hiveMetastoreJars == "maven") { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 20fde0a16718..879b2451cae2 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -232,51 +232,46 @@ private[hive] class IsolatedClientLoader( private[hive] val classLoader: MutableURLClassLoader = { val isolatedClassLoader = if (isolationOn) { - if (allJars.isEmpty) { - // See HiveUtils; this is the builtin mode scenario - baseClassLoader - } else { - val rootClassLoader: ClassLoader = - if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) { - // In Java 9, the boot classloader can see few JDK classes. The intended parent - // classloader for delegation is now the platform classloader. - // See http://java9.wtf/class-loading/ - val platformCL = - classOf[ClassLoader].getMethod("getPlatformClassLoader"). - invoke(null).asInstanceOf[ClassLoader] - // Check to make sure that the root classloader does not know about Hive. - assert(Try(platformCL.loadClass("org.apache.hadoop.hive.conf.HiveConf")).isFailure) - platformCL + val rootClassLoader: ClassLoader = + if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) { + // In Java 9, the boot classloader can see few JDK classes. The intended parent + // classloader for delegation is now the platform classloader. + // See http://java9.wtf/class-loading/ + val platformCL = + classOf[ClassLoader].getMethod("getPlatformClassLoader"). + invoke(null).asInstanceOf[ClassLoader] + // Check to make sure that the root classloader does not know about Hive. + assert(Try(platformCL.loadClass("org.apache.hadoop.hive.conf.HiveConf")).isFailure) + platformCL + } else { + // The boot classloader is represented by null (the instance itself isn't accessible) + // and before Java 9 can see all JDK classes + null + } + new URLClassLoader(allJars, rootClassLoader) { + override def loadClass(name: String, resolve: Boolean): Class[_] = { + val loaded = findLoadedClass(name) + if (loaded == null) doLoadClass(name, resolve) else loaded + } + def doLoadClass(name: String, resolve: Boolean): Class[_] = { + val classFileName = name.replaceAll("\\.", "/") + ".class" + if (isBarrierClass(name)) { + // For barrier classes, we construct a new copy of the class. + val bytes = IOUtils.toByteArray(baseClassLoader.getResourceAsStream(classFileName)) + logDebug(s"custom defining: $name - ${util.Arrays.hashCode(bytes)}") + defineClass(name, bytes, 0, bytes.length) + } else if (!isSharedClass(name)) { + logDebug(s"hive class: $name - ${getResource(classToPath(name))}") + super.loadClass(name, resolve) } else { - // The boot classloader is represented by null (the instance itself isn't accessible) - // and before Java 9 can see all JDK classes - null - } - new URLClassLoader(allJars, rootClassLoader) { - override def loadClass(name: String, resolve: Boolean): Class[_] = { - val loaded = findLoadedClass(name) - if (loaded == null) doLoadClass(name, resolve) else loaded - } - def doLoadClass(name: String, resolve: Boolean): Class[_] = { - val classFileName = name.replaceAll("\\.", "/") + ".class" - if (isBarrierClass(name)) { - // For barrier classes, we construct a new copy of the class. - val bytes = IOUtils.toByteArray(baseClassLoader.getResourceAsStream(classFileName)) - logDebug(s"custom defining: $name - ${util.Arrays.hashCode(bytes)}") - defineClass(name, bytes, 0, bytes.length) - } else if (!isSharedClass(name)) { - logDebug(s"hive class: $name - ${getResource(classToPath(name))}") - super.loadClass(name, resolve) - } else { - // For shared classes, we delegate to baseClassLoader, but fall back in case the - // class is not found. - logDebug(s"shared class: $name") - try { - baseClassLoader.loadClass(name) - } catch { - case _: ClassNotFoundException => - super.loadClass(name, resolve) - } + // For shared classes, we delegate to baseClassLoader, but fall back in case the + // class is not found. + logDebug(s"shared class: $name") + try { + baseClassLoader.loadClass(name) + } catch { + case _: ClassNotFoundException => + super.loadClass(name, resolve) } } } From a5f5d9eff35bf75fa4f458d3c2de9af02903d581 Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Mon, 27 Feb 2023 11:50:38 -0800 Subject: [PATCH 5/5] minor style fixups --- .../scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala index 1438701de2c3..823ac8ed957e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala @@ -17,8 +17,12 @@ package org.apache.spark.sql.hive +import java.io.File +import java.net.URI + import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.conf.HiveConf.ConfVars + import org.apache.spark.{SparkConf, TestUtils} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql.QueryTest @@ -27,9 +31,6 @@ import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader} -import java.io.File -import java.net.URI - class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { private def testFormatTimeVarsForHiveClient(key: String, value: String, expected: Long): Unit = { @@ -80,7 +81,7 @@ class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton } } - test("SPARK-42539 User-provided JARs should not take precedence over builtin Hive JARs") { + test("SPARK-42539: User-provided JARs should not take precedence over builtin Hive JARs") { withTempDir { tmpDir => val classFile = TestUtils.createCompiledClass( "Hive", tmpDir, packageName = Some("org.apache.hadoop.hive.ql.metadata"))