From 698159a0a1fcf8c0ba042daf875d037df3f8ed6f Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Thu, 19 Nov 2015 00:21:18 +0900 Subject: [PATCH 1/3] [SPARK-11818][REPL] Fix ExecutorClassLoader to lookup resources from parent class loader * Without patch, some tests of ExecutorClassLoaderSuite fails --- .../scala/org/apache/spark/TestUtils.scala | 10 +++++++ .../spark/repl/ExecutorClassLoader.scala | 12 +++++++- .../spark/repl/ExecutorClassLoaderSuite.scala | 30 +++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index acfe751f6c746..9a2e47ba5b723 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -159,6 +159,16 @@ private[spark] object TestUtils { createCompiledClass(className, destDir, sourceFile, classpathUrls) } + def createResource( + resourceName: String, + destDir: File, + fileContent: String = "test"): File = { + val out: File = new File(destDir, resourceName) + Files.write(fileContent.getBytes(StandardCharsets.UTF_8), out) + assert(out.exists(), "Resource file is not written: " + out.getAbsolutePath()) + out + } + /** * Run some code involving jobs submitted to the given context and assert that the jobs spilled. */ diff --git a/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala index 3d2d235a00c93..d387fa454fc89 100644 --- a/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala +++ b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala @@ -34,7 +34,9 @@ import org.apache.spark.util.ParentClassLoader /** * A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI, * used to load classes defined by the interpreter when the REPL is used. - * Allows the user to specify if user class path should be first + * Allows the user to specify if user class path should be first. + * This class loader delegates getting/finding resources to parent loader, + * which makes sense until REPL never provide resource dynamically. */ class ExecutorClassLoader(conf: SparkConf, classUri: String, parent: ClassLoader, userClassPathFirst: Boolean) extends ClassLoader with Logging { @@ -55,6 +57,14 @@ class ExecutorClassLoader(conf: SparkConf, classUri: String, parent: ClassLoader } } + override def getResource(name: String): URL = { + parentLoader.getResource(name) + } + + override def getResources(name: String): java.util.Enumeration[URL] = { + parentLoader.getResources(name) + } + override def findClass(name: String): Class[_] = { userClassPathFirst match { case true => findClassLocally(name).getOrElse(parentLoader.loadClass(name)) diff --git a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala index a58eda12b1120..dccf4d513eca7 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala @@ -19,8 +19,10 @@ package org.apache.spark.repl import java.io.File import java.net.{URL, URLClassLoader} +import java.util import scala.concurrent.duration._ +import scala.io.Source import scala.language.implicitConversions import scala.language.postfixOps @@ -41,6 +43,7 @@ class ExecutorClassLoaderSuite val childClassNames = List("ReplFakeClass1", "ReplFakeClass2") val parentClassNames = List("ReplFakeClass1", "ReplFakeClass2", "ReplFakeClass3") + val parentResourceNames = List("fake-resource.txt") var tempDir1: File = _ var tempDir2: File = _ var url1: String = _ @@ -54,6 +57,7 @@ class ExecutorClassLoaderSuite url1 = "file://" + tempDir1 urls2 = List(tempDir2.toURI.toURL).toArray childClassNames.foreach(TestUtils.createCompiledClass(_, tempDir1, "1")) + parentResourceNames.foreach(TestUtils.createResource(_, tempDir2, "resource")) parentClassNames.foreach(TestUtils.createCompiledClass(_, tempDir2, "2")) } @@ -99,6 +103,32 @@ class ExecutorClassLoaderSuite } } + test("resource from parent") { + val parentLoader = new URLClassLoader(urls2, null) + val classLoader = new ExecutorClassLoader(new SparkConf(), url1, parentLoader, true) + val resourceName: String = "fake-resource.txt" + Option(classLoader.getResource(resourceName)) match { + case Some(url) => { + val fileReader = Source.fromInputStream(url.openStream()).bufferedReader() + assert(fileReader.readLine().contains("resource"), "File doesn't contain 'resource'") + } + case None => fail(s"Resource $resourceName not found") + } + } + + test("resources from parent") { + val parentLoader = new URLClassLoader(urls2, null) + val classLoader = new ExecutorClassLoader(new SparkConf(), url1, parentLoader, true) + val resourceName: String = "fake-resource.txt" + val resources: util.Enumeration[URL] = classLoader.getResources(resourceName) + if (!resources.hasMoreElements) { + fail(s"Resource $resourceName not found") + } else { + val fileReader = Source.fromInputStream(resources.nextElement().openStream()).bufferedReader() + assert(fileReader.readLine().contains("resource"), "File doesn't contain 'resource'") + } + } + test("failing to fetch classes from HTTP server should not leak resources (SPARK-6209)") { // This is a regression test for SPARK-6209, a bug where each failed attempt to load a class // from the driver's class server would leak a HTTP connection, causing the class server's From f2dd621db5f2735d818cb383332db2a97cbdf6fb Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Thu, 19 Nov 2015 06:34:16 +0900 Subject: [PATCH 2/3] get rid of TestUtils.createResource() via inlining --- core/src/main/scala/org/apache/spark/TestUtils.scala | 10 ---------- .../apache/spark/repl/ExecutorClassLoaderSuite.scala | 6 +++++- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index 9a2e47ba5b723..acfe751f6c746 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -159,16 +159,6 @@ private[spark] object TestUtils { createCompiledClass(className, destDir, sourceFile, classpathUrls) } - def createResource( - resourceName: String, - destDir: File, - fileContent: String = "test"): File = { - val out: File = new File(destDir, resourceName) - Files.write(fileContent.getBytes(StandardCharsets.UTF_8), out) - assert(out.exists(), "Resource file is not written: " + out.getAbsolutePath()) - out - } - /** * Run some code involving jobs submitted to the given context and assert that the jobs spilled. */ diff --git a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala index dccf4d513eca7..47fadbe4ae48f 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala @@ -19,8 +19,11 @@ package org.apache.spark.repl import java.io.File import java.net.{URL, URLClassLoader} +import java.nio.charset.StandardCharsets import java.util +import com.google.common.io.Files + import scala.concurrent.duration._ import scala.io.Source import scala.language.implicitConversions @@ -57,7 +60,8 @@ class ExecutorClassLoaderSuite url1 = "file://" + tempDir1 urls2 = List(tempDir2.toURI.toURL).toArray childClassNames.foreach(TestUtils.createCompiledClass(_, tempDir1, "1")) - parentResourceNames.foreach(TestUtils.createResource(_, tempDir2, "resource")) + parentResourceNames.foreach(x => + Files.write("resource".getBytes(StandardCharsets.UTF_8), new File(tempDir2, x))) parentClassNames.foreach(TestUtils.createCompiledClass(_, tempDir2, "2")) } From ac326687716f842cf1fb5e165bfc10fcf6fea6fd Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Fri, 20 Nov 2015 18:40:52 +0900 Subject: [PATCH 3/3] Addressing @vanzin's review --- .../spark/repl/ExecutorClassLoaderSuite.scala | 29 ++++++++----------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala index 47fadbe4ae48f..c1211f7596b9c 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala @@ -60,8 +60,9 @@ class ExecutorClassLoaderSuite url1 = "file://" + tempDir1 urls2 = List(tempDir2.toURI.toURL).toArray childClassNames.foreach(TestUtils.createCompiledClass(_, tempDir1, "1")) - parentResourceNames.foreach(x => - Files.write("resource".getBytes(StandardCharsets.UTF_8), new File(tempDir2, x))) + parentResourceNames.foreach { x => + Files.write("resource".getBytes(StandardCharsets.UTF_8), new File(tempDir2, x)) + } parentClassNames.foreach(TestUtils.createCompiledClass(_, tempDir2, "2")) } @@ -110,27 +111,21 @@ class ExecutorClassLoaderSuite test("resource from parent") { val parentLoader = new URLClassLoader(urls2, null) val classLoader = new ExecutorClassLoader(new SparkConf(), url1, parentLoader, true) - val resourceName: String = "fake-resource.txt" - Option(classLoader.getResource(resourceName)) match { - case Some(url) => { - val fileReader = Source.fromInputStream(url.openStream()).bufferedReader() - assert(fileReader.readLine().contains("resource"), "File doesn't contain 'resource'") - } - case None => fail(s"Resource $resourceName not found") - } + val resourceName: String = parentResourceNames.head + val is = classLoader.getResourceAsStream(resourceName) + assert(is != null, s"Resource $resourceName not found") + val content = Source.fromInputStream(is, "UTF-8").getLines().next() + assert(content.contains("resource"), "File doesn't contain 'resource'") } test("resources from parent") { val parentLoader = new URLClassLoader(urls2, null) val classLoader = new ExecutorClassLoader(new SparkConf(), url1, parentLoader, true) - val resourceName: String = "fake-resource.txt" + val resourceName: String = parentResourceNames.head val resources: util.Enumeration[URL] = classLoader.getResources(resourceName) - if (!resources.hasMoreElements) { - fail(s"Resource $resourceName not found") - } else { - val fileReader = Source.fromInputStream(resources.nextElement().openStream()).bufferedReader() - assert(fileReader.readLine().contains("resource"), "File doesn't contain 'resource'") - } + assert(resources.hasMoreElements, s"Resource $resourceName not found") + val fileReader = Source.fromInputStream(resources.nextElement().openStream()).bufferedReader() + assert(fileReader.readLine().contains("resource"), "File doesn't contain 'resource'") } test("failing to fetch classes from HTTP server should not leak resources (SPARK-6209)") {