From 5fe679f3271692729e8b9a2a9351725a80fcf8f6 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Wed, 3 Jan 2018 14:32:30 -0800 Subject: [PATCH 1/9] Initial commit --- .../HiveExternalCatalogVersionsSuite.scala | 55 +++++++++++++++++-- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index a3d5b941a6761..982e76b1e7682 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -17,12 +17,16 @@ package org.apache.spark.sql.hive -import java.io.File +import java.io.{File, FileInputStream} import java.nio.file.Files +import scala.io.Codec +import scala.io.Source import scala.sys.process._ -import org.apache.spark.TestUtils +import org.apache.hadoop.conf.Configuration + +import org.apache.spark.{SecurityManager, SparkConf, TestUtils} import org.apache.spark.sql.{QueryTest, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTableType @@ -56,10 +60,12 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { // Try mirrors a few times until one succeeds for (i <- 0 until 3) { val preferredMirror = - Seq("wget", "https://www.apache.org/dyn/closer.lua?preferred=true", "-q", "-O", "-").!!.trim - val url = s"$preferredMirror/spark/spark-$version/spark-$version-bin-hadoop2.7.tgz" + getStringFromUrl("https://www.apache.org/dyn/closer.lua?preferred=true") + logWarning("Mirror is " + preferredMirror) + val filename = s"spark-$version-bin-hadoop2.7.tgz" + val url = s"$preferredMirror/spark/spark-$version/" + filename logInfo(s"Downloading Spark $version from $url") - if (Seq("wget", url, "-q", "-P", path).! == 0) { + if (getFileFromUrl(url, path, filename)) { return } logWarning(s"Failed to download Spark $version from $url") @@ -85,6 +91,45 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { new File(tmpDataDir, name).getCanonicalPath } + def getFileFromUrl(urlString: String, targetDir: String, filename: String): Boolean = { + val conf = new SparkConf + val securityManager = new SecurityManager(conf) + val hadoopConf = new Configuration + + val outDir = new File(targetDir) + if (!outDir.exists()) { + outDir.mkdirs() + } + + logWarning("Download url is " + urlString) + logWarning("Target file is " + outDir.getAbsolutePath + File.separator + filename) + try { + val result = Utils.doFetchFile(urlString, outDir, filename, conf, securityManager, hadoopConf) + result.exists() + } catch { + case ex: Exception => logError("Could not get file from url " + urlString + ": " + + ex.getMessage) + false + } + } + + def getStringFromUrl(urlString: String, encoding: String = "UTF-8"): String = { + val outDir = Files.createTempDirectory("string-") + val filename = "string-out.txt" + + if (!getFileFromUrl(urlString, outDir.toString, filename)) { + throw new java.io.IOException("Could not get string from url " + urlString) + } + + val outputFile = new File(outDir.toString + File.separator + filename) + val fis = new FileInputStream(outputFile) + val result = Source.fromInputStream(fis)(Codec(encoding)).mkString + fis.close() + outputFile.delete() + outDir.toFile.delete() + result + } + override def beforeAll(): Unit = { super.beforeAll() From 92e82d484a30a171141d2cdd1f800254fe33ceaf Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Wed, 3 Jan 2018 15:12:09 -0800 Subject: [PATCH 2/9] Remove test log messages --- .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 982e76b1e7682..9e1c22936ca4c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -61,7 +61,6 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { for (i <- 0 until 3) { val preferredMirror = getStringFromUrl("https://www.apache.org/dyn/closer.lua?preferred=true") - logWarning("Mirror is " + preferredMirror) val filename = s"spark-$version-bin-hadoop2.7.tgz" val url = s"$preferredMirror/spark/spark-$version/" + filename logInfo(s"Downloading Spark $version from $url") @@ -101,11 +100,9 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { outDir.mkdirs() } - logWarning("Download url is " + urlString) - logWarning("Target file is " + outDir.getAbsolutePath + File.separator + filename) try { val result = Utils.doFetchFile(urlString, outDir, filename, conf, securityManager, hadoopConf) - result.exists() + result.exists() } catch { case ex: Exception => logError("Could not get file from url " + urlString + ": " + ex.getMessage) From c5e835eb317d0b6970daef0ffc1214576874e8c6 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Wed, 3 Jan 2018 18:39:03 -0800 Subject: [PATCH 3/9] The 2 new methods should be private --- .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 9e1c22936ca4c..24d95d89ab160 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -90,7 +90,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { new File(tmpDataDir, name).getCanonicalPath } - def getFileFromUrl(urlString: String, targetDir: String, filename: String): Boolean = { + private def getFileFromUrl(urlString: String, targetDir: String, filename: String): Boolean = { val conf = new SparkConf val securityManager = new SecurityManager(conf) val hadoopConf = new Configuration @@ -110,7 +110,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { } } - def getStringFromUrl(urlString: String, encoding: String = "UTF-8"): String = { + private def getStringFromUrl(urlString: String, encoding: String = "UTF-8"): String = { val outDir = Files.createTempDirectory("string-") val filename = "string-out.txt" From 8b71ea32d57e301e95700d86bba480dac2b4dad4 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Thu, 4 Jan 2018 10:32:14 -0800 Subject: [PATCH 4/9] Address pull request comments --- .../HiveExternalCatalogVersionsSuite.scala | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 24d95d89ab160..3e034d8957a19 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.hive -import java.io.{File, FileInputStream} +import java.io.{File, FileInputStream, IOException} import java.nio.file.Files import scala.io.Codec @@ -61,6 +61,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { for (i <- 0 until 3) { val preferredMirror = getStringFromUrl("https://www.apache.org/dyn/closer.lua?preferred=true") + logWarning("The mirror is " + preferredMirror) val filename = s"spark-$version-bin-hadoop2.7.tgz" val url = s"$preferredMirror/spark/spark-$version/" + filename logInfo(s"Downloading Spark $version from $url") @@ -104,7 +105,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { val result = Utils.doFetchFile(urlString, outDir, filename, conf, securityManager, hadoopConf) result.exists() } catch { - case ex: Exception => logError("Could not get file from url " + urlString + ": " + case ex: Exception => logWarning("Could not get file from url " + urlString + ": " + ex.getMessage) false } @@ -115,16 +116,17 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { val filename = "string-out.txt" if (!getFileFromUrl(urlString, outDir.toString, filename)) { - throw new java.io.IOException("Could not get string from url " + urlString) + throw new IOException("Could not get string from url " + urlString) } - val outputFile = new File(outDir.toString + File.separator + filename) - val fis = new FileInputStream(outputFile) - val result = Source.fromInputStream(fis)(Codec(encoding)).mkString - fis.close() - outputFile.delete() - outDir.toFile.delete() - result + val contentFile = new File(outDir.toFile, filename) + logWarning("content file is " + contentFile.getAbsolutePath) + try { + Source.fromFile(contentFile)(Codec(encoding)).mkString + } finally { + contentFile.delete() + outDir.toFile.delete() + } } override def beforeAll(): Unit = { From e85b813889f5e66215e7298d045c132a5c6a7856 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Thu, 4 Jan 2018 10:43:35 -0800 Subject: [PATCH 5/9] Remove testing log statements --- .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 3e034d8957a19..20f4e55fea3a0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -61,7 +61,6 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { for (i <- 0 until 3) { val preferredMirror = getStringFromUrl("https://www.apache.org/dyn/closer.lua?preferred=true") - logWarning("The mirror is " + preferredMirror) val filename = s"spark-$version-bin-hadoop2.7.tgz" val url = s"$preferredMirror/spark/spark-$version/" + filename logInfo(s"Downloading Spark $version from $url") @@ -120,7 +119,6 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { } val contentFile = new File(outDir.toFile, filename) - logWarning("content file is " + contentFile.getAbsolutePath) try { Source.fromFile(contentFile)(Codec(encoding)).mkString } finally { From 51f358954e945a349c72de061907dacb797c307b Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Thu, 4 Jan 2018 16:39:17 -0800 Subject: [PATCH 6/9] Address pull request comments --- .../HiveExternalCatalogVersionsSuite.scala | 53 +++++++++---------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 20f4e55fea3a0..ff8840c115cc9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -17,11 +17,10 @@ package org.apache.spark.sql.hive -import java.io.{File, FileInputStream, IOException} -import java.nio.file.Files +import java.io.File +import java.nio.charset.StandardCharsets +import java.nio.file.{Files, Paths} -import scala.io.Codec -import scala.io.Source import scala.sys.process._ import org.apache.hadoop.conf.Configuration @@ -59,15 +58,20 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { private def tryDownloadSpark(version: String, path: String): Unit = { // Try mirrors a few times until one succeeds for (i <- 0 until 3) { + // we don't retry on a failure to get mirror url. If we can't get a mirror url, + // the test fails (getStringFromUrl will throw an exception) val preferredMirror = getStringFromUrl("https://www.apache.org/dyn/closer.lua?preferred=true") + logWarning(s"Mirror is $preferredMirror") val filename = s"spark-$version-bin-hadoop2.7.tgz" - val url = s"$preferredMirror/spark/spark-$version/" + filename + val url = s"$preferredMirror/spark/spark-$version/$filename" logInfo(s"Downloading Spark $version from $url") - if (getFileFromUrl(url, path, filename)) { + try { + getFileFromUrl(url, path, filename) return + } catch { + case ex: Exception => logWarning(s"Failed to download Spark $version from $url", ex) } - logWarning(s"Failed to download Spark $version from $url") } fail(s"Unable to download Spark $version") } @@ -90,8 +94,11 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { new File(tmpDataDir, name).getCanonicalPath } - private def getFileFromUrl(urlString: String, targetDir: String, filename: String): Boolean = { + private def getFileFromUrl(urlString: String, targetDir: String, filename: String): Unit = { val conf = new SparkConf + // if the caller passes the name of an existing file, we want doFetchFile to write over it with + // the contents from the specified url. + conf.set("spark.files.overwrite", "true") val securityManager = new SecurityManager(conf) val hadoopConf = new Configuration @@ -100,31 +107,21 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { outDir.mkdirs() } - try { - val result = Utils.doFetchFile(urlString, outDir, filename, conf, securityManager, hadoopConf) - result.exists() - } catch { - case ex: Exception => logWarning("Could not get file from url " + urlString + ": " - + ex.getMessage) - false - } + // propogate exceptions up to the caller of getFileFromUrl + Utils.doFetchFile(urlString, outDir, filename, conf, securityManager, hadoopConf) } private def getStringFromUrl(urlString: String, encoding: String = "UTF-8"): String = { - val outDir = Files.createTempDirectory("string-") - val filename = "string-out.txt" + val contentFile = File.createTempFile("string-", ".txt") + contentFile.deleteOnExit() - if (!getFileFromUrl(urlString, outDir.toString, filename)) { - throw new IOException("Could not get string from url " + urlString) - } + // exceptions will propogate to the caller of getStringFromUrl + getFileFromUrl(urlString, contentFile.getParent, contentFile.getName) - val contentFile = new File(outDir.toFile, filename) - try { - Source.fromFile(contentFile)(Codec(encoding)).mkString - } finally { - contentFile.delete() - outDir.toFile.delete() - } + logWarning(s"content file is at ${contentFile.getAbsolutePath}" ) + + val contentPath = Paths.get(contentFile.toURI) + new String(Files.readAllBytes(contentPath), StandardCharsets.UTF_8) } override def beforeAll(): Unit = { From 46225e84b6838f057232684c223c69c6a0e20fe3 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Thu, 4 Jan 2018 17:40:39 -0800 Subject: [PATCH 7/9] Remove test log messages --- .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 3 --- 1 file changed, 3 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index ff8840c115cc9..c0e5f125c1ffe 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -62,7 +62,6 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { // the test fails (getStringFromUrl will throw an exception) val preferredMirror = getStringFromUrl("https://www.apache.org/dyn/closer.lua?preferred=true") - logWarning(s"Mirror is $preferredMirror") val filename = s"spark-$version-bin-hadoop2.7.tgz" val url = s"$preferredMirror/spark/spark-$version/$filename" logInfo(s"Downloading Spark $version from $url") @@ -118,8 +117,6 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { // exceptions will propogate to the caller of getStringFromUrl getFileFromUrl(urlString, contentFile.getParent, contentFile.getName) - logWarning(s"content file is at ${contentFile.getAbsolutePath}" ) - val contentPath = Paths.get(contentFile.toURI) new String(Files.readAllBytes(contentPath), StandardCharsets.UTF_8) } From 3dbfffd9a764cb35e05b85fc5c691a7708f31a0e Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Thu, 4 Jan 2018 17:49:37 -0800 Subject: [PATCH 8/9] Fix comment typo --- .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index c0e5f125c1ffe..8ba45c40148a8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -106,7 +106,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { outDir.mkdirs() } - // propogate exceptions up to the caller of getFileFromUrl + // propagate exceptions up to the caller of getFileFromUrl Utils.doFetchFile(urlString, outDir, filename, conf, securityManager, hadoopConf) } @@ -114,7 +114,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { val contentFile = File.createTempFile("string-", ".txt") contentFile.deleteOnExit() - // exceptions will propogate to the caller of getStringFromUrl + // exceptions will propagate to the caller of getStringFromUrl getFileFromUrl(urlString, contentFile.getParent, contentFile.getName) val contentPath = Paths.get(contentFile.toURI) From 7b58d994f485255ffad59ed9ffb480a64a00cea2 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Thu, 4 Jan 2018 20:03:58 -0800 Subject: [PATCH 9/9] Remove unused parameter in getStringFromUrl signature --- .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 8ba45c40148a8..ae4aeb7b4ce4a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -110,7 +110,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { Utils.doFetchFile(urlString, outDir, filename, conf, securityManager, hadoopConf) } - private def getStringFromUrl(urlString: String, encoding: String = "UTF-8"): String = { + private def getStringFromUrl(urlString: String): String = { val contentFile = File.createTempFile("string-", ".txt") contentFile.deleteOnExit()