From 0dc62bf9795ca20dbc54dd6294e5a26a99695c20 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 3 Apr 2019 11:12:20 +0800 Subject: [PATCH 1/5] Dealing with TimeVars removed in Hive 2.x --- .../org/apache/spark/sql/hive/HiveUtils.scala | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index a7f40c6bff0b..ffe9a61a0f95 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -194,7 +194,7 @@ private[spark] object HiveUtils extends Logging { // // Here we enumerate all time `ConfVar`s and convert their values to numeric strings according // to their output time units. - Seq( + val commonTimeVars = Seq( ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY -> TimeUnit.SECONDS, ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT -> TimeUnit.SECONDS, ConfVars.METASTORE_CLIENT_SOCKET_LIFETIME -> TimeUnit.SECONDS, @@ -207,8 +207,6 @@ private[spark] object HiveUtils extends Logging { ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_READER_WAIT -> TimeUnit.MILLISECONDS, ConfVars.HIVES_AUTO_PROGRESS_TIMEOUT -> TimeUnit.SECONDS, ConfVars.HIVE_LOG_INCREMENTAL_PLAN_PROGRESS_INTERVAL -> TimeUnit.MILLISECONDS, - ConfVars.HIVE_STATS_JDBC_TIMEOUT -> TimeUnit.SECONDS, - ConfVars.HIVE_STATS_RETRIES_WAIT -> TimeUnit.MILLISECONDS, ConfVars.HIVE_LOCK_SLEEP_BETWEEN_RETRIES -> TimeUnit.SECONDS, ConfVars.HIVE_ZOOKEEPER_SESSION_TIMEOUT -> TimeUnit.MILLISECONDS, ConfVars.HIVE_ZOOKEEPER_CONNECTION_BASESLEEPTIME -> TimeUnit.MILLISECONDS, @@ -236,7 +234,18 @@ private[spark] object HiveUtils extends Logging { ConfVars.SPARK_RPC_CLIENT_HANDSHAKE_TIMEOUT -> TimeUnit.MILLISECONDS ).map { case (confVar, unit) => confVar.varname -> HiveConf.getTimeVar(hadoopConf, confVar, unit).toString - }.toMap + } + + // The following configurations were removed by HIVE-12164(Hive 2.0) + val removedTimeVars = Seq( + ("hive.stats.jdbc.timeout", "30s") -> TimeUnit.SECONDS, + ("hive.stats.retries.wait", "3000ms") -> TimeUnit.MILLISECONDS + ).map { case ((key, defaultValue), unit) => + val value = hadoopConf.get(key, defaultValue) + key -> HiveConf.toTime(value, unit, unit).toString + } + + (commonTimeVars ++ removedTimeVars).toMap } /** From e67de70778781067aa1386ee3eddd28a796d7956 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 3 Apr 2019 11:16:57 +0800 Subject: [PATCH 2/5] Add test --- .../apache/spark/sql/hive/HiveUtilsSuite.scala | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala index 303dd70760a1..0fe0087487b0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.hive +import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.spark.SparkConf @@ -61,4 +62,18 @@ class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton Thread.currentThread().setContextClassLoader(contextClassLoader) } } + + test("SPARK-27349: Dealing with TimeVars removed in Hive 2.x") { + def testFormatTimeVarsForHiveClient(key: String, value: String, expected: Long): Unit = { + val conf = new Configuration + conf.set(key, value) + assert(HiveUtils.formatTimeVarsForHiveClient(conf)(key) === expected.toString) + } + + testFormatTimeVarsForHiveClient("hive.stats.jdbc.timeout", "40s", 40) + testFormatTimeVarsForHiveClient("hive.stats.jdbc.timeout", "1d", 86400) + + testFormatTimeVarsForHiveClient("hive.stats.retries.wait", "4000ms", 4000) + testFormatTimeVarsForHiveClient("hive.stats.retries.wait", "1d", 86400000) + } } From 1b7d6366921696cf0611fb54af86e157a748b374 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 3 Apr 2019 22:21:33 +0800 Subject: [PATCH 3/5] Test default value --- .../scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala index 0fe0087487b0..402f6ca9f808 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala @@ -64,6 +64,11 @@ class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton } test("SPARK-27349: Dealing with TimeVars removed in Hive 2.x") { + // Test default value + val defaultConf = new Configuration + assert(HiveUtils.formatTimeVarsForHiveClient(defaultConf)("hive.stats.jdbc.timeout") === "30") + assert(HiveUtils.formatTimeVarsForHiveClient(defaultConf)("hive.stats.retries.wait") === "3000") + def testFormatTimeVarsForHiveClient(key: String, value: String, expected: Long): Unit = { val conf = new Configuration conf.set(key, value) From 2d44ffddce92033e5b630678a25cf6c31eb42b0e Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 4 Apr 2019 06:14:26 +0800 Subject: [PATCH 4/5] removedTimeVars -> hardcodingTimeVars --- .../src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index ffe9a61a0f95..01a503db78dd 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -237,7 +237,7 @@ private[spark] object HiveUtils extends Logging { } // The following configurations were removed by HIVE-12164(Hive 2.0) - val removedTimeVars = Seq( + val hardcodingTimeVars = Seq( ("hive.stats.jdbc.timeout", "30s") -> TimeUnit.SECONDS, ("hive.stats.retries.wait", "3000ms") -> TimeUnit.MILLISECONDS ).map { case ((key, defaultValue), unit) => @@ -245,7 +245,7 @@ private[spark] object HiveUtils extends Logging { key -> HiveConf.toTime(value, unit, unit).toString } - (commonTimeVars ++ removedTimeVars).toMap + (commonTimeVars ++ hardcodingTimeVars).toMap } /** From 50ae570d7dee6b42d9509288cef30d9517aedf83 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 4 Apr 2019 06:19:06 +0800 Subject: [PATCH 5/5] Move testFormatTimeVarsForHiveClient out of the test case --- .../apache/spark/sql/hive/HiveUtilsSuite.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala index 402f6ca9f808..daf06645abc2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala @@ -30,6 +30,12 @@ import org.apache.spark.util.ChildFirstURLClassLoader class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { + private def testFormatTimeVarsForHiveClient(key: String, value: String, expected: Long): Unit = { + val conf = new Configuration + conf.set(key, value) + assert(HiveUtils.formatTimeVarsForHiveClient(conf)(key) === expected.toString) + } + test("newTemporaryConfiguration overwrites listener configurations") { Seq(true, false).foreach { useInMemoryDerby => val conf = HiveUtils.newTemporaryConfiguration(useInMemoryDerby) @@ -69,16 +75,10 @@ class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton assert(HiveUtils.formatTimeVarsForHiveClient(defaultConf)("hive.stats.jdbc.timeout") === "30") assert(HiveUtils.formatTimeVarsForHiveClient(defaultConf)("hive.stats.retries.wait") === "3000") - def testFormatTimeVarsForHiveClient(key: String, value: String, expected: Long): Unit = { - val conf = new Configuration - conf.set(key, value) - assert(HiveUtils.formatTimeVarsForHiveClient(conf)(key) === expected.toString) - } - testFormatTimeVarsForHiveClient("hive.stats.jdbc.timeout", "40s", 40) - testFormatTimeVarsForHiveClient("hive.stats.jdbc.timeout", "1d", 86400) + testFormatTimeVarsForHiveClient("hive.stats.jdbc.timeout", "1d", 1 * 24 * 60 * 60) testFormatTimeVarsForHiveClient("hive.stats.retries.wait", "4000ms", 4000) - testFormatTimeVarsForHiveClient("hive.stats.retries.wait", "1d", 86400000) + testFormatTimeVarsForHiveClient("hive.stats.retries.wait", "1d", 1 * 24 * 60 * 60 * 1000) } }