From 2a0a3f1b3f029c2454a471b33fed7766694fa518 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 14 Oct 2017 20:38:22 -0700 Subject: [PATCH 1/3] [SPARK-22280][SQL][TEST] Improve StatisticsSuite to test `convertMetastore` properly --- .../spark/sql/hive/StatisticsSuite.scala | 36 +++++++++---------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 9ff9ecf7f3677..4d5ca75e9b0cd 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -937,26 +937,22 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } test("test statistics of LogicalRelation converted from Hive serde tables") { - val parquetTable = "parquetTable" - val orcTable = "orcTable" - withTable(parquetTable, orcTable) { - sql(s"CREATE TABLE $parquetTable (key STRING, value STRING) STORED AS PARQUET") - sql(s"CREATE TABLE $orcTable (key STRING, value STRING) STORED AS ORC") - sql(s"INSERT INTO TABLE $parquetTable SELECT * FROM src") - sql(s"INSERT INTO TABLE $orcTable SELECT * FROM src") - - // the default value for `spark.sql.hive.convertMetastoreParquet` is true, here we just set it - // for robustness - withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "true") { - checkTableStats(parquetTable, hasSizeInBytes = false, expectedRowCounts = None) - sql(s"ANALYZE TABLE $parquetTable COMPUTE STATISTICS") - checkTableStats(parquetTable, hasSizeInBytes = true, expectedRowCounts = Some(500)) - } - withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { - // We still can get tableSize from Hive before Analyze - checkTableStats(orcTable, hasSizeInBytes = true, expectedRowCounts = None) - sql(s"ANALYZE TABLE $orcTable COMPUTE STATISTICS") - checkTableStats(orcTable, hasSizeInBytes = true, expectedRowCounts = Some(500)) + Seq("orc", "parquet").foreach { format => + Seq("true", "false").foreach { isConverted => + withSQLConf( + HiveUtils.CONVERT_METASTORE_ORC.key -> isConverted, + HiveUtils.CONVERT_METASTORE_PARQUET.key -> isConverted) { + withTable(format) { + sql(s"CREATE TABLE $format (key STRING, value STRING) STORED AS $format") + sql(s"INSERT INTO TABLE $format SELECT * FROM src") + + val hasHiveStats = !isConverted.toBoolean + checkTableStats(format, hasSizeInBytes = hasHiveStats, expectedRowCounts = None) + + sql(s"ANALYZE TABLE $format COMPUTE STATISTICS") + checkTableStats(format, hasSizeInBytes = true, expectedRowCounts = Some(500)) + } + } } } } From 934da69d4900a2f5eb09c4d88dd9eb1b17cd568e Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 15 Oct 2017 20:49:56 -0700 Subject: [PATCH 2/3] Address comments. --- .../scala/org/apache/spark/sql/hive/StatisticsSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 4d5ca75e9b0cd..50a71b3ef9fa1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -938,15 +938,15 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto test("test statistics of LogicalRelation converted from Hive serde tables") { Seq("orc", "parquet").foreach { format => - Seq("true", "false").foreach { isConverted => + Seq(true, false).foreach { isConverted => withSQLConf( - HiveUtils.CONVERT_METASTORE_ORC.key -> isConverted, - HiveUtils.CONVERT_METASTORE_PARQUET.key -> isConverted) { + HiveUtils.CONVERT_METASTORE_ORC.key -> s"$isConverted", + HiveUtils.CONVERT_METASTORE_PARQUET.key -> s"$isConverted") { withTable(format) { sql(s"CREATE TABLE $format (key STRING, value STRING) STORED AS $format") sql(s"INSERT INTO TABLE $format SELECT * FROM src") - val hasHiveStats = !isConverted.toBoolean + val hasHiveStats = !isConverted checkTableStats(format, hasSizeInBytes = hasHiveStats, expectedRowCounts = None) sql(s"ANALYZE TABLE $format COMPUTE STATISTICS") From 8abac338617014c77bc097f5c6b69aadafb3d410 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 16 Oct 2017 09:27:21 -0700 Subject: [PATCH 3/3] Address comments. --- .../scala/org/apache/spark/sql/hive/StatisticsSuite.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 50a71b3ef9fa1..b9a5ad7657134 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -946,9 +946,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto sql(s"CREATE TABLE $format (key STRING, value STRING) STORED AS $format") sql(s"INSERT INTO TABLE $format SELECT * FROM src") - val hasHiveStats = !isConverted - checkTableStats(format, hasSizeInBytes = hasHiveStats, expectedRowCounts = None) - + checkTableStats(format, hasSizeInBytes = !isConverted, expectedRowCounts = None) sql(s"ANALYZE TABLE $format COMPUTE STATISTICS") checkTableStats(format, hasSizeInBytes = true, expectedRowCounts = Some(500)) }