From c2d8e90eb4a0e3dff4bc5a278d89d75e85fe3904 Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Wed, 17 May 2017 16:53:45 -0700 Subject: [PATCH 1/7] fix. --- .../sql/catalyst/catalog/interface.scala | 5 +- .../sql/catalyst/trees/TreeNodeSuite.scala | 3 +- .../spark/sql/hive/HiveStrategies.scala | 15 +- .../sql/hive/client/HiveClientImpl.scala | 73 +++++- .../spark/sql/hive/ShowCreateTableSuite.scala | 16 +- .../spark/sql/hive/StatisticsSuite.scala | 221 +++++++++++++++++- .../spark/sql/hive/client/VersionsSuite.scala | 2 +- .../hive/execution/HiveComparisonTest.scala | 6 - .../sql/hive/execution/HiveDDLSuite.scala | 5 - 9 files changed, 302 insertions(+), 44 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index cc0cbba275b8..2f328ccc4945 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -203,6 +203,8 @@ case class BucketSpec( * sensitive schema was unable to be read from the table properties. * Used to trigger case-sensitive schema inference at query time, when * configured. + * @param ignoredProperties is a list of table properties that are used by the underlying table + * but ignored by Spark SQL yet. */ case class CatalogTable( identifier: TableIdentifier, @@ -221,7 +223,8 @@ case class CatalogTable( comment: Option[String] = None, unsupportedFeatures: Seq[String] = Seq.empty, tracksPartitionsInCatalog: Boolean = false, - schemaPreservesCase: Boolean = true) { + schemaPreservesCase: Boolean = true, + ignoredProperties: Map[String, String] = Map.empty) { import CatalogTable._ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index 37e3dfabd0b2..712841835acd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -492,7 +492,8 @@ class TreeNodeSuite extends SparkFunSuite { "tracksPartitionsInCatalog" -> false, "properties" -> JNull, "unsupportedFeatures" -> List.empty[String], - "schemaPreservesCase" -> JBool(true))) + "schemaPreservesCase" -> JBool(true), + "ignoredProperties" -> JNull)) // For unknown case class, returns JNull. val bigValue = new Array[Int](10000) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 09a5eda6e543..17e8fa3b2e65 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -119,20 +119,7 @@ class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] { case relation: CatalogRelation if DDLUtils.isHiveTable(relation.tableMeta) && relation.tableMeta.stats.isEmpty => val table = relation.tableMeta - // TODO: check if this estimate is valid for tables after partition pruning. - // NOTE: getting `totalSize` directly from params is kind of hacky, but this should be - // relatively cheap if parameters for the table are populated into the metastore. - // Besides `totalSize`, there are also `numFiles`, `numRows`, `rawDataSize` keys - // (see StatsSetupConst in Hive) that we can look at in the future. 
- // When table is external,`totalSize` is always zero, which will influence join strategy - // so when `totalSize` is zero, use `rawDataSize` instead. - val totalSize = table.properties.get(StatsSetupConst.TOTAL_SIZE).map(_.toLong) - val rawDataSize = table.properties.get(StatsSetupConst.RAW_DATA_SIZE).map(_.toLong) - val sizeInBytes = if (totalSize.isDefined && totalSize.get > 0) { - totalSize.get - } else if (rawDataSize.isDefined && rawDataSize.get > 0) { - rawDataSize.get - } else if (session.sessionState.conf.fallBackToHdfsForStatsEnabled) { + val sizeInBytes = if (session.sessionState.conf.fallBackToHdfsForStatsEnabled) { try { val hadoopConf = session.sessionState.newHadoopConf() val tablePath = new Path(table.location) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 04f2751e79a5..f63d1b9fc5f6 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -25,6 +25,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.common.StatsSetupConst import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.metastore.{TableType => HiveTableType} import org.apache.hadoop.hive.metastore.api.{Database => HiveDatabase, FieldSchema, Order} @@ -414,6 +415,50 @@ private[hive] class HiveClientImpl( val properties = Option(h.getParameters).map(_.asScala.toMap).orNull + // Hive-generated Statistics are also recorded in ignoredProperties + val ignoredProperties = scala.collection.mutable.Map.empty[String, String] + for (key <- HiveStatisticsProperties; value <- properties.get(key)) { + ignoredProperties += key -> value + } + + val excludedTableProperties = HiveStatisticsProperties ++ Set( + // The property value of "comment" is moved to the dedicated field "comment" + "comment", + // For EXTERNAL_TABLE, the table properties has a particular field "EXTERNAL". This is added + // in the function toHiveTable. + "EXTERNAL" + ) + + val filteredProperties = properties.filterNot { + case (key, _) => excludedTableProperties.contains(key) + } + val comment = properties.get("comment") + + val totalSize = properties.get(StatsSetupConst.TOTAL_SIZE).map(BigInt(_)) + val rawDataSize = properties.get(StatsSetupConst.RAW_DATA_SIZE).map(BigInt(_)) + def rowCount = properties.get(StatsSetupConst.ROW_COUNT).map(BigInt(_)) match { + case Some(c) if c >= 0 => Some(c) + case _ => None + } + // TODO: check if this estimate is valid for tables after partition pruning. + // NOTE: getting `totalSize` directly from params is kind of hacky, but this should be + // relatively cheap if parameters for the table are populated into the metastore. + // Currently, only totalSize, rawDataSize, and row_count are used to build the field `stats` + // TODO: stats should include all the other two fields (`numFiles` and `numPartitions`). + // (see StatsSetupConst in Hive) + val stats = + // When table is external, `totalSize` is always zero, which will influence join strategy + // so when `totalSize` is zero, use `rawDataSize` instead. When `rawDataSize` is also zero, + // return None. Later, we will use the other ways to estimate the statistics. 
+ if (totalSize.isDefined && totalSize.get > 0L) { + Some(CatalogStatistics(sizeInBytes = totalSize.get, rowCount = rowCount)) + } else if (rawDataSize.isDefined && rawDataSize.get > 0) { + Some(CatalogStatistics(sizeInBytes = rawDataSize.get, rowCount = rowCount)) + } else { + // TODO: still fill the rowCount even if sizeInBytes is empty. Break some assumptions? + None + } + CatalogTable( identifier = TableIdentifier(h.getTableName, Option(h.getDbName)), tableType = h.getTableType match { @@ -451,13 +496,15 @@ private[hive] class HiveClientImpl( ), // For EXTERNAL_TABLE, the table properties has a particular field "EXTERNAL". This is added // in the function toHiveTable. - properties = properties.filter(kv => kv._1 != "comment" && kv._1 != "EXTERNAL"), - comment = properties.get("comment"), + properties = filteredProperties, + stats = stats, + comment = comment, // In older versions of Spark(before 2.2.0), we expand the view original text and store // that into `viewExpandedText`, and that should be used in view resolution. So we get // `viewExpandedText` instead of `viewOriginalText` for viewText here. viewText = Option(h.getViewExpandedText), - unsupportedFeatures = unsupportedFeatures) + unsupportedFeatures = unsupportedFeatures, + ignoredProperties = ignoredProperties.toMap) } } @@ -474,7 +521,15 @@ private[hive] class HiveClientImpl( } override def alterTable(tableName: String, table: CatalogTable): Unit = withHiveState { - val hiveTable = toHiveTable(table, Some(userName)) + // getTableOption removes all the Hive-specific properties. Here, we fill them back to ensure + // these properties are still available to the others that share the same Hive metastore. + // If users explicitly alter these Hive-specific properties through ALTER TABLE DDL, we respect + // these user-specified values. + val hiveSpecificProps = table.ignoredProperties.filter { + case (key, _) => HiveStatisticsProperties.contains(key) + } + val hiveTable = toHiveTable( + table.copy(properties = hiveSpecificProps ++ table.properties), Some(userName)) // Do not use `table.qualifiedName` here because this may be a rename val qualifiedTableName = s"${table.database}.$tableName" shim.alterTable(client, qualifiedTableName, hiveTable) @@ -956,4 +1011,14 @@ private[hive] object HiveClientImpl { parameters = if (hp.getParameters() != null) hp.getParameters().asScala.toMap else Map.empty) } + + // Below is the key of table properties for storing Hive-generated statistics + private val HiveStatisticsProperties = Set( + StatsSetupConst.COLUMN_STATS_ACCURATE, + StatsSetupConst.NUM_FILES, + StatsSetupConst.NUM_PARTITIONS, + StatsSetupConst.ROW_COUNT, + StatsSetupConst.RAW_DATA_SIZE, + StatsSetupConst.TOTAL_SIZE + ) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala index 081153df8e73..26912ed1200d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala @@ -325,26 +325,24 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSing "last_modified_by", "last_modified_time", "Owner:", - "COLUMN_STATS_ACCURATE", // The following are hive specific schema parameters which we do not need to match exactly. 
- "numFiles", - "numRows", - "rawDataSize", - "totalSize", "totalNumberFiles", "maxFileSize", - "minFileSize", - // EXTERNAL is not non-deterministic, but it is filtered out for external tables. - "EXTERNAL" + "minFileSize" ) table.copy( createTime = 0L, lastAccessTime = 0L, - properties = table.properties.filterKeys(!nondeterministicProps.contains(_)) + properties = table.properties.filterKeys(!nondeterministicProps.contains(_)), + stats = None, + ignoredProperties = Map.empty ) } + val e = normalize(actual) + val m = normalize(expected) + assert(normalize(actual) == normalize(expected)) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 3191b9975fbf..6006df9d802b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -19,11 +19,14 @@ package org.apache.spark.sql.hive import java.io.{File, PrintWriter} +import org.apache.hadoop.hive.common.StatsSetupConst import scala.reflect.ClassTag +import scala.util.matching.Regex import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogStatistics} +import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.joins._ @@ -61,7 +64,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto val relation = spark.table("csv_table").queryExecution.analyzed.children.head .asInstanceOf[CatalogRelation] - val properties = relation.tableMeta.properties + val properties = relation.tableMeta.ignoredProperties assert(properties("totalSize").toLong <= 0, "external table totalSize must be <= 0") assert(properties("rawDataSize").toLong <= 0, "external table rawDataSize must be <= 0") @@ -175,7 +178,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto sql(s"INSERT INTO TABLE $textTable SELECT * FROM src") checkTableStats( textTable, - hasSizeInBytes = false, + hasSizeInBytes = true, expectedRowCounts = None) // noscan won't count the number of rows @@ -215,6 +218,217 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } } + private def createNonPartitionedTable( + tabName: String, + analyzedBySpark: Boolean = true, + analyzedByHive: Boolean = true): Unit = { + val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client + sql( + s""" + |CREATE TABLE $tabName (key STRING, value STRING) + |STORED AS TEXTFILE + |TBLPROPERTIES ('prop1' = 'val1', 'prop2' = 'val2') + """.stripMargin) + sql(s"INSERT INTO TABLE $tabName SELECT * FROM src") + if (analyzedBySpark) sql(s"ANALYZE TABLE $tabName COMPUTE STATISTICS") + // This is to mimic the scenario in which Hive genrates statistics before we reading it + if (analyzedByHive) hiveClient.runSqlHive(s"ANALYZE TABLE $tabName COMPUTE STATISTICS") + val describeResult1 = hiveClient.runSqlHive(s"DESCRIBE FORMATTED $tabName") + + val tableMetadata = + spark.sessionState.catalog.getTableMetadata(TableIdentifier(tabName)).properties + // statistics info is not contained in the metadata of the original table + assert(Seq(StatsSetupConst.COLUMN_STATS_ACCURATE, + StatsSetupConst.NUM_FILES, + StatsSetupConst.NUM_PARTITIONS, + StatsSetupConst.ROW_COUNT, + 
StatsSetupConst.RAW_DATA_SIZE, + StatsSetupConst.TOTAL_SIZE).forall(!tableMetadata.contains(_))) + + if (analyzedByHive) { + assert(StringUtils.filterPattern(describeResult1, "*numRows\\s+500*").nonEmpty) + } else { + assert(StringUtils.filterPattern(describeResult1, "*numRows\\s+500*").isEmpty) + } + } + + private def extractStatsPropValues( + descOutput: Seq[String], + propKey: String): Option[BigInt] = { + val str = descOutput + .filterNot(_.contains(HiveExternalCatalog.STATISTICS_PREFIX)) + .filter(_.contains(propKey)) + if (str.isEmpty) { + None + } else { + assert(str.length == 1, "found more than one matches") + val pattern = new Regex(s"""$propKey\\s+(-?\\d+)""") + val pattern(value) = str.head.trim + Option(BigInt(value)) + } + } + + test("get statistics when not analyzed in both Hive and Spark") { + val tabName = "tab1" + withTable(tabName) { + createNonPartitionedTable(tabName, analyzedByHive = false, analyzedBySpark = false) + checkTableStats( + tabName, hasSizeInBytes = true, expectedRowCounts = None) + + // ALTER TABLE SET TBLPROPERTIES invalidates some contents of Hive specific statistics + // This is triggered by the Hive alterTable API + val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client + val describeResult = hiveClient.runSqlHive(s"DESCRIBE FORMATTED $tabName") + + val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") + val numRows = extractStatsPropValues(describeResult, "numRows") + val totalSize = extractStatsPropValues(describeResult, "totalSize") + assert(rawDataSize.isEmpty, "rawDataSize should not be shown without table analysis") + assert(numRows.isEmpty, "numRows should not be shown without table analysis") + assert(totalSize.isDefined && totalSize.get > 0, "totalSize is lost") + } + } + + test("alter table rename after analyze table") { + Seq(true, false).foreach { analyzedBySpark => + val oldName = "tab1" + val newName = "tab2" + withTable(oldName, newName) { + createNonPartitionedTable(oldName, analyzedByHive = true, analyzedBySpark = analyzedBySpark) + val fetchedStats1 = checkTableStats( + oldName, hasSizeInBytes = true, expectedRowCounts = Some(500)) + sql(s"ALTER TABLE $oldName RENAME TO $newName") + val fetchedStats2 = checkTableStats( + newName, hasSizeInBytes = true, expectedRowCounts = Some(500)) + assert(fetchedStats1 == fetchedStats2) + + // ALTER TABLE RENAME does not affect the contents of Hive specific statistics + val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client + val describeResult = hiveClient.runSqlHive(s"DESCRIBE FORMATTED $newName") + + val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") + val numRows = extractStatsPropValues(describeResult, "numRows") + val totalSize = extractStatsPropValues(describeResult, "totalSize") + assert(rawDataSize.isDefined && rawDataSize.get > 0, "rawDataSize is lost") + assert(numRows.isDefined && numRows.get == 500, "numRows is lost") + assert(totalSize.isDefined && totalSize.get > 0, "totalSize is lost") + } + } + } + + test("alter table SET TBLPROPERTIES after analyze table") { + Seq(true, false).foreach { analyzedBySpark => + val tabName = "tab1" + withTable(tabName) { + createNonPartitionedTable(tabName, analyzedByHive = true, analyzedBySpark = analyzedBySpark) + val fetchedStats1 = checkTableStats( + tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) + sql(s"ALTER TABLE $tabName SET TBLPROPERTIES ('foo' = 'a')") + val fetchedStats2 = checkTableStats( + tabName, 
hasSizeInBytes = true, expectedRowCounts = Some(500)) + assert(fetchedStats1 == fetchedStats2) + + val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client + val describeResult = hiveClient.runSqlHive(s"DESCRIBE FORMATTED $tabName") + + val totalSize = extractStatsPropValues(describeResult, "totalSize") + assert(totalSize.isDefined && totalSize.get > 0, "totalSize is lost") + + // ALTER TABLE SET TBLPROPERTIES invalidates some Hive specific statistics + // This is triggered by the Hive alterTable API + val numRows = extractStatsPropValues(describeResult, "numRows") + assert(numRows.isDefined && numRows.get == -1, "numRows is lost") + val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") + assert(rawDataSize.isDefined && rawDataSize.get == -1, "rawDataSize is lost") + } + } + } + + test("alter table UNSET TBLPROPERTIES after analyze table") { + Seq(true, false).foreach { analyzedBySpark => + val tabName = "tab1" + withTable(tabName) { + createNonPartitionedTable(tabName, analyzedByHive = true, analyzedBySpark = analyzedBySpark) + val fetchedStats1 = checkTableStats( + tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) + sql(s"ALTER TABLE $tabName UNSET TBLPROPERTIES ('prop1')") + val fetchedStats2 = checkTableStats( + tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) + assert(fetchedStats1 == fetchedStats2) + + val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client + val describeResult = hiveClient.runSqlHive(s"DESCRIBE FORMATTED $tabName") + + val totalSize = extractStatsPropValues(describeResult, "totalSize") + assert(totalSize.isDefined && totalSize.get > 0, "totalSize is lost") + + // ALTER TABLE UNSET TBLPROPERTIES invalidates some Hive specific statistics + // This is triggered by the Hive alterTable API + val numRows = extractStatsPropValues(describeResult, "numRows") + assert(numRows.isDefined && numRows.get == -1, "numRows is lost") + val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") + assert(rawDataSize.isDefined && rawDataSize.get == -1, "rawDataSize is lost") + } + } + } + + test("add/drop partitions - managed table") { + val catalog = spark.sessionState.catalog + val managedTable = "partitionedTable" + withTable(managedTable) { + sql( + s""" + |CREATE TABLE $managedTable (key INT, value STRING) + |PARTITIONED BY (ds STRING, hr STRING) + """.stripMargin) + + for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) { + sql( + s""" + |INSERT OVERWRITE TABLE $managedTable + |partition (ds='$ds',hr='$hr') + |SELECT 1, 'a' + """.stripMargin) + } + + checkTableStats( + managedTable, hasSizeInBytes = false, expectedRowCounts = None) + + sql(s"ANALYZE TABLE $managedTable COMPUTE STATISTICS") + + val stats1 = checkTableStats( + managedTable, hasSizeInBytes = true, expectedRowCounts = Some(4)) + + sql( + s""" + |ALTER TABLE $managedTable DROP PARTITION (ds='2008-04-08'), + |PARTITION (hr='12') + """.stripMargin) + assert(catalog.listPartitions(TableIdentifier(managedTable)).map(_.spec).toSet == + Set(Map("ds" -> "2008-04-09", "hr" -> "11"))) + + val stats2 = checkTableStats( + managedTable, hasSizeInBytes = true, expectedRowCounts = Some(4)) + assert(stats1 == stats2) + + sql(s"ANALYZE TABLE $managedTable COMPUTE STATISTICS") + + val stats3 = checkTableStats( + managedTable, hasSizeInBytes = true, expectedRowCounts = Some(1)) + assert(stats2.get.sizeInBytes > stats3.get.sizeInBytes) + + sql(s"ALTER TABLE $managedTable ADD PARTITION 
(ds='2008-04-08', hr='12')") + assert(stats1 == stats2) + + sql(s"ANALYZE TABLE $managedTable COMPUTE STATISTICS") + val stats4 = checkTableStats( + managedTable, hasSizeInBytes = true, expectedRowCounts = Some(1)) + + assert(stats2.get.sizeInBytes > stats4.get.sizeInBytes) + assert(stats4.get.sizeInBytes == stats3.get.sizeInBytes) + } + } + test("test statistics of LogicalRelation converted from Hive serde tables") { val parquetTable = "parquetTable" val orcTable = "orcTable" @@ -232,7 +446,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto checkTableStats(parquetTable, hasSizeInBytes = true, expectedRowCounts = Some(500)) } withSQLConf("spark.sql.hive.convertMetastoreOrc" -> "true") { - checkTableStats(orcTable, hasSizeInBytes = false, expectedRowCounts = None) + // We still can get tableSize from Hive before Analyze + checkTableStats(orcTable, hasSizeInBytes = true, expectedRowCounts = None) sql(s"ANALYZE TABLE $orcTable COMPUTE STATISTICS") checkTableStats(orcTable, hasSizeInBytes = true, expectedRowCounts = Some(500)) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index 7aff49c0fc3b..f109843f5be2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -576,7 +576,7 @@ class VersionsSuite extends SparkFunSuite with Logging { versionSpark.sql("CREATE TABLE tbl AS SELECT 1 AS a") assert(versionSpark.table("tbl").collect().toSeq == Seq(Row(1))) val tableMeta = versionSpark.sessionState.catalog.getTableMetadata(TableIdentifier("tbl")) - val totalSize = tableMeta.properties.get(StatsSetupConst.TOTAL_SIZE).map(_.toLong) + val totalSize = tableMeta.stats.map(_.sizeInBytes) // Except 0.12, all the following versions will fill the Hive-generated statistics if (version == "0.12") { assert(totalSize.isEmpty) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index abe5d835719b..96e400939e36 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -192,13 +192,7 @@ abstract class HiveComparisonTest "last_modified_by", "last_modified_time", "Owner:", - "COLUMN_STATS_ACCURATE", // The following are hive specific schema parameters which we do not need to match exactly. - "numFiles", - "numRows", - "rawDataSize", - "totalSize", - "totalNumberFiles", "maxFileSize", "minFileSize" ) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 13f5c5dd8e80..9a682260e2bf 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -1199,11 +1199,6 @@ class HiveDDLSuite "last_modified_by", "last_modified_time", "Owner:", - "COLUMN_STATS_ACCURATE", - "numFiles", - "numRows", - "rawDataSize", - "totalSize", "totalNumberFiles", "maxFileSize", "minFileSize" From 22a2c00333ffc39458f45d629c1b3199f73f1f3e Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Wed, 17 May 2017 16:56:07 -0700 Subject: [PATCH 2/7] address comments. 
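getTableOption now strips the Hive-generated statistics out of `properties` and keeps
them in `ignoredProperties`, so alterTable has to merge them back before handing the
table to Hive; otherwise an ALTER TABLE issued through Spark would wipe numRows,
rawDataSize and totalSize for every other user of the same metastore. A minimal sketch
of the merge semantics (illustrative only: the property values are made up; the real
change is the one-liner in HiveClientImpl.alterTable below):

    val ignoredProperties = Map("numRows" -> "500", "totalSize" -> "5812")
    val userProperties    = Map("prop1" -> "val1", "totalSize" -> "0")
    // The right-hand operand of ++ wins on duplicate keys, so a property the user
    // sets explicitly through ALTER TABLE overrides the preserved Hive statistic.
    val merged = ignoredProperties ++ userProperties
    assert(merged == Map("numRows" -> "500", "totalSize" -> "0", "prop1" -> "val1"))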
--- .../org/apache/spark/sql/hive/client/HiveClientImpl.scala | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index f63d1b9fc5f6..9292a976c9d6 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -525,11 +525,8 @@ private[hive] class HiveClientImpl( // these properties are still available to the others that share the same Hive metastore. // If users explicitly alter these Hive-specific properties through ALTER TABLE DDL, we respect // these user-specified values. - val hiveSpecificProps = table.ignoredProperties.filter { - case (key, _) => HiveStatisticsProperties.contains(key) - } val hiveTable = toHiveTable( - table.copy(properties = hiveSpecificProps ++ table.properties), Some(userName)) + table.copy(properties = table.ignoredProperties ++ table.properties), Some(userName)) // Do not use `table.qualifiedName` here because this may be a rename val qualifiedTableName = s"${table.database}.$tableName" shim.alterTable(client, qualifiedTableName, hiveTable) From cce31db80cdc66516e3e537f33a3611b07186b6b Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Wed, 17 May 2017 16:57:28 -0700 Subject: [PATCH 3/7] address comments. --- .../test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 6006df9d802b..5dfa98ceb377 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -380,7 +380,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto s""" |CREATE TABLE $managedTable (key INT, value STRING) |PARTITIONED BY (ds STRING, hr STRING) - """.stripMargin) + """.stripMargin) for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) { sql( From aa9a36e1c5bff881a053a139f49344be0ad62452 Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Thu, 18 May 2017 15:44:18 -0700 Subject: [PATCH 4/7] address comments. --- .../sql/hive/client/HiveClientImpl.scala | 26 +++++++++---------- .../spark/sql/hive/ShowCreateTableSuite.scala | 4 --- .../spark/sql/hive/StatisticsSuite.scala | 4 +-- .../hive/execution/HiveComparisonTest.scala | 1 + 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 9292a976c9d6..7e5184cc9d47 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -436,28 +436,28 @@ private[hive] class HiveClientImpl( val totalSize = properties.get(StatsSetupConst.TOTAL_SIZE).map(BigInt(_)) val rawDataSize = properties.get(StatsSetupConst.RAW_DATA_SIZE).map(BigInt(_)) - def rowCount = properties.get(StatsSetupConst.ROW_COUNT).map(BigInt(_)) match { + lazy val rowCount = properties.get(StatsSetupConst.ROW_COUNT).map(BigInt(_)) match { case Some(c) if c >= 0 => Some(c) case _ => None } // TODO: check if this estimate is valid for tables after partition pruning. 
// NOTE: getting `totalSize` directly from params is kind of hacky, but this should be // relatively cheap if parameters for the table are populated into the metastore. - // Currently, only totalSize, rawDataSize, and row_count are used to build the field `stats` + // Currently, only totalSize, rawDataSize, and rowCount are used to build the field `stats` // TODO: stats should include all the other two fields (`numFiles` and `numPartitions`). // (see StatsSetupConst in Hive) val stats = - // When table is external, `totalSize` is always zero, which will influence join strategy - // so when `totalSize` is zero, use `rawDataSize` instead. When `rawDataSize` is also zero, - // return None. Later, we will use the other ways to estimate the statistics. - if (totalSize.isDefined && totalSize.get > 0L) { - Some(CatalogStatistics(sizeInBytes = totalSize.get, rowCount = rowCount)) - } else if (rawDataSize.isDefined && rawDataSize.get > 0) { - Some(CatalogStatistics(sizeInBytes = rawDataSize.get, rowCount = rowCount)) - } else { - // TODO: still fill the rowCount even if sizeInBytes is empty. Break some assumptions? - None - } + // When table is external, `totalSize` is always zero, which will influence join strategy + // so when `totalSize` is zero, use `rawDataSize` instead. When `rawDataSize` is also zero, + // return None. Later, we will use the other ways to estimate the statistics. + if (totalSize.isDefined && totalSize.get > 0L) { + Some(CatalogStatistics(sizeInBytes = totalSize.get, rowCount = rowCount)) + } else if (rawDataSize.isDefined && rawDataSize.get > 0) { + Some(CatalogStatistics(sizeInBytes = rawDataSize.get, rowCount = rowCount)) + } else { + // TODO: still fill the rowCount even if sizeInBytes is empty. Might break anything? + None + } CatalogTable( identifier = TableIdentifier(h.getTableName, Option(h.getDbName)), diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala index 26912ed1200d..fad81c7e9474 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala @@ -339,10 +339,6 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSing ignoredProperties = Map.empty ) } - - val e = normalize(actual) - val m = normalize(expected) - assert(normalize(actual) == normalize(expected)) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 5dfa98ceb377..1ecac83e5180 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -351,7 +351,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto createNonPartitionedTable(tabName, analyzedByHive = true, analyzedBySpark = analyzedBySpark) val fetchedStats1 = checkTableStats( tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) - sql(s"ALTER TABLE $tabName UNSET TBLPROPERTIES ('prop1')") + sql(s"ALTER TABLE $tabName UNSET TBLPROPERTIES ('totalSize')") val fetchedStats2 = checkTableStats( tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) assert(fetchedStats1 == fetchedStats2) @@ -418,8 +418,6 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto assert(stats2.get.sizeInBytes > stats3.get.sizeInBytes) 
sql(s"ALTER TABLE $managedTable ADD PARTITION (ds='2008-04-08', hr='12')") - assert(stats1 == stats2) - sql(s"ANALYZE TABLE $managedTable COMPUTE STATISTICS") val stats4 = checkTableStats( managedTable, hasSizeInBytes = true, expectedRowCounts = Some(1)) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index 96e400939e36..98aa92a9bb88 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -193,6 +193,7 @@ abstract class HiveComparisonTest "last_modified_time", "Owner:", // The following are hive specific schema parameters which we do not need to match exactly. + "totalNumberFiles", "maxFileSize", "minFileSize" ) From 1e4182d1e03622cdcc84f6cd951b2c534289e78f Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Thu, 18 May 2017 17:13:58 -0700 Subject: [PATCH 5/7] address comments. --- .../test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 1ecac83e5180..2c6c534a2218 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -351,7 +351,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto createNonPartitionedTable(tabName, analyzedByHive = true, analyzedBySpark = analyzedBySpark) val fetchedStats1 = checkTableStats( tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) - sql(s"ALTER TABLE $tabName UNSET TBLPROPERTIES ('totalSize')") + sql(s"ALTER TABLE $tabName UNSET TBLPROPERTIES ('prop1')") val fetchedStats2 = checkTableStats( tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) assert(fetchedStats1 == fetchedStats2) From 2048c970014b3688e60f102008e48849d573b88e Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Mon, 22 May 2017 00:45:36 -0700 Subject: [PATCH 6/7] address comments. 
--- .../org/apache/spark/sql/hive/client/HiveClientImpl.scala | 5 +---- .../org/apache/spark/sql/hive/test/TestHiveSingleton.scala | 6 +++++- .../org/apache/spark/sql/hive/HiveDDLCommandSuite.scala | 1 - .../HiveExternalCatalogBackwardCompatibilitySuite.scala | 4 ---- .../apache/spark/sql/hive/MetastoreDataSourcesSuite.scala | 5 ----- .../scala/org/apache/spark/sql/hive/StatisticsSuite.scala | 6 ------ .../org/apache/spark/sql/hive/orc/OrcSourceSuite.scala | 1 - 7 files changed, 6 insertions(+), 22 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 7e5184cc9d47..b970be740ab5 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -436,10 +436,7 @@ private[hive] class HiveClientImpl( val totalSize = properties.get(StatsSetupConst.TOTAL_SIZE).map(BigInt(_)) val rawDataSize = properties.get(StatsSetupConst.RAW_DATA_SIZE).map(BigInt(_)) - lazy val rowCount = properties.get(StatsSetupConst.ROW_COUNT).map(BigInt(_)) match { - case Some(c) if c >= 0 => Some(c) - case _ => None - } + val rowCount = properties.get(StatsSetupConst.ROW_COUNT).map(BigInt(_)).filter(_ >= 0) // TODO: check if this estimate is valid for tables after partition pruning. // NOTE: getting `totalSize` directly from params is kind of hacky, but this should be // relatively cheap if parameters for the table are populated into the metastore. diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala b/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala index 9bf84ab1fb7a..df7988f542b7 100644 --- a/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala +++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala @@ -19,13 +19,17 @@ package org.apache.spark.sql.hive.test import org.scalatest.BeforeAndAfterAll -import org.apache.spark.sql.SparkSession import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.hive.HiveExternalCatalog +import org.apache.spark.sql.hive.client.HiveClient trait TestHiveSingleton extends SparkFunSuite with BeforeAndAfterAll { protected val spark: SparkSession = TestHive.sparkSession protected val hiveContext: TestHiveContext = TestHive + protected val hiveClient: HiveClient = + spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client protected override def afterAll(): Unit = { try { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala index 7584f1741c62..d97b11e447fe 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala @@ -646,7 +646,6 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle } test("SPARK-15887: hive-site.xml should be loaded") { - val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client assert(hiveClient.getConf("hive.in.test", "") == "true") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala index 
705d43f1f3ab..3bd3d0d6db35 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala @@ -35,10 +35,6 @@ import org.apache.spark.util.Utils class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest with SQLTestUtils with TestHiveSingleton with BeforeAndAfterEach { - // To test `HiveExternalCatalog`, we need to read/write the raw table meta from/to hive client. - val hiveClient: HiveClient = - spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client - val tempDir = Utils.createTempDir().getCanonicalFile val tempDirUri = tempDir.toURI val tempDirStr = tempDir.getAbsolutePath diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index b55469481557..c785aca98582 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -52,11 +52,6 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv jsonFilePath = Utils.getSparkClassLoader.getResource("sample.json").getFile } - // To test `HiveExternalCatalog`, we need to read the raw table metadata(schema, partition - // columns and bucket specification are still in table properties) from hive client. - private def hiveClient: HiveClient = - sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client - test("persistent JSON table") { withTable("jsonTable") { sql( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 2c6c534a2218..5d52f8baa3b9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -222,7 +222,6 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto tabName: String, analyzedBySpark: Boolean = true, analyzedByHive: Boolean = true): Unit = { - val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client sql( s""" |CREATE TABLE $tabName (key STRING, value STRING) @@ -277,7 +276,6 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto // ALTER TABLE SET TBLPROPERTIES invalidates some contents of Hive specific statistics // This is triggered by the Hive alterTable API - val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client val describeResult = hiveClient.runSqlHive(s"DESCRIBE FORMATTED $tabName") val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") @@ -303,7 +301,6 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto assert(fetchedStats1 == fetchedStats2) // ALTER TABLE RENAME does not affect the contents of Hive specific statistics - val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client val describeResult = hiveClient.runSqlHive(s"DESCRIBE FORMATTED $newName") val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") @@ -328,7 +325,6 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) assert(fetchedStats1 == fetchedStats2) - val 
hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client val describeResult = hiveClient.runSqlHive(s"DESCRIBE FORMATTED $tabName") val totalSize = extractStatsPropValues(describeResult, "totalSize") @@ -356,7 +352,6 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) assert(fetchedStats1 == fetchedStats2) - val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client val describeResult = hiveClient.runSqlHive(s"DESCRIBE FORMATTED $tabName") val totalSize = extractStatsPropValues(describeResult, "totalSize") @@ -467,7 +462,6 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto sql(s"analyze table $tableName compute STATISTICS FOR COLUMNS " + stats.keys.mkString(", ")) // Validate statistics - val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client val table = hiveClient.getTable("default", tableName) val props = table.properties.filterKeys(_.startsWith("spark.sql.statistics.colStats")) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala index 6bfb88c0c1af..52fa401d32c1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala @@ -153,7 +153,6 @@ abstract class OrcSuite extends QueryTest with TestHiveSingleton with BeforeAndA } test("SPARK-19459/SPARK-18220: read char/varchar column written by Hive") { - val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client val location = Utils.createTempDir() val uri = location.toURI try { From ea7abd41685da94ca1c984ecccacbff3fe594e58 Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Mon, 22 May 2017 12:12:00 -0700 Subject: [PATCH 7/7] fix. --- .../org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala index 5afb37b382e6..97e4c2b6b2db 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala @@ -28,8 +28,6 @@ import org.apache.spark.sql.types.StructType * A test suite for Hive view related functionality. */ class HiveSQLViewSuite extends SQLViewSuite with TestHiveSingleton { - protected override val spark: SparkSession = TestHive.sparkSession - import testImplicits._ test("create a permanent/temp view using a hive, built-in, and permanent user function") {
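  // For reference: after PATCH 6/7, TestHiveSingleton supplies both the shared `spark`
  // session and the raw `hiveClient`, which is why the `protected override val spark`
  // removed above is no longer needed. Illustration only (ExampleSuite is hypothetical;
  // the hive.in.test check mirrors the one already used in HiveDDLCommandSuite):
  //
  //   class ExampleSuite extends SparkFunSuite with TestHiveSingleton {
  //     test("shared session and client are available") {
  //       assert(spark.sessionState != null)
  //       assert(hiveClient.getConf("hive.in.test", "") == "true")
  //     }
  //   }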