Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ case class AnalyzeTableCommand(
}

def updateTableStats(catalogTable: CatalogTable, newTotalSize: Long): Unit = {
val oldTotalSize = catalogTable.stats.map(_.sizeInBytes.toLong).getOrElse(0L)
val oldTotalSize = catalogTable.stats.map(_.sizeInBytes.toLong).getOrElse(-1L)
val oldRowCount = catalogTable.stats.flatMap(_.rowCount.map(_.toLong)).getOrElse(-1L)
var newStats: Option[Statistics] = None
if (newTotalSize > 0 && newTotalSize != oldTotalSize) {
if (newTotalSize >= 0 && newTotalSize != oldTotalSize) {
newStats = Some(Statistics(sizeInBytes = newTotalSize))
}
// We only set rowCount when noscan is false, because otherwise:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import scala.util.Random

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.internal.StaticSQLConf
import org.apache.spark.sql.test.{SharedSQLContext, SQLTestUtils}
import org.apache.spark.sql.test.SQLTestData.ArrayData
Expand All @@ -38,15 +37,20 @@ import org.apache.spark.sql.types._
class StatisticsCollectionSuite extends StatisticsCollectionTestBase with SharedSQLContext {
import testImplicits._

// NOTE(review): this span appears to be a rendered diff in which the removed and the added
// version of checkTableStats are interleaved without +/- markers -- confirm against the real
// file before relying on it.
//
// Earlier version (presumably the removed side): takes only an expected row count and reads
// the stats from every LogicalRelation found in the analyzed plan of the table, asserting the
// row count on each one.
private def checkTableStats(tableName: String, expectedRowCount: Option[Int])
: Option[Statistics] = {
val df = spark.table(tableName)
val stats = df.queryExecution.analyzed.collect { case rel: LogicalRelation =>
assert(rel.catalogTable.get.stats.flatMap(_.rowCount) === expectedRowCount)
rel.catalogTable.get.stats
// Later version (presumably the added side): checks the stats stored in the catalog metadata
// of `tableName` and returns them.
//   tableName         - table whose catalog metadata is looked up
//   hasSizeInBytes    - whether stats are expected to be present even without a row count
//   expectedRowCounts - expected row count; None means no row count is expected
def checkTableStats(
tableName: String,
hasSizeInBytes: Boolean,
expectedRowCounts: Option[Int]): Option[Statistics] = {
// Read the stats from the session catalog rather than from the query plan.
val stats = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)).stats
if (hasSizeInBytes || expectedRowCounts.nonEmpty) {
// Stats must exist, carry a non-negative size, and match the expected row count exactly.
assert(stats.isDefined)
assert(stats.get.sizeInBytes >= 0)
assert(stats.get.rowCount === expectedRowCounts)
} else {
// Neither a size nor a row count is expected, so the table must have no stats at all.
assert(stats.isEmpty)
}
// NOTE(review): the next two lines look like the tail of the earlier version (exactly one
// relation expected, its stats returned) -- verify.
assert(stats.size == 1)
stats.head

// Hand the fetched stats back so callers can make further assertions on them.
stats
}

test("estimates the size of a limit 0 on outer join") {
Expand Down Expand Up @@ -86,6 +90,19 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared
}
}

test("analyze empty table") {
  val emptyTableName = "emptyTable"
  withTable(emptyTableName) {
    sql(s"CREATE TABLE $emptyTableName (key STRING, value STRING) USING PARQUET")

    // After a noscan analyze the test expects stats with a zero size and no row count.
    sql(s"ANALYZE TABLE $emptyTableName COMPUTE STATISTICS noscan")
    val statsAfterNoscan =
      checkTableStats(emptyTableName, hasSizeInBytes = true, expectedRowCounts = None)
    assert(statsAfterNoscan.get.sizeInBytes == 0)

    // After a full analyze the test additionally expects a row count of zero.
    sql(s"ANALYZE TABLE $emptyTableName COMPUTE STATISTICS")
    val statsAfterScan =
      checkTableStats(emptyTableName, hasSizeInBytes = true, expectedRowCounts = Some(0))
    assert(statsAfterScan.get.sizeInBytes == 0)
  }
}

test("test table-level statistics for data source table") {
val tableName = "tbl"
withTable(tableName) {
Expand All @@ -94,11 +111,11 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared

// noscan won't count the number of rows
sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS noscan")
checkTableStats(tableName, expectedRowCount = None)
checkTableStats(tableName, hasSizeInBytes = true, expectedRowCounts = None)

// without noscan, we count the number of rows
sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS")
checkTableStats(tableName, expectedRowCount = Some(2))
checkTableStats(tableName, hasSizeInBytes = true, expectedRowCounts = Some(2))
}
}

Expand Down