Skip to content

Commit 5d6e559

Browse files
author
Zhenhua Wang
committed
add test cases
1 parent b946df0 commit 5d6e559

File tree

2 files changed

+21
-2
lines changed

2 files changed

+21
-2
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,9 @@ case class AnalyzeTableCommand(tableName: String, noscan: Boolean = true) extend
103103
if (newTotalSize > 0 && newTotalSize != oldTotalSize) {
104104
newStats = Some(Statistics(sizeInBytes = newTotalSize))
105105
}
106-
// We only set rowCount when noscan is false, because otherwise we can't know whether the
107-
// row count we get (`oldRowCount`) is valid or not.
106+
// We only set rowCount when noscan is false, because otherwise:
107+
// 1. when total size is not changed, we don't need to alter the table;
108+
// 2. when total size is changed, `oldRowCount` becomes invalid.
108109
// This is to make sure that we only record the right statistics.
109110
if (!noscan) {
110111
val newRowCount = Dataset.ofRows(sparkSession, relation).count()

sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import org.apache.spark.sql.execution.joins._
3030
import org.apache.spark.sql.hive.test.TestHiveSingleton
3131
import org.apache.spark.sql.internal.SQLConf
3232
import org.apache.spark.sql.test.SQLTestUtils
33+
import org.apache.spark.sql.types.StructType
3334

3435
class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils {
3536

@@ -220,6 +221,11 @@ class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils
220221
checkMetastoreRelationStats(textTable, expectedStats =
221222
Some(Statistics(sizeInBytes = 5812, rowCount = Some(500))))
222223

224+
sql(s"ANALYZE TABLE $textTable COMPUTE STATISTICS noscan")
225+
// when the total size is not changed, the old row count is kept
226+
checkMetastoreRelationStats(textTable, expectedStats =
227+
Some(Statistics(sizeInBytes = 5812, rowCount = Some(500))))
228+
223229
sql(s"INSERT INTO TABLE $textTable SELECT * FROM src")
224230
sql(s"ANALYZE TABLE $textTable COMPUTE STATISTICS noscan")
225231
// update total size and remove the old and invalid row count
@@ -301,6 +307,18 @@ class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils
301307
}
302308
}
303309

310+
test("statistics collection of a table with zero column") {
311+
val table_no_cols = "table_no_cols"
312+
withTable(table_no_cols) {
313+
val rddNoCols = sparkContext.parallelize(1 to 10).map(_ => Row.empty)
314+
val dfNoCols = spark.createDataFrame(rddNoCols, StructType(Seq.empty))
315+
dfNoCols.write.format("json").saveAsTable(table_no_cols)
316+
sql(s"ANALYZE TABLE $table_no_cols COMPUTE STATISTICS")
317+
checkLogicalRelationStats(table_no_cols, expectedStats =
318+
Some(Statistics(sizeInBytes = 30, rowCount = Some(10))))
319+
}
320+
}
321+
304322
test("estimates the size of a test MetastoreRelation") {
305323
val df = sql("""SELECT * FROM src""")
306324
val sizes = df.queryExecution.analyzed.collect { case mr: MetastoreRelation =>

0 commit comments

Comments
 (0)