Skip to content

Commit aa9859a

Browse files
committed
Check the null count for group-by columns that contain only null values
1 parent 7c7685c commit aa9859a

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/AggregateEstimation.scala

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,16 @@ object AggregateEstimation {
3939
// Multiply distinct counts of group-by columns. This is an upper bound, which assumes
4040
// the data contains all combinations of distinct values of group-by columns.
4141
var outputRows: BigInt = agg.groupingExpressions.foldLeft(BigInt(1))(
42-
(res, expr) => res *
43-
childStats.attributeStats(expr.asInstanceOf[Attribute]).distinctCount.get.max(BigInt(1)))
42+
(res, expr) => {
43+
val columnStat = childStats.attributeStats(expr.asInstanceOf[Attribute])
44+
val distinctValue: BigInt = if (columnStat.distinctCount.get == 0 &&
45+
columnStat.nullCount.get > 0) {
46+
1
47+
} else {
48+
columnStat.distinctCount.get
49+
}
50+
res * distinctValue
51+
})
4452

4553
outputRows = if (agg.groupingExpressions.isEmpty) {
4654
// If there are no group-by columns, the output is a single row containing values of aggregate

0 commit comments

Comments
 (0)