@@ -22,11 +22,16 @@ import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.QueryPlan
+import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats
 import org.apache.spark.sql.catalyst.trees.CurrentOrigin
 import org.apache.spark.sql.types.StructType


-abstract class LogicalPlan extends QueryPlan[LogicalPlan] with QueryPlanConstraints with Logging {
+abstract class LogicalPlan
+  extends QueryPlan[LogicalPlan]
+  with LogicalPlanStats
+  with QueryPlanConstraints
+  with Logging {

   private var _analyzed: Boolean = false

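Note: the new statsEstimation.LogicalPlanStats trait itself is not part of this hunk; a minimal sketch of what such a mixin could look like, inferred only from the members removed from LogicalPlan further down, might be (names and visibility are assumptions, not the actual trait):

// Hypothetical sketch, assuming the stats cache and its accessors move as-is into the mixin.
trait LogicalPlanStats { self: LogicalPlan =>

  /** Cache so the (potentially expensive) estimation runs at most once per node. */
  protected var statsCache: Option[Statistics] = None

  /** Returns the cached estimate, computing it on first access. */
  def stats: Statistics = statsCache.getOrElse {
    statsCache = Some(computeStats())
    statsCache.get
  }

  /** Clears this node's cache and, recursively, its children's caches. */
  def invalidateStatsCache(): Unit = {
    statsCache = None
    children.foreach(_.invalidateStatsCache())
  }

  /** Assumed hook to be provided by concrete plan nodes or a stats visitor. */
  protected def computeStats(): Statistics
}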
@@ -80,40 +85,6 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with QueryPlanConstrai
     }
   }

-  /** A cache for the estimated statistics, such that it will only be computed once. */
-  private var statsCache: Option[Statistics] = None
-
-  /**
-   * Returns the estimated statistics for the current logical plan node. Under the hood, this
-   * method caches the return value, which is computed based on the configuration passed in the
-   * first time. If the configuration changes, the cache can be invalidated by calling
-   * [[invalidateStatsCache()]].
-   */
-  final def stats: Statistics = statsCache.getOrElse {
-    statsCache = Some(computeStats)
-    statsCache.get
-  }
-
-  /** Invalidates the stats cache. See [[stats]] for more information. */
-  final def invalidateStatsCache(): Unit = {
-    statsCache = None
-    children.foreach(_.invalidateStatsCache())
-  }
-
-  /**
-   * Computes [[Statistics]] for this plan. The default implementation assumes the output
-   * cardinality is the product of all child plan's cardinality, i.e. applies in the case
-   * of cartesian joins.
-   *
-   * [[LeafNode]]s must override this.
-   */
-  protected def computeStats: Statistics = {
-    if (children.isEmpty) {
-      throw new UnsupportedOperationException(s"LeafNode $nodeName must implement statistics.")
-    }
-    Statistics(sizeInBytes = children.map(_.stats.sizeInBytes).product)
-  }
-
   override def verboseStringWithSuffix: String = {
     super.verboseString + statsCache.map(", " + _.toString).getOrElse("")
   }
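The removed block documents the caching contract that moves with this code: the estimate is computed once per node, later calls reuse the cached value, and a configuration change requires an explicit, recursive invalidation. A short usage sketch (the plan value is hypothetical):

val plan: LogicalPlan = someAnalyzedPlan   // assumed to exist for illustration
val s1 = plan.stats                        // computed and cached on first access
val s2 = plan.stats                        // served from statsCache, no recomputation
plan.invalidateStatsCache()                // clears this node and all of its children
val s3 = plan.stats                        // recomputed under the current configuration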
@@ -300,6 +271,9 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with QueryPlanConstrai
 abstract class LeafNode extends LogicalPlan {
   override final def children: Seq[LogicalPlan] = Nil
   override def producedAttributes: AttributeSet = outputSet
+
+  /** Leaf nodes that can survive analysis must define their own statistics. */
+  def computeStats(): Statistics = throw new UnsupportedOperationException
 }

 /**
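With this default throwing, any leaf operator that survives analysis now overrides computeStats() itself instead of relying on a default in LogicalPlan. A hypothetical leaf operator, not part of this commit, illustrating the contract:

// Illustrative only: a leaf that knows its on-disk footprint reports it directly.
case class MyScan(output: Seq[Attribute], bytesOnDisk: BigInt) extends LeafNode {
  override def computeStats(): Statistics = Statistics(sizeInBytes = bytesOnDisk)
}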
@@ -331,23 +305,6 @@ abstract class UnaryNode extends LogicalPlan {
   }

   override protected def validConstraints: Set[Expression] = child.constraints
-
-  override def computeStats: Statistics = {
-    // There should be some overhead in Row object, the size should not be zero when there is
-    // no columns, this help to prevent divide-by-zero error.
-    val childRowSize = child.output.map(_.dataType.defaultSize).sum + 8
-    val outputRowSize = output.map(_.dataType.defaultSize).sum + 8
-    // Assume there will be the same number of rows as child has.
-    var sizeInBytes = (child.stats.sizeInBytes * outputRowSize) / childRowSize
-    if (sizeInBytes == 0) {
-      // sizeInBytes can't be zero, or sizeInBytes of BinaryNode will also be zero
-      // (product of children).
-      sizeInBytes = 1
-    }
-
-    // Don't propagate rowCount and attributeStats, since they are not estimated here.
-    Statistics(sizeInBytes = sizeInBytes, hints = child.stats.hints)
-  }
 }

 /**
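The removed UnaryNode default scales the child's size by the ratio of output row width to child row width and clamps the result to at least 1 byte. Restated as a standalone sketch (it presumably reappears in the statsEstimation package; the function name and signature here are illustrative, not the new API):

def scaledSizeInBytes(childSize: BigInt, childRowWidth: Long, outputRowWidth: Long): BigInt = {
  // The + 8 models per-row overhead and guards against a zero-width row (divide-by-zero).
  val childRowSize = childRowWidth + 8
  val outputRowSize = outputRowWidth + 8
  // Assume the same row count as the child; only the row width changes.
  val size = childSize * outputRowSize / childRowSize
  // Never report 0, otherwise a parent BinaryNode's product of child sizes collapses to 0.
  if (size == 0) BigInt(1) else size
}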