@@ -22,9 +22,9 @@ import scala.collection.mutable.ArrayBuffer
import org.apache.spark.util.SizeEstimator
2323
2424/**
 * A general interface for collections to keep track of their estimated sizes in bytes.
 * We sample with a slow exponential back-off using the SizeEstimator to amortize the time,
 * as each call to SizeEstimator is somewhat expensive (order of a few milliseconds).
2828 */
2929private [spark] trait SizeTracker {
3030
@@ -36,7 +36,7 @@ private[spark] trait SizeTracker {
3636 */
3737 private val SAMPLE_GROWTH_RATE = 1.1
3838
  /** Samples taken since last resetSamples(). Only the last two are kept for extrapolation. */
4040 private val samples = new ArrayBuffer [Sample ]
4141
4242 /** The average number of bytes per update between our last two samples. */
@@ -50,23 +50,30 @@ private[spark] trait SizeTracker {
5050
5151 resetSamples()
5252
53- /** Called after the collection undergoes a dramatic change in size. */
53+ /**
54+ * Reset samples collected so far.
55+ * This should be called after the collection undergoes a dramatic change in size.
56+ */
5457 protected def resetSamples (): Unit = {
5558 numUpdates = 1
5659 nextSampleNum = 1
5760 samples.clear()
5861 takeSample()
5962 }
6063
61- /** Callback to be invoked after an update. */
64+ /**
65+ * Callback to be invoked after every update.
66+ */
6267 protected def afterUpdate (): Unit = {
6368 numUpdates += 1
6469 if (nextSampleNum == numUpdates) {
6570 takeSample()
6671 }
6772 }
6873
  /**
   * Take a new sample of the current collection's size.
   */
7077 private def takeSample (): Unit = {
7178 samples += Sample (SizeEstimator .estimate(this ), numUpdates)
7279 // Only use the last two samples to extrapolate
@@ -83,7 +90,9 @@ private[spark] trait SizeTracker {
8390 nextSampleNum = math.ceil(numUpdates * SAMPLE_GROWTH_RATE ).toLong
8491 }
8592
  /**
   * Estimate the current size of the collection in bytes. O(1) time.
   */
8796 def estimateSize (): Long = {
8897 assert(samples.nonEmpty)
8998 val extrapolatedDelta = bytesPerUpdate * (numUpdates - samples.last.numUpdates)
0 commit comments