Skip to content

Commit 2a4ee99

Browse files
author
Zhenhua Wang
committed
fix comments
1 parent ad14a5e commit 2a4ee99

File tree

3 files changed

+48
-45
lines changed

3 files changed

+48
-45
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -214,24 +214,25 @@ object EstimationUtils {
214214
}
215215

216216
/**
217-
* Returns overlapped ranges between two histograms, in the given value range [newMin, newMax].
217+
* Returns overlapped ranges between two histograms, in the given value range
218+
* [lowerBound, upperBound].
218219
*/
219220
def getOverlappedRanges(
220221
leftHistogram: Histogram,
221222
rightHistogram: Histogram,
222-
newMin: Double,
223-
newMax: Double): Seq[OverlappedRange] = {
223+
lowerBound: Double,
224+
upperBound: Double): Seq[OverlappedRange] = {
224225
val overlappedRanges = new ArrayBuffer[OverlappedRange]()
225-
// Only bins whose range intersect [newMin, newMax] have join possibility.
226+
// Only bins whose range intersects [lowerBound, upperBound] can contribute to the join.
226227
val leftBins = leftHistogram.bins
227-
.filter(b => b.lo <= newMax && b.hi >= newMin)
228+
.filter(b => b.lo <= upperBound && b.hi >= lowerBound)
228229
val rightBins = rightHistogram.bins
229-
.filter(b => b.lo <= newMax && b.hi >= newMin)
230+
.filter(b => b.lo <= upperBound && b.hi >= lowerBound)
230231

231232
leftBins.foreach { lb =>
232233
rightBins.foreach { rb =>
233-
val (left, leftHeight) = trimBin(lb, leftHistogram.height, newMin, newMax)
234-
val (right, rightHeight) = trimBin(rb, rightHistogram.height, newMin, newMax)
234+
val (left, leftHeight) = trimBin(lb, leftHistogram.height, lowerBound, upperBound)
235+
val (right, rightHeight) = trimBin(rb, rightHistogram.height, lowerBound, upperBound)
235236
// Only collect overlapped ranges.
236237
if (left.lo <= right.hi && left.hi >= right.lo) {
237238
// Collect overlapped ranges.
@@ -259,9 +260,7 @@ object EstimationUtils {
259260
// Case3: the left bin is "smaller" than the right bin
260261
// left.lo right.lo left.hi right.hi
261262
// --------+------------------+------------+----------------+------->
262-
val leftRatio = (left.hi - right.lo) / (left.hi - left.lo)
263-
val rightRatio = (left.hi - right.lo) / (right.hi - right.lo)
264-
if (leftRatio == 0) {
263+
if (left.hi == right.lo) {
265264
// The overlapped range has only one value.
266265
OverlappedRange(
267266
lo = right.lo,
@@ -272,6 +271,8 @@ object EstimationUtils {
272271
rightNumRows = rightHeight / right.ndv
273272
)
274273
} else {
274+
val leftRatio = (left.hi - right.lo) / (left.hi - left.lo)
275+
val rightRatio = (left.hi - right.lo) / (right.hi - right.lo)
275276
OverlappedRange(
276277
lo = right.lo,
277278
hi = left.hi,
@@ -285,9 +286,7 @@ object EstimationUtils {
285286
// Case4: the left bin is "larger" than the right bin
286287
// right.lo left.lo right.hi left.hi
287288
// --------+------------------+------------+----------------+------->
288-
val leftRatio = (right.hi - left.lo) / (left.hi - left.lo)
289-
val rightRatio = (right.hi - left.lo) / (right.hi - right.lo)
290-
if (leftRatio == 0) {
289+
if (right.hi == left.lo) {
291290
// The overlapped range has only one value.
292291
OverlappedRange(
293292
lo = right.hi,
@@ -298,6 +297,8 @@ object EstimationUtils {
298297
rightNumRows = rightHeight / right.ndv
299298
)
300299
} else {
300+
val leftRatio = (right.hi - left.lo) / (left.hi - left.lo)
301+
val rightRatio = (right.hi - left.lo) / (right.hi - right.lo)
301302
OverlappedRange(
302303
lo = left.lo,
303304
hi = right.hi,
@@ -343,24 +344,26 @@ object EstimationUtils {
343344
}
344345

345346
/**
346-
* Given an original bin and a value range [min, max], returns the trimmed bin and its number of
347-
* rows.
347+
* Given an original bin and a value range [lowerBound, upperBound], returns the trimmed part
348+
* of the bin in that range and its number of rows.
348349
*/
349-
def trimBin(bin: HistogramBin, height: Double, min: Double, max: Double)
350+
def trimBin(bin: HistogramBin, height: Double, lowerBound: Double, upperBound: Double)
350351
: (HistogramBin, Double) = {
351-
val (lo, hi) = if (bin.lo <= min && bin.hi >= max) {
352-
// bin.lo min max bin.hi
352+
val (lo, hi) = if (bin.lo <= lowerBound && bin.hi >= upperBound) {
353+
// bin.lo lowerBound upperBound bin.hi
354+
// --------+------------------+------------+-------------+------->
355+
(lowerBound, upperBound)
356+
} else if (bin.lo <= lowerBound && bin.hi >= lowerBound) {
357+
// bin.lo lowerBound bin.hi upperBound
353358
// --------+------------------+------------+-------------+------->
354-
(min, max)
355-
} else if (bin.lo <= min && bin.hi >= min) {
356-
// bin.lo min bin.hi
357-
// --------+------------------+-----------+------->
358-
(min, bin.hi)
359-
} else if (bin.lo <= max && bin.hi >= max) {
360-
// bin.lo max bin.hi
361-
// --------+------------------+-----------+------->
362-
(bin.lo, max)
359+
(lowerBound, bin.hi)
360+
} else if (bin.lo <= upperBound && bin.hi >= upperBound) {
361+
// lowerBound bin.lo upperBound bin.hi
362+
// --------+------------------+------------+-------------+------->
363+
(bin.lo, upperBound)
363364
} else {
365+
// lowerBound bin.lo bin.hi upperBound
366+
// --------+------------------+------------+-------------+------->
364367
(bin.lo, bin.hi)
365368
}
366369

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,8 +245,8 @@ case class JoinEstimation(join: Join) extends Logging {
245245
leftHistogram = leftHistogram,
246246
rightHistogram = rightHistogram,
247247
// Only numeric values have equi-height histograms.
248-
newMin = newMin.get.toString.toDouble,
249-
newMax = newMax.get.toString.toDouble)
248+
lowerBound = newMin.get.toString.toDouble,
249+
upperBound = newMax.get.toString.toDouble)
250250

251251
var card: BigDecimal = 0
252252
var totalNdv: Double = 0

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -129,9 +129,9 @@ class JoinEstimationSuite extends StatsEstimationTestBase {
129129
val histogram2 = Histogram(height = 100, Array(
130130
HistogramBin(lo = 0, hi = 50, ndv = 50), HistogramBin(lo = 50, hi = 100, ndv = 40)))
131131
// test bin trimming
132-
val (t0, h0) = trimBin(histogram2.bins(0), height = 100, min = 10, max = 60)
132+
val (t0, h0) = trimBin(histogram2.bins(0), height = 100, lowerBound = 10, upperBound = 60)
133133
assert(t0 == HistogramBin(lo = 10, hi = 50, ndv = 40) && h0 == 80)
134-
val (t1, h1) = trimBin(histogram2.bins(1), height = 100, min = 10, max = 60)
134+
val (t1, h1) = trimBin(histogram2.bins(1), height = 100, lowerBound = 10, upperBound = 60)
135135
assert(t1 == HistogramBin(lo = 50, hi = 60, ndv = 8) && h1 == 20)
136136

137137
val expectedRanges = Seq(
@@ -143,7 +143,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase {
143143
OverlappedRange(50, 60, 30*1/3, 8, 300*1/3, 20)
144144
)
145145
assert(expectedRanges.equals(
146-
getOverlappedRanges(histogram1, histogram2, newMin = 10D, newMax = 60D)))
146+
getOverlappedRanges(histogram1, histogram2, lowerBound = 10D, upperBound = 60D)))
147147

148148
estimateByHistogram(
149149
leftHistogram = histogram1,
@@ -162,9 +162,9 @@ class JoinEstimationSuite extends StatsEstimationTestBase {
162162
val histogram2 = Histogram(height = 100, Array(
163163
HistogramBin(lo = 0, hi = 50, ndv = 50), HistogramBin(lo = 50, hi = 100, ndv = 40)))
164164
// test bin trimming
165-
val (t0, h0) = trimBin(histogram2.bins(0), height = 100, min = 50, max = 75)
165+
val (t0, h0) = trimBin(histogram2.bins(0), height = 100, lowerBound = 50, upperBound = 75)
166166
assert(t0 == HistogramBin(lo = 50, hi = 50, ndv = 1) && h0 == 2)
167-
val (t1, h1) = trimBin(histogram2.bins(1), height = 100, min = 50, max = 75)
167+
val (t1, h1) = trimBin(histogram2.bins(1), height = 100, lowerBound = 50, upperBound = 75)
168168
assert(t1 == HistogramBin(lo = 50, hi = 75, ndv = 20) && h1 == 50)
169169

170170
val expectedRanges = Seq(
@@ -176,7 +176,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase {
176176
OverlappedRange(60, 75, 3, 20*15/25, 300, 50*15/25)
177177
)
178178
assert(expectedRanges.equals(
179-
getOverlappedRanges(histogram1, histogram2, newMin = 50D, newMax = 75D)))
179+
getOverlappedRanges(histogram1, histogram2, lowerBound = 50D, upperBound = 75D)))
180180

181181
estimateByHistogram(
182182
leftHistogram = histogram1,
@@ -197,9 +197,9 @@ class JoinEstimationSuite extends StatsEstimationTestBase {
197197
val histogram2 = Histogram(height = 100, Array(
198198
HistogramBin(lo = 0, hi = 50, ndv = 50), HistogramBin(lo = 50, hi = 100, ndv = 40)))
199199
// test bin trimming
200-
val (t0, h0) = trimBin(histogram2.bins(0), height = 100, min = 30, max = 60)
200+
val (t0, h0) = trimBin(histogram2.bins(0), height = 100, lowerBound = 30, upperBound = 60)
201201
assert(t0 == HistogramBin(lo = 30, hi = 50, ndv = 20) && h0 == 40)
202-
val (t1, h1) = trimBin(histogram2.bins(1), height = 100, min = 30, max = 60)
202+
val (t1, h1) = trimBin(histogram2.bins(1), height = 100, lowerBound = 30, upperBound = 60)
203203
assert(t1 ==HistogramBin(lo = 50, hi = 60, ndv = 8) && h1 == 20)
204204

205205
val expectedRanges = Seq(
@@ -209,7 +209,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase {
209209
OverlappedRange(50, 60, 30*1/3, 8, 300*1/3, 20)
210210
)
211211
assert(expectedRanges.equals(
212-
getOverlappedRanges(histogram1, histogram2, newMin = 30D, newMax = 60D)))
212+
getOverlappedRanges(histogram1, histogram2, lowerBound = 30D, upperBound = 60D)))
213213

214214
estimateByHistogram(
215215
leftHistogram = histogram1,
@@ -228,9 +228,9 @@ class JoinEstimationSuite extends StatsEstimationTestBase {
228228
val histogram2 = Histogram(height = 100, Array(
229229
HistogramBin(lo = 0, hi = 50, ndv = 50), HistogramBin(lo = 50, hi = 50, ndv = 1)))
230230
// test bin trimming
231-
val (t0, h0) = trimBin(histogram1.bins(1), height = 300, min = 30, max = 50)
231+
val (t0, h0) = trimBin(histogram1.bins(1), height = 300, lowerBound = 30, upperBound = 50)
232232
assert(t0 == HistogramBin(lo = 30, hi = 50, ndv = 20) && h0 == 200)
233-
val (t1, h1) = trimBin(histogram2.bins(0), height = 100, min = 30, max = 50)
233+
val (t1, h1) = trimBin(histogram2.bins(0), height = 100, lowerBound = 30, upperBound = 50)
234234
assert(t1 == HistogramBin(lo = 30, hi = 50, ndv = 20) && h1 == 40)
235235

236236
val expectedRanges = Seq(
@@ -239,7 +239,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase {
239239
OverlappedRange(50, 50, 1, 1, 200/20, 100)
240240
)
241241
assert(expectedRanges.equals(
242-
getOverlappedRanges(histogram1, histogram2, newMin = 30D, newMax = 50D)))
242+
getOverlappedRanges(histogram1, histogram2, lowerBound = 30D, upperBound = 50D)))
243243

244244
estimateByHistogram(
245245
leftHistogram = histogram1,
@@ -258,9 +258,9 @@ class JoinEstimationSuite extends StatsEstimationTestBase {
258258
val histogram2 = Histogram(height = 150, Array(
259259
HistogramBin(lo = 0, hi = 30, ndv = 30), HistogramBin(lo = 30, hi = 30, ndv = 1)))
260260
// test bin trimming
261-
val (t0, h0) = trimBin(histogram1.bins(1), height = 300, min = 30, max = 30)
261+
val (t0, h0) = trimBin(histogram1.bins(1), height = 300, lowerBound = 30, upperBound = 30)
262262
assert(t0 == HistogramBin(lo = 30, hi = 30, ndv = 1) && h0 == 10)
263-
val (t1, h1) = trimBin(histogram2.bins(0), height = 150, min = 30, max = 30)
263+
val (t1, h1) = trimBin(histogram2.bins(0), height = 150, lowerBound = 30, upperBound = 30)
264264
assert(t1 == HistogramBin(lo = 30, hi = 30, ndv = 1) && h1 == 5)
265265

266266
val expectedRanges = Seq(
@@ -270,7 +270,7 @@ class JoinEstimationSuite extends StatsEstimationTestBase {
270270
OverlappedRange(30, 30, 1, 1, 10, 150)
271271
)
272272
assert(expectedRanges.equals(
273-
getOverlappedRanges(histogram1, histogram2, newMin = 30D, newMax = 30D)))
273+
getOverlappedRanges(histogram1, histogram2, lowerBound = 30D, upperBound = 30D)))
274274

275275
estimateByHistogram(
276276
leftHistogram = histogram1,

0 commit comments

Comments
 (0)