Skip to content

Commit b850bfd

Browse files
committed
address comments
1 parent 4fce26e commit b850bfd

File tree

3 files changed

+18
-29
lines changed

3 files changed

+18
-29
lines changed

common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,13 @@ private static long optimalNumOfBits(long n, double p) {
120120
return (long) (-n * Math.log(p) / (Math.log(2) * Math.log(2)));
121121
}
122122

123+
static final double DEFAULT_FPP = 0.03;
124+
123125
/**
124-
* Creates a {@link BloomFilter} with given {@code expectedNumItems} and a default 3% {@code fpp}.
126+
* Creates a {@link BloomFilter} with given {@code expectedNumItems} and the default {@code fpp}.
125127
*/
126128
public static BloomFilter create(long expectedNumItems) {
127-
return create(expectedNumItems, 0.03);
129+
return create(expectedNumItems, DEFAULT_FPP);
128130
}
129131

130132
/**

common/sketch/src/test/scala/org/apache/spark/util/sketch/BitArraySuite.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class BitArraySuite extends FunSuite { // scalastyle:ignore funsuite
5151
val bitArray = new BitArray(320)
5252
val indexes = (1 to 100).map(_ => r.nextInt(320).toLong).distinct
5353

54-
indexes.map(bitArray.set)
54+
indexes.foreach(bitArray.set)
5555
indexes.foreach(i => assert(bitArray.get(i)))
5656
assert(bitArray.cardinality() == indexes.length)
5757
}
@@ -66,8 +66,8 @@ class BitArraySuite extends FunSuite { // scalastyle:ignore funsuite
6666
val indexes1 = (1 to 100).map(_ => r.nextInt(64 * 6).toLong).distinct
6767
val indexes2 = (1 to 100).map(_ => r.nextInt(64 * 6).toLong).distinct
6868

69-
indexes1.map(bitArray1.set)
70-
indexes2.map(bitArray2.set)
69+
indexes1.foreach(bitArray1.set)
70+
indexes2.foreach(bitArray2.set)
7171

7272
bitArray1.putAll(bitArray2)
7373
indexes1.foreach(i => assert(bitArray1.get(i)))

common/sketch/src/test/scala/org/apache/spark/util/sketch/BloomFilterSuite.scala

Lines changed: 11 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import scala.util.Random
2323
import org.scalatest.FunSuite // scalastyle:ignore funsuite
2424

2525
class BloomFilterSuite extends FunSuite { // scalastyle:ignore funsuite
26+
private final val EPSILON = 0.01
2627

2728
def testAccuracy[T: ClassTag](typeName: String, numItems: Int)(itemGen: Random => T): Unit = {
2829
test(s"accuracy - $typeName") {
@@ -36,33 +37,20 @@ class BloomFilterSuite extends FunSuite { // scalastyle:ignore funsuite
3637
val filter = BloomFilter.create(numInsertion, fpp)
3738

3839
// insert first `numInsertion` items.
39-
var i = 0
40-
while (i < numInsertion) {
41-
filter.put(allItems(i))
42-
i += 1
43-
}
44-
45-
i = 0
46-
while (i < numInsertion) {
47-
// false negative is not allowed.
48-
assert(filter.mightContain(allItems(i)))
49-
i += 1
50-
}
40+
allItems.take(numInsertion).foreach(filter.put)
41+
42+
// false negative is not allowed.
43+
assert(allItems.take(numInsertion).forall(filter.mightContain))
5144

5245
// The number of inserted items doesn't exceed `expectedNumItems`, so the `expectedFpp`
5346
// should not be significantly higher than the one we passed in to create this bloom filter.
54-
assert(filter.expectedFpp() - fpp < 0.001)
47+
assert(filter.expectedFpp() - fpp < EPSILON)
5548

56-
var errorCount = 0
57-
while (i < numItems) {
58-
if (filter.mightContain(allItems(i))) errorCount += 1
59-
i += 1
60-
}
49+
val errorCount = allItems.drop(numInsertion).count(filter.mightContain)
6150

6251
// Also check the actual fpp is not significantly higher than we expected.
6352
val actualFpp = errorCount.toDouble / (numItems - numInsertion)
64-
// Skip error count that is too small.
65-
assert(errorCount < 50 || actualFpp - fpp < 0.001)
53+
assert(actualFpp - fpp < EPSILON)
6654
}
6755
}
6856

@@ -83,9 +71,8 @@ class BloomFilterSuite extends FunSuite { // scalastyle:ignore funsuite
8371
filter1.mergeInPlace(filter2)
8472

8573
// After merge, `filter1` has `numItems` items which doesn't exceed `expectedNumItems`, so the
86-
// `expectedFpp` should not be significantly higher than the default one: 3%
87-
// Skip byte type as it has too little distinct values.
88-
assert(typeName == "Byte" || 0.03 - filter1.expectedFpp() < 0.001)
74+
// `expectedFpp` should not be significantly higher than the default one.
75+
assert(filter1.expectedFpp() - BloomFilter.DEFAULT_FPP < EPSILON)
8976

9077
items1.foreach(i => assert(filter1.mightContain(i)))
9178
items2.foreach(i => assert(filter1.mightContain(i)))
@@ -97,7 +84,7 @@ class BloomFilterSuite extends FunSuite { // scalastyle:ignore funsuite
9784
testMergeInPlace[T](typeName, numItems)(itemGen)
9885
}
9986

100-
testItemType[Byte]("Byte", 200) { _.nextInt().toByte }
87+
testItemType[Byte]("Byte", 160) { _.nextInt().toByte }
10188

10289
testItemType[Short]("Short", 1000) { _.nextInt().toShort }
10390

0 commit comments

Comments
 (0)