Skip to content

Commit ffd1f59

Browse files
davies authored and pwendell committed
[SPARK-2887] fix bug of countApproxDistinct() when have more than one partition
fix bug of countApproxDistinct() when have more than one partition

Author: Davies Liu <[email protected]>

Closes #1812 from davies/approx and squashes the following commits:

bf757ce [Davies Liu] fix bug of countApproxDistinct() when have more than one partition
1 parent a263a7e commit ffd1f59

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

core/src/main/scala/org/apache/spark/rdd/RDD.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1004,7 +1004,7 @@ abstract class RDD[T: ClassTag](
1004 1004   },
1005 1005   (h1: HyperLogLogPlus, h2: HyperLogLogPlus) => {
1006 1006   h1.addAll(h2)
1007      - h2
     1007 + h1
1008 1008   }).cardinality()
1009 1009   }
1010 1010

core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -81,11 +81,11 @@ class RDDSuite extends FunSuite with SharedSparkContext {
81 81
82 82   def error(est: Long, size: Long) = math.abs(est - size) / size.toDouble
83 83
84    - val size = 100
85    - val uniformDistro = for (i <- 1 to 100000) yield i % size
86    - val simpleRdd = sc.makeRDD(uniformDistro)
87    - assert(error(simpleRdd.countApproxDistinct(4, 0), size) < 0.4)
88    - assert(error(simpleRdd.countApproxDistinct(8, 0), size) < 0.1)
   84 + val size = 1000
   85 + val uniformDistro = for (i <- 1 to 5000) yield i % size
   86 + val simpleRdd = sc.makeRDD(uniformDistro, 10)
   87 + assert(error(simpleRdd.countApproxDistinct(8, 0), size) < 0.2)
   88 + assert(error(simpleRdd.countApproxDistinct(12, 0), size) < 0.1)
89 89   }
90 90
91 91   test("SparkContext.union") {

0 commit comments

Comments
 (0)