Skip to content

Commit a92fe50

Browse files
committed
[SPARK-7844] Fix broken tests in KernelDensity
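The densities in KernelDensity are scaled down by (number of parallel processes × number of evaluation points) instead of by the number of samples, because the aggregation records the number of evaluation points rather than counting the samples. The tests in KernelDensitySuite did not catch this: their assertions compared signed differences without taking the absolute value, and every assertion checked only densities(0).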
1 parent 4e5220c commit a92fe50

File tree

2 files changed

+12
-10
lines changed

2 files changed

+12
-10
lines changed

mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,20 +86,20 @@ class KernelDensity extends Serializable {
8686
val n = points.length
8787
// This gets used in each Gaussian PDF computation, so compute it up front
8888
val logStandardDeviationPlusHalfLog2Pi = math.log(bandwidth) + 0.5 * math.log(2 * math.Pi)
89-
val (densities, count) = sample.aggregate((new Array[Double](n), 0L))(
89+
val densities = sample.aggregate(new Array[Double](n))(
9090
(x, y) => {
9191
var i = 0
9292
while (i < n) {
93-
x._1(i) += normPdf(y, bandwidth, logStandardDeviationPlusHalfLog2Pi, points(i))
93+
x(i) += normPdf(y, bandwidth, logStandardDeviationPlusHalfLog2Pi, points(i))
9494
i += 1
9595
}
96-
(x._1, n)
96+
x
9797
},
9898
(x, y) => {
99-
blas.daxpy(n, 1.0, y._1, 1, x._1, 1)
100-
(x._1, x._2 + y._2)
99+
blas.daxpy(n, 1.0, y, 1, x, 1)
100+
x
101101
})
102-
blas.dscal(n, 1.0 / count, densities, 1)
102+
blas.dscal(n, 1.0 / sample.count, densities, 1)
103103
densities
104104
}
105105
}

mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ class KernelDensitySuite extends FunSuite with MLlibTestSparkContext {
2929
val densities = new KernelDensity().setSample(rdd).setBandwidth(3.0).estimate(evaluationPoints)
3030
val normal = new NormalDistribution(5.0, 3.0)
3131
val acceptableErr = 1e-6
32-
assert(densities(0) - normal.density(5.0) < acceptableErr)
33-
assert(densities(0) - normal.density(6.0) < acceptableErr)
32+
assert(Math.abs(densities(0) - normal.density(5.0)) < acceptableErr)
33+
assert(Math.abs(densities(1) - normal.density(6.0)) < acceptableErr)
3434
}
3535

3636
test("kernel density multiple samples") {
@@ -40,7 +40,9 @@ class KernelDensitySuite extends FunSuite with MLlibTestSparkContext {
4040
val normal1 = new NormalDistribution(5.0, 3.0)
4141
val normal2 = new NormalDistribution(10.0, 3.0)
4242
val acceptableErr = 1e-6
43-
assert(densities(0) - (normal1.density(5.0) + normal2.density(5.0)) / 2 < acceptableErr)
44-
assert(densities(0) - (normal1.density(6.0) + normal2.density(6.0)) / 2 < acceptableErr)
43+
assert(Math.abs(
44+
densities(0) - (normal1.density(5.0) + normal2.density(5.0)) / 2) < acceptableErr)
45+
assert(Math.abs(
46+
densities(1) - (normal1.density(6.0) + normal2.density(6.0)) / 2) < acceptableErr)
4547
}
4648
}

0 commit comments

Comments
 (0)