From f1708c97533f6a89ade7fca897678d2ccff5ca36 Mon Sep 17 00:00:00 2001
From: Erik van Oosten
Date: Mon, 13 Apr 2015 14:13:37 +0200
Subject: [PATCH 1/2] Fix for sum on empty RDD fails with exception (SPARK-6878)

---
 .../scala/org/apache/spark/rdd/DoubleRDDFunctions.scala  | 2 +-
 .../test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala
index 29ca3e9c4bd0..843a893235e5 100644
--- a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala
@@ -31,7 +31,7 @@ import org.apache.spark.util.StatCounter
 class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable {
   /** Add up the elements in this RDD. */
   def sum(): Double = {
-    self.reduce(_ + _)
+    self.fold(0.0)(_ + _)
   }
 
   /**
diff --git a/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala
index 97079382c716..4b8842946066 100644
--- a/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala
@@ -18,10 +18,17 @@
 package org.apache.spark.rdd
 
 import org.scalatest.FunSuite
+import org.scalatest.Matchers._
 
 import org.apache.spark._
 
 class DoubleRDDSuite extends FunSuite with SharedSparkContext {
+  test("sum") {
+    sc.parallelize(Seq.empty[Double]).sum() should be(0.0 +- 0.0001)
+    sc.parallelize(Seq(1.0)).sum() should be(1.0 +- 0.0001)
+    sc.parallelize(Seq(1.0, 2.0)).sum() should be(3.0 +- 0.0001)
+  }
+
   // Verify tests on the histogram functionality. We test with both evenly
   // and non-evenly spaced buckets as the bucket lookup function changes.
   test("WorksOnEmpty") {

From 1c9195415d5238a8ce067e27440193ff3f831706 Mon Sep 17 00:00:00 2001
From: Erik van Oosten
Date: Mon, 13 Apr 2015 14:58:12 +0200
Subject: [PATCH 2/2] Rewrote double range matcher to an exact equality assert (SPARK-6878)

---
 .../test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala
index 4b8842946066..01039b9449da 100644
--- a/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala
@@ -18,15 +18,14 @@
 package org.apache.spark.rdd
 
 import org.scalatest.FunSuite
-import org.scalatest.Matchers._
 
 import org.apache.spark._
 
 class DoubleRDDSuite extends FunSuite with SharedSparkContext {
   test("sum") {
-    sc.parallelize(Seq.empty[Double]).sum() should be(0.0 +- 0.0001)
-    sc.parallelize(Seq(1.0)).sum() should be(1.0 +- 0.0001)
-    sc.parallelize(Seq(1.0, 2.0)).sum() should be(3.0 +- 0.0001)
+    assert(sc.parallelize(Seq.empty[Double]).sum() === 0.0)
+    assert(sc.parallelize(Seq(1.0)).sum() === 1.0)
+    assert(sc.parallelize(Seq(1.0, 2.0)).sum() === 3.0)
   }
 
   // Verify tests on the histogram functionality. We test with both evenly