From 23d12d1c8b52220242369d01bc60e3157efd347f Mon Sep 17 00:00:00 2001
From: Robert Dodier <robert_dodier@users.sourceforge.net>
Date: Fri, 13 Mar 2015 10:58:04 -0700
Subject: [PATCH 1/7] Initial, incomplete work towards calibration for binary
 classifiers.

 o ProbabilisticClassifier.scala:
    mention calibration in comments

 o BinaryClassificationMetrics.scala:
    adapting code for ROC to calibration; incomplete and commented
    out for now

 o BinaryClassificationMetricsSuite.scala:
    tests for calibration
---
 .../ProbabilisticClassifier.scala             |   2 +
 .../BinaryClassificationMetrics.scala         | 102 ++++++++++++++++--
 .../BinaryClassificationMetricsSuite.scala    |  31 +++++-
 3 files changed, 123 insertions(+), 12 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala
index fdd1851ae550..071d05e1b1de 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala
@@ -177,6 +177,8 @@ abstract class ProbabilisticClassificationModel[
    * Predict the probability of each class given the features.
    * These predictions are also called class conditional probabilities.
    *
+   * See BinaryClassificationMetrics.calibration to assess calibration.
+   *
    * This internal method is used to implement [[transform()]] and output [[probabilityCol]].
    *
    * @return Estimated class conditional probabilities
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
index 508fe532b130..dcdf0477cad6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
@@ -29,14 +29,14 @@ import org.apache.spark.sql.DataFrame
  * Evaluator for binary classification.
  *
  * @param scoreAndLabels an RDD of (score, label) pairs.
- * @param numBins if greater than 0, then the curves (ROC curve, PR curve) computed internally
- *                will be down-sampled to this many "bins". If 0, no down-sampling will occur.
- *                This is useful because the curve contains a point for each distinct score
- *                in the input, and this could be as large as the input itself -- millions of
- *                points or more, when thousands may be entirely sufficient to summarize
- *                the curve. After down-sampling, the curves will instead be made of approximately
- *                `numBins` points instead. Points are made from bins of equal numbers of
- *                consecutive points. The size of each bin is
+ * @param numBins if greater than 0, then the curves (ROC curve, PR curve, calibration curve)
+ *                computed internally will be down-sampled to this many "bins". If 0, no
+ *                down-sampling will occur. This is useful because the curve contains a point for
+ *                each distinct score in the input, and this could be as large as the input itself
+ *                -- millions of points or more, when thousands may be entirely sufficient to
+ *                summarize the curve. After down-sampling, the curves will be made of
+ *                approximately `numBins` points instead. Points are made from bins of equal
+ *                numbers of consecutive points. The size of each bin is
  *                `floor(scoreAndLabels.count() / numBins)`, which means the resulting number
  *                of bins may not exactly equal numBins. The last bin in each partition may
  *                be smaller as a result, meaning there may be an extra sample at
@@ -226,4 +226,90 @@ class BinaryClassificationMetrics @Since("1.3.0") (
       (x(c), y(c))
     }
   }
+
+  /**
+   * Returns the calibration or reliability curve, which is an RDD of
+   * ((first score, last score), (fraction of positive examples, number of examples)) per bin.
+   * @see http://en.wikipedia.org/wiki/Calibration_%28statistics%29#In_classification
+   *
+   * References:
+   *
+   * Mahdi Pakdaman Naeini, Gregory F. Cooper, Milos Hauskrecht.
+   * Binary Classifier Calibration: Non-parametric approach.
+   * http://arxiv.org/abs/1401.3390
+   *
+   * Alexandru Niculescu-Mizil, Rich Caruana.
+   * Predicting Good Probabilities With Supervised Learning.
+   * Appearing in Proceedings of the 22nd International Conference on Machine Learning,
+   * Bonn, Germany, 2005.
+   * http://www.cs.cornell.edu/~alexn/papers/calibration.icml05.crc.rev3.pdf
+   *
+   * Properties and benefits of calibrated classifiers.
+   * Ira Cohen, Moises Goldszmidt.
+   * http://www.hpl.hp.com/techreports/2004/HPL-2004-22R1.pdf
+   */
+  def calibration(): RDD[((Double, Double), (Double, Int))] = {
+    val calibrationCurve = assessedCalibration
+    val sc = confusions.context
+    sc.makeRDD(calibrationCurve, 1)
+  }
+  
+  private lazy val assessedCalibration: Seq[((Double, Double), (Double, Int))] = {
+//  val distinctScoreAndLabels = scoreAndLabels.combineByKey(
+//    createCombiner = (label: Double) => new BinaryLabelCounter(0L, 0L) += label,
+//    mergeValue = (c: BinaryLabelCounter, label: Double) => c += label,
+//    mergeCombiners = (c1: BinaryLabelCounter, c2: BinaryLabelCounter) => c1 += c2
+//  ).sortByKey(ascending = true)
+//
+//  val binnedCounts =
+//    if (numBins == 0) {
+//      counts
+//    } else {
+//      val countsSize = counts.count()
+//
+//      var grouping = countsSize / numBins
+//      if (grouping < 2) {
+//        logInfo(s"Curve is too small ($countsSize) for $numBins bins to be useful")
+//        counts
+//      } else {
+//        if (grouping >= Int.MaxValue) {
+//          logWarning(
+//            s"Curve too large ($countsSize) for $numBins bins; capping at ${Int.MaxValue}")
+//          grouping = Int.MaxValue
+//        }
+//        counts.mapPartitions(_.grouped(grouping.toInt).map { pairs =>
+//          // The score of the combined point will be just the first one's score
+//          // I THINK WE WANT THE AVERAGE OF SCORE OVER THE BIN HERE
+//          val firstScore = pairs.head._1
+//          // The point will contain all counts in this chunk
+//          val agg = new BinaryLabelCounter()
+//          pairs.foreach(pair => agg += pair._2)
+//          (firstScore, agg)
+//        })
+//      }
+//    }
+//
+//  val agg = binnedCounts.values.mapPartitions { iter =>
+//    val agg = new BinaryLabelCounter()
+//    iter.foreach(agg += _)
+//    Iterator(agg)
+//  }.collect()
+//  val partitionwiseCumulativeCounts =
+//    agg.scanLeft(new BinaryLabelCounter())(
+//      (agg: BinaryLabelCounter, c: BinaryLabelCounter) => agg.clone() += c)
+//  val totalCount = partitionwiseCumulativeCounts.last
+//  logInfo(s"Total counts: $totalCount")
+//  // WE WANT PER-BIN COUNTS HERE, NOT CUMULATIVE
+//  val correctCounts = binnedCounts.mapPartitionsWithIndex(
+//    (index: Int, iter: Iterator[(Double, BinaryLabelCounter)]) => {
+//      val cumCount = partitionwiseCumulativeCounts(index)
+//      iter.map { case (score, c) =>
+//        cumCount += c
+//        (score, cumCount.clone())
+//      }
+//    }, preservesPartitioning = true)
+//  correctCounts.persist()
+//  correctCounts
+    Seq(((0.0, 0.0), (0.0, 0)), ((1.0, 1.0), (1.0, 0)))
+  }
 }
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala
index 99d52fabc530..bf40b143cc20 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala
@@ -28,6 +28,10 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
   private def pairsWithinEpsilon(x: ((Double, Double), (Double, Double))): Boolean =
     (x._1._1 ~= x._2._1 absTol 1E-5) && (x._1._2 ~= x._2._2 absTol 1E-5)
 
+  private def pairPairsWithinEpsilon(x: (((Double, Double), (Double, Int)), ((Double, Double), (Double, Int)))): Boolean =
+    (x._1._1._1 ~= x._2._1._1 absTol 1E-5) && (x._1._1._2 ~= x._2._1._2 absTol 1E-5) &&
+      (x._1._2._1 ~= x._2._2._1 absTol 1E-5) && x._1._2._2 == x._2._2._2
+
   private def assertSequencesMatch(left: Seq[Double], right: Seq[Double]): Unit = {
       assert(left.zip(right).forall(areWithinEpsilon))
   }
@@ -37,6 +41,11 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
     assert(left.zip(right).forall(pairsWithinEpsilon))
   }
 
+  private def assertTupleTupleSequencesMatch(left: Seq[((Double, Double), (Double, Int))],
+       right: Seq[((Double, Double), (Double, Int))]): Unit = {
+    assert(left.zip(right).forall(pairPairsWithinEpsilon))
+  }
+
   private def validateMetrics(metrics: BinaryClassificationMetrics,
       expectedThresholds: Seq[Double],
       expectedROCCurve: Seq[(Double, Double)],
@@ -44,7 +53,8 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
       expectedFMeasures1: Seq[Double],
       expectedFmeasures2: Seq[Double],
       expectedPrecisions: Seq[Double],
-      expectedRecalls: Seq[Double]) = {
+      expectedRecalls: Seq[Double],
+      expectedCalibration: Seq[((Double, Double), (Double, Int))]) = {
 
     assertSequencesMatch(metrics.thresholds().collect(), expectedThresholds)
     assertTupleSequencesMatch(metrics.roc().collect(), expectedROCCurve)
@@ -59,6 +69,7 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
       expectedThresholds.zip(expectedPrecisions))
     assertTupleSequencesMatch(metrics.recallByThreshold().collect(),
       expectedThresholds.zip(expectedRecalls))
+    assertTupleTupleSequencesMatch(metrics.calibration().collect(), expectedCalibration)
   }
 
   test("binary evaluation metrics") {
@@ -80,8 +91,11 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
     val prCurve = Seq((0.0, 1.0)) ++ pr
     val f1 = pr.map { case (r, p) => 2.0 * (p * r) / (p + r)}
     val f2 = pr.map { case (r, p) => 5.0 * (p * r) / (4.0 * p + r)}
+    val calibration = Seq(((0.1, 0.1), (0.5, 2)), ((0.4, 0.4), (0.0, 1)), ((0.6, 0.6), (2/3.0, 3)),
+      ((0.8, 0.8), (1.0, 1)))
 
-    validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls)
+    validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls,
+      calibration)
   }
 
   test("binary evaluation metrics for RDD where all examples have positive label") {
@@ -97,8 +111,10 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
     val prCurve = Seq((0.0, 1.0)) ++ pr
     val f1 = pr.map { case (r, p) => 2.0 * (p * r) / (p + r)}
     val f2 = pr.map { case (r, p) => 5.0 * (p * r) / (4.0 * p + r)}
+    val calibration = Seq(((0.5, 0.5), (1.0, 2)))
 
-    validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls)
+    validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls,
+      calibration)
   }
 
   test("binary evaluation metrics for RDD where all examples have negative label") {
@@ -121,7 +137,10 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
       case (r, p) => 5.0 * (p * r) / (4.0 * p + r)
     }
 
-    validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls)
+    val calibration = Seq(((0.5, 0.5), (0.0, 2)))
+
+    validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls,
+      calibration)
   }
 
   test("binary evaluation metrics with downsampling") {
@@ -157,6 +176,10 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
         (1.0, 1.0), (1.0, 1.0)
       ) ==
       downsampledROC)
+
+    val calibration = Seq(((0.1, 0.2), (0.0, 2)), ((0.3, 0.4), (0.5, 2)), ((0.5, 0.6), (0.5, 2)),
+      ((0.7, 0.9), (2/3.0, 3)))
+    assert(calibration == downsampled.calibration().collect())
   }
 
 }

From 1df8619c2112f3c4cc1554c86b901599abebf11f Mon Sep 17 00:00:00 2001
From: Robert Dodier <robert_dodier@users.sourceforge.net>
Date: Fri, 13 Mar 2015 14:08:14 -0700
Subject: [PATCH 2/7] Initial attempt to implement calibration; compiles, not
 tested yet.

---
 .../BinaryClassificationMetrics.scala         | 101 +++++++-----------
 1 file changed, 40 insertions(+), 61 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
index dcdf0477cad6..eaf03a3bf8b5 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
@@ -248,68 +248,47 @@ class BinaryClassificationMetrics @Since("1.3.0") (
    * Ira Cohen, Moises Goldszmidt.
    * http://www.hpl.hp.com/techreports/2004/HPL-2004-22R1.pdf
    */
-  def calibration(): RDD[((Double, Double), (Double, Int))] = {
-    val calibrationCurve = assessedCalibration
-    val sc = confusions.context
-    sc.makeRDD(calibrationCurve, 1)
+  def calibration(): RDD[((Double, Double), (Double, Long))] = {
+    assessedCalibration
   }
   
-  private lazy val assessedCalibration: Seq[((Double, Double), (Double, Int))] = {
-//  val distinctScoreAndLabels = scoreAndLabels.combineByKey(
-//    createCombiner = (label: Double) => new BinaryLabelCounter(0L, 0L) += label,
-//    mergeValue = (c: BinaryLabelCounter, label: Double) => c += label,
-//    mergeCombiners = (c1: BinaryLabelCounter, c2: BinaryLabelCounter) => c1 += c2
-//  ).sortByKey(ascending = true)
-//
-//  val binnedCounts =
-//    if (numBins == 0) {
-//      counts
-//    } else {
-//      val countsSize = counts.count()
-//
-//      var grouping = countsSize / numBins
-//      if (grouping < 2) {
-//        logInfo(s"Curve is too small ($countsSize) for $numBins bins to be useful")
-//        counts
-//      } else {
-//        if (grouping >= Int.MaxValue) {
-//          logWarning(
-//            s"Curve too large ($countsSize) for $numBins bins; capping at ${Int.MaxValue}")
-//          grouping = Int.MaxValue
-//        }
-//        counts.mapPartitions(_.grouped(grouping.toInt).map { pairs =>
-//          // The score of the combined point will be just the first one's score
-//          // I THINK WE WANT THE AVERAGE OF SCORE OVER THE BIN HERE
-//          val firstScore = pairs.head._1
-//          // The point will contain all counts in this chunk
-//          val agg = new BinaryLabelCounter()
-//          pairs.foreach(pair => agg += pair._2)
-//          (firstScore, agg)
-//        })
-//      }
-//    }
-//
-//  val agg = binnedCounts.values.mapPartitions { iter =>
-//    val agg = new BinaryLabelCounter()
-//    iter.foreach(agg += _)
-//    Iterator(agg)
-//  }.collect()
-//  val partitionwiseCumulativeCounts =
-//    agg.scanLeft(new BinaryLabelCounter())(
-//      (agg: BinaryLabelCounter, c: BinaryLabelCounter) => agg.clone() += c)
-//  val totalCount = partitionwiseCumulativeCounts.last
-//  logInfo(s"Total counts: $totalCount")
-//  // WE WANT PER-BIN COUNTS HERE, NOT CUMULATIVE
-//  val correctCounts = binnedCounts.mapPartitionsWithIndex(
-//    (index: Int, iter: Iterator[(Double, BinaryLabelCounter)]) => {
-//      val cumCount = partitionwiseCumulativeCounts(index)
-//      iter.map { case (score, c) =>
-//        cumCount += c
-//        (score, cumCount.clone())
-//      }
-//    }, preservesPartitioning = true)
-//  correctCounts.persist()
-//  correctCounts
-    Seq(((0.0, 0.0), (0.0, 0)), ((1.0, 1.0), (1.0, 0)))
+  private lazy val assessedCalibration: RDD[((Double, Double), (Double, Long))] = {
+    val distinctScoresAndLabelCounts = scoreAndLabels.combineByKey(
+      createCombiner = (label: Double) => new BinaryLabelCounter(0L, 0L) += label,
+      mergeValue = (c: BinaryLabelCounter, label: Double) => c += label,
+      mergeCombiners = (c1: BinaryLabelCounter, c2: BinaryLabelCounter) => c1 += c2
+    ).sortByKey(ascending = true)
+  
+    val binnedDistinctScoresAndLabelCounts =
+      if (numBins == 0) {
+        distinctScoresAndLabelCounts.map { pair => ((pair._1, pair._1), pair._2) }
+      } else {
+        val distinctScoresCount = distinctScoresAndLabelCounts.count()
+  
+        var groupCount = distinctScoresCount / numBins
+        if (groupCount < 2) {
+          logInfo(s"Too few distinct scores ($distinctScoresCount) for $numBins bins to be useful")
+          distinctScoresAndLabelCounts.map { pair => ((pair._1, pair._1), pair._2) }
+        } else {
+          if (groupCount >= Int.MaxValue) {
+            val n = distinctScoresCount
+            logWarning(
+              s"Too many distinct scores ($n) for $numBins bins; capping at ${Int.MaxValue}")
+            groupCount = Int.MaxValue
+          }
+          distinctScoresAndLabelCounts.mapPartitions(_.grouped(groupCount.toInt).map { pairs =>
+            val firstScore = pairs.head._1
+            val lastScore = pairs.last._1
+            val agg = new BinaryLabelCounter()
+            pairs.foreach(pair => agg += pair._2)
+            ((firstScore, lastScore), agg)
+          })
+        }
+      }
+  
+    binnedDistinctScoresAndLabelCounts.map { pair =>
+      val n = pair._2.numPositives + pair._2.numNegatives
+      (pair._1, (pair._2.numPositives / n.toDouble, n))
+    }
   }
 }

From 0769ee6f69017a390c861cce5f3655bb79f03d40 Mon Sep 17 00:00:00 2001
From: Robert Dodier <robert_dodier@users.sourceforge.net>
Date: Fri, 13 Mar 2015 14:26:32 -0700
Subject: [PATCH 3/7] Change (..., (Double, Int)) to (..., (Double, Long)) to
 match types to what calibration actually returns.

---
 .../BinaryClassificationMetricsSuite.scala    | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala
index bf40b143cc20..9d330973460b 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala
@@ -28,7 +28,7 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
   private def pairsWithinEpsilon(x: ((Double, Double), (Double, Double))): Boolean =
     (x._1._1 ~= x._2._1 absTol 1E-5) && (x._1._2 ~= x._2._2 absTol 1E-5)
 
-  private def pairPairsWithinEpsilon(x: (((Double, Double), (Double, Int)), ((Double, Double), (Double, Int)))): Boolean =
+  private def pairPairsWithinEpsilon(x: (((Double, Double), (Double, Long)), ((Double, Double), (Double, Long)))): Boolean =
     (x._1._1._1 ~= x._2._1._1 absTol 1E-5) && (x._1._1._2 ~= x._2._1._2 absTol 1E-5) &&
       (x._1._2._1 ~= x._2._2._1 absTol 1E-5) && x._1._2._2 == x._2._2._2
 
@@ -41,8 +41,8 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
     assert(left.zip(right).forall(pairsWithinEpsilon))
   }
 
-  private def assertTupleTupleSequencesMatch(left: Seq[((Double, Double), (Double, Int))],
-       right: Seq[((Double, Double), (Double, Int))]): Unit = {
+  private def assertTupleTupleSequencesMatch(left: Seq[((Double, Double), (Double, Long))],
+       right: Seq[((Double, Double), (Double, Long))]): Unit = {
     assert(left.zip(right).forall(pairPairsWithinEpsilon))
   }
 
@@ -54,7 +54,7 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
       expectedFmeasures2: Seq[Double],
       expectedPrecisions: Seq[Double],
       expectedRecalls: Seq[Double],
-      expectedCalibration: Seq[((Double, Double), (Double, Int))]) = {
+      expectedCalibration: Seq[((Double, Double), (Double, Long))]) = {
 
     assertSequencesMatch(metrics.thresholds().collect(), expectedThresholds)
     assertTupleSequencesMatch(metrics.roc().collect(), expectedROCCurve)
@@ -91,8 +91,8 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
     val prCurve = Seq((0.0, 1.0)) ++ pr
     val f1 = pr.map { case (r, p) => 2.0 * (p * r) / (p + r)}
     val f2 = pr.map { case (r, p) => 5.0 * (p * r) / (4.0 * p + r)}
-    val calibration = Seq(((0.1, 0.1), (0.5, 2)), ((0.4, 0.4), (0.0, 1)), ((0.6, 0.6), (2/3.0, 3)),
-      ((0.8, 0.8), (1.0, 1)))
+    val calibration = Seq(((0.1, 0.1), (0.5, 2L)), ((0.4, 0.4), (0.0, 1L)), ((0.6, 0.6), (2/3.0, 3L)),
+      ((0.8, 0.8), (1.0, 1L)))
 
     validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls,
       calibration)
@@ -111,7 +111,7 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
     val prCurve = Seq((0.0, 1.0)) ++ pr
     val f1 = pr.map { case (r, p) => 2.0 * (p * r) / (p + r)}
     val f2 = pr.map { case (r, p) => 5.0 * (p * r) / (4.0 * p + r)}
-    val calibration = Seq(((0.5, 0.5), (1.0, 2)))
+    val calibration = Seq(((0.5, 0.5), (1.0, 2L)))
 
     validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls,
       calibration)
@@ -137,7 +137,7 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
       case (r, p) => 5.0 * (p * r) / (4.0 * p + r)
     }
 
-    val calibration = Seq(((0.5, 0.5), (0.0, 2)))
+    val calibration = Seq(((0.5, 0.5), (0.0, 2L)))
 
     validateMetrics(metrics, thresholds, rocCurve, prCurve, f1, f2, precisions, recalls,
       calibration)
@@ -177,8 +177,8 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
       ) ==
       downsampledROC)
 
-    val calibration = Seq(((0.1, 0.2), (0.0, 2)), ((0.3, 0.4), (0.5, 2)), ((0.5, 0.6), (0.5, 2)),
-      ((0.7, 0.9), (2/3.0, 3)))
+    val calibration = Seq(((0.1, 0.2), (0.0, 2L)), ((0.3, 0.4), (0.5, 2L)), ((0.5, 0.6), (0.5, 2L)),
+      ((0.7, 0.9), (2/3.0, 3L)))
     assert(calibration == downsampled.calibration().collect())
   }
 

From bf682c0fd0ae22d43b6bf752416b9a599e21e70b Mon Sep 17 00:00:00 2001
From: Robert Dodier <robert_dodier@users.sourceforge.net>
Date: Fri, 13 Mar 2015 17:48:56 -0700
Subject: [PATCH 4/7] Adjust JVM command line to get tests to run.

---
 pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pom.xml b/pom.xml
index d04ed1e79865..f016ab674909 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1889,7 +1889,7 @@
               <include>**/*Suite.java</include>
             </includes>
             <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
-            <argLine>-Xmx3g -Xss4096k -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine>
+            <argLine>-Xmx1024M -XX:MaxPermSize=1024M -Xss4M</argLine>
             <environmentVariables>
               <!--
                 Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes
@@ -1927,7 +1927,7 @@
             <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
             <junitxml>.</junitxml>
             <filereports>SparkTestSuite.txt</filereports>
-            <argLine>-ea -Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=${CodeCacheSize}</argLine>
+            <argLine>-ea -Xmx1024M -XX:MaxPermSize=1024M -Xss4M</argLine>
             <stderr/>
             <environmentVariables>
               <!--

From 967e96194c6ec1b1d08c8d46475e078aa68578b8 Mon Sep 17 00:00:00 2001
From: Robert Dodier <robert_dodier@users.sourceforge.net>
Date: Fri, 13 Mar 2015 17:50:11 -0700
Subject: [PATCH 5/7] Adjust bin size to prevent final bin from being very
 small compared to others.

---
 .../mllib/evaluation/BinaryClassificationMetrics.scala   | 9 ++++++++-
 .../evaluation/BinaryClassificationMetricsSuite.scala    | 5 ++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
index eaf03a3bf8b5..604c6c6258c2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala
@@ -265,7 +265,14 @@ class BinaryClassificationMetrics @Since("1.3.0") (
       } else {
         val distinctScoresCount = distinctScoresAndLabelCounts.count()
   
-        var groupCount = distinctScoresCount / numBins
+        var groupCount =
+          if (distinctScoresCount % numBins == 0) {
+            distinctScoresCount / numBins
+          } else {
+            // prevent the last bin from being very small compared to the others
+            distinctScoresCount / numBins + 1
+          }
+        
         if (groupCount < 2) {
           logInfo(s"Too few distinct scores ($distinctScoresCount) for $numBins bins to be useful")
           distinctScoresAndLabelCounts.map { pair => ((pair._1, pair._1), pair._2) }
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala
index 9d330973460b..f4a3c1283a27 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala
@@ -177,9 +177,8 @@ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSpark
       ) ==
       downsampledROC)
 
-    val calibration = Seq(((0.1, 0.2), (0.0, 2L)), ((0.3, 0.4), (0.5, 2L)), ((0.5, 0.6), (0.5, 2L)),
-      ((0.7, 0.9), (2/3.0, 3L)))
-    assert(calibration == downsampled.calibration().collect())
+    val calibration = Array(((0.1, 0.3), (1/3.0, 3L)), ((0.4, 0.6), (1/3.0, 3L)), ((0.7, 0.9), (2/3.0, 3L)))
+    assertTupleTupleSequencesMatch(calibration, downsampled.calibration().collect())
   }
 
 }

From 4281f556ff948df8818e178ab70d5183ee20e32d Mon Sep 17 00:00:00 2001
From: Robert Dodier <robert_dodier@users.sourceforge.net>
Date: Fri, 13 Mar 2015 19:14:30 -0700
Subject: [PATCH 6/7] Revert local changes to pom.xml.

---
 pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pom.xml b/pom.xml
index f016ab674909..0a3d205cd3a2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1889,7 +1889,7 @@
               <include>**/*Suite.java</include>
             </includes>
             <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
-            <argLine>-Xmx1024M -XX:MaxPermSize=1024M -Xss4M</argLine>
+            <argLine>-Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine>
             <environmentVariables>
               <!--
                 Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes
@@ -1927,7 +1927,7 @@
             <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
             <junitxml>.</junitxml>
             <filereports>SparkTestSuite.txt</filereports>
-            <argLine>-ea -Xmx1024M -XX:MaxPermSize=1024M -Xss4M</argLine>
+            <argLine>-ea -Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=${CodeCacheSize}</argLine>
             <stderr/>
             <environmentVariables>
               <!--

From 6cf1e2c77139aaa76e06845f646894ef96ac47aa Mon Sep 17 00:00:00 2001
From: Robert Dodier <robert_dodier@users.sourceforge.net>
Date: Mon, 5 Oct 2015 16:06:46 -0700
Subject: [PATCH 7/7] Adjust command line arguments to make my pom.xml the same
 as spark master again.

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 0a3d205cd3a2..d04ed1e79865 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1889,7 +1889,7 @@
               <include>**/*Suite.java</include>
             </includes>
             <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
-            <argLine>-Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine>
+            <argLine>-Xmx3g -Xss4096k -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine>
             <environmentVariables>
               <!--
                 Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes