MultilabelMetricsSuite.scala

@@ -19,6 +19,7 @@ package org.apache.spark.mllib.evaluation
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.mllib.util.MLlibTestSparkContext
+import org.apache.spark.mllib.util.TestingUtils._
 import org.apache.spark.rdd.RDD
 
 class MultilabelMetricsSuite extends SparkFunSuite with MLlibTestSparkContext {
@@ -79,24 +80,24 @@ class MultilabelMetricsSuite extends SparkFunSuite with MLlibTestSparkContext {
     val hammingLoss = (1.0 / (7 * 3)) * (2 + 2 + 1 + 0 + 0 + 1 + 1)
     val strictAccuracy = 2.0 / 7
     val accuracy = 1.0 / 7 * (1.0 / 3 + 1.0 /3 + 0 + 1.0 / 1 + 2.0 / 2 + 2.0 / 3 + 1.0 / 2)
-    assert(math.abs(metrics.precision(0.0) - precision0) < delta)
-    assert(math.abs(metrics.precision(1.0) - precision1) < delta)
-    assert(math.abs(metrics.precision(2.0) - precision2) < delta)
-    assert(math.abs(metrics.recall(0.0) - recall0) < delta)
-    assert(math.abs(metrics.recall(1.0) - recall1) < delta)
-    assert(math.abs(metrics.recall(2.0) - recall2) < delta)
-    assert(math.abs(metrics.f1Measure(0.0) - f1measure0) < delta)
-    assert(math.abs(metrics.f1Measure(1.0) - f1measure1) < delta)
-    assert(math.abs(metrics.f1Measure(2.0) - f1measure2) < delta)
-    assert(math.abs(metrics.microPrecision - microPrecisionClass) < delta)
-    assert(math.abs(metrics.microRecall - microRecallClass) < delta)
-    assert(math.abs(metrics.microF1Measure - microF1MeasureClass) < delta)
-    assert(math.abs(metrics.precision - macroPrecisionDoc) < delta)
-    assert(math.abs(metrics.recall - macroRecallDoc) < delta)
-    assert(math.abs(metrics.f1Measure - macroF1MeasureDoc) < delta)
-    assert(math.abs(metrics.hammingLoss - hammingLoss) < delta)
-    assert(math.abs(metrics.subsetAccuracy - strictAccuracy) < delta)
-    assert(math.abs(metrics.accuracy - accuracy) < delta)
+    assert(metrics.precision(0.0) ~== precision0 absTol delta)
+    assert(metrics.precision(1.0) ~== precision1 absTol delta)
+    assert(metrics.precision(2.0) ~== precision2 absTol delta)
+    assert(metrics.recall(0.0) ~== recall0 absTol delta)
+    assert(metrics.recall(1.0) ~== recall1 absTol delta)
+    assert(metrics.recall(2.0) ~== recall2 absTol delta)
+    assert(metrics.f1Measure(0.0) ~== f1measure0 absTol delta)
+    assert(metrics.f1Measure(1.0) ~== f1measure1 absTol delta)
+    assert(metrics.f1Measure(2.0) ~== f1measure2 absTol delta)
+    assert(metrics.microPrecision ~== microPrecisionClass absTol delta)
+    assert(metrics.microRecall ~== microRecallClass absTol delta)
+    assert(metrics.microF1Measure ~== microF1MeasureClass absTol delta)
+    assert(metrics.precision ~== macroPrecisionDoc absTol delta)
+    assert(metrics.recall ~== macroRecallDoc absTol delta)
+    assert(metrics.f1Measure ~== macroF1MeasureDoc absTol delta)
+    assert(metrics.hammingLoss ~== hammingLoss absTol delta)
+    assert(metrics.subsetAccuracy ~== strictAccuracy absTol delta)
+    assert(metrics.accuracy ~== accuracy absTol delta)
     assert(metrics.labels.sameElements(Array(0.0, 1.0, 2.0)))
   }
 }
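A note on the syntax being adopted throughout this diff: `x ~== y absTol eps` comes from `org.apache.spark.mllib.util.TestingUtils`, pulled in by the new import in each file. The sketch below shows how such a comparison DSL can be assembled; `ApproxCompare`, `CompareWithTol`, and `DoubleApproxOps` are illustrative names, not Spark's actual identifiers.

```scala
object ApproxCompare {
  // Right-hand side of a comparison: expected value plus tolerance.
  final case class CompareWithTol(y: Double, eps: Double)

  implicit class DoubleApproxOps(val x: Double) extends AnyVal {
    // Builds the right-hand side: `y absTol eps`.
    def absTol(eps: Double): CompareWithTol = CompareWithTol(x, eps)

    // Boolean form: returns true/false, safe inside predicates.
    def ~=(r: CompareWithTol): Boolean = math.abs(x - r.y) < r.eps

    // Asserting form: fails with a readable message on mismatch.
    def ~==(r: CompareWithTol): Boolean = {
      if (!(this ~= r)) {
        throw new AssertionError(s"Expected $x and ${r.y} to be within ${r.eps}")
      }
      true
    }
  }
}
```

A Scala parsing detail makes the unparenthesized form work: operators ending in `=` (other than `==`, `<=`, `>=`, `!=`) get assignment-operator precedence, the lowest there is, while alphanumeric names such as `absTol` rank just above them. `x ~== y absTol eps` therefore parses as `x ~== (y absTol eps)`, and a failed assertion can report both values and the tolerance instead of an opaque `false`.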
AssociationRulesSuite.scala

@@ -18,6 +18,7 @@ package org.apache.spark.mllib.fpm
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.mllib.util.MLlibTestSparkContext
+import org.apache.spark.mllib.util.TestingUtils._
 
 class AssociationRulesSuite extends SparkFunSuite with MLlibTestSparkContext {
 
@@ -63,7 +64,7 @@ class AssociationRulesSuite extends SparkFunSuite with MLlibTestSparkContext {
       [1] 23
      */
     assert(results1.size === 23)
-    assert(results1.count(rule => math.abs(rule.confidence - 1.0D) < 1e-6) == 23)
+    assert(results1.count(rule => rule.confidence ~= 1.0D absTol 1e-6) == 23)
 
     val results2 = ar
       .setMinConfidence(0)
@@ -84,7 +85,7 @@ class AssociationRulesSuite extends SparkFunSuite with MLlibTestSparkContext {
       [1] 23
      */
     assert(results2.size === 30)
-    assert(results2.count(rule => math.abs(rule.confidence - 1.0D) < 1e-6) == 23)
+    assert(results2.count(rule => rule.confidence ~= 1.0D absTol 1e-6) == 23)
   }
 }
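Note the single `=` in the `count` predicates above (`~=`), versus the double form (`~==`) used in plain assertions elsewhere. The two differ in failure behavior: `~=` evaluates to a Boolean, while `~==` fails the test on mismatch. The distinction matters here because `results2` deliberately contains rules whose confidence is below 1.0, so an asserting comparison inside the predicate would abort on the first such rule instead of counting the matches. A small illustration, assuming the DSL sketched above:

```scala
val confidences = Seq(1.0, 0.75, 1.0 - 1e-9)

// Boolean form: counts the values near 1.0 and never throws.
val matches = confidences.count(c => c ~= 1.0 absTol 1e-6)  // == 2

// Asserting form: would fail upon reaching 0.75, so it cannot
// serve as a counting predicate.
// confidences.count(c => c ~== 1.0 absTol 1e-6)
```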

FPGrowthSuite.scala

@@ -18,6 +18,7 @@ package org.apache.spark.mllib.fpm
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.mllib.util.MLlibTestSparkContext
+import org.apache.spark.mllib.util.TestingUtils._
 import org.apache.spark.util.Utils
 
 class FPGrowthSuite extends SparkFunSuite with MLlibTestSparkContext {
@@ -172,7 +173,7 @@ class FPGrowthSuite extends SparkFunSuite with MLlibTestSparkContext {
       .collect()
 
     assert(rules.size === 23)
-    assert(rules.count(rule => math.abs(rule.confidence - 1.0D) < 1e-6) == 23)
+    assert(rules.count(rule => rule.confidence ~= 1.0D absTol 1e-6) == 23)
   }
 
   test("FP-Growth using Int type") {
IndexedRowMatrixSuite.scala

@@ -22,6 +22,7 @@ import breeze.linalg.{diag => brzDiag, DenseMatrix => BDM, DenseVector => BDV}
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.mllib.linalg._
 import org.apache.spark.mllib.util.MLlibTestSparkContext
+import org.apache.spark.mllib.util.TestingUtils._
 import org.apache.spark.rdd.RDD
 
 class IndexedRowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext {
@@ -238,7 +239,7 @@ class IndexedRowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext {
 
     for (i <- 0 until n; j <- i + 1 until n) {
       val trueResult = gram(i, j) / scala.math.sqrt(gram(i, i) * gram(j, j))
-      assert(math.abs(G(i, j) - trueResult) < 1e-6)
+      assert(G(i, j) ~== trueResult absTol 1e-6)
     }
   }
 
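The reference value in this loop is ordinary cosine similarity read off the Gram matrix: `gram(i, j)` is the dot product of columns `i` and `j`, and `gram(i, i)` is the squared norm of column `i`. A self-contained sketch of the same computation (the `cosineSimilarity` helper is hypothetical, not part of the suite):

```scala
// For an n-column matrix A, the Gram matrix is G = A^T A, so
// G(i, j) = a_i . a_j and G(i, i) = |a_i|^2.
def cosineSimilarity(gram: (Int, Int) => Double)(i: Int, j: Int): Double =
  gram(i, j) / math.sqrt(gram(i, i) * gram(j, j))

// Worked example with columns (1, 0) and (1, 1): G = [[1, 1], [1, 2]],
// so the similarity is 1 / sqrt(1 * 2), roughly 0.7071.
val g = Array(Array(1.0, 1.0), Array(1.0, 2.0))
assert(math.abs(cosineSimilarity((i, j) => g(i)(j))(0, 1) - 1.0 / math.sqrt(2)) < 1e-12)
```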
RandomDataGeneratorSuite.scala

@@ -20,9 +20,9 @@ package org.apache.spark.mllib.random
 import org.apache.commons.math3.special.Gamma
 
 import org.apache.spark.SparkFunSuite
+import org.apache.spark.mllib.util.TestingUtils._
 import org.apache.spark.util.StatCounter
 
-// TODO update tests to use TestingUtils for floating point comparison after PR 1367 is merged
 class RandomDataGeneratorSuite extends SparkFunSuite {
 
   def apiChecks(gen: RandomDataGenerator[Double]) {
@@ -61,8 +61,8 @@ class RandomDataGeneratorSuite extends SparkFunSuite {
       gen.setSeed(seed.toLong)
       val sample = (0 until 100000).map { _ => gen.nextValue()}
       val stats = new StatCounter(sample)
-      assert(math.abs(stats.mean - mean) < epsilon)
-      assert(math.abs(stats.stdev - stddev) < epsilon)
+      assert(stats.mean ~== mean absTol epsilon)
+      assert(stats.stdev ~== stddev absTol epsilon)
     }
   }
 
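The pattern in this suite, drawing a large seeded sample and comparing the sample's mean and standard deviation against the distribution's parameters, is inherently statistical, which is why its epsilon is far coarser than the 1e-6 tolerances elsewhere in this diff. A dependency-free sketch of the same check (seed and tolerances are illustrative):

```scala
val rng = new scala.util.Random(42L)
val sample = Array.fill(100000)(rng.nextGaussian())  // standard normal: mean 0, stdev 1

val mean = sample.sum / sample.length
val stdev = math.sqrt(sample.map(v => (v - mean) * (v - mean)).sum / sample.length)

// With n = 100000 the standard error of the mean is about 1 / sqrt(n), or 0.003,
// so a 0.05 tolerance leaves ample headroom; the fixed seed keeps the test deterministic.
assert(math.abs(mean - 0.0) < 0.05)
assert(math.abs(stdev - 1.0) < 0.05)
```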
RandomRDDsSuite.scala

@@ -23,14 +23,13 @@ import org.apache.spark.SparkFunSuite
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.mllib.rdd.{RandomRDD, RandomRDDPartition}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
+import org.apache.spark.mllib.util.TestingUtils._
 import org.apache.spark.rdd.RDD
 import org.apache.spark.util.StatCounter
 
 /*
  * Note: avoid including APIs that do not set the seed for the RNG in unit tests
  * in order to guarantee deterministic behavior.
- *
- * TODO update tests to use TestingUtils for floating point comparison after PR 1367 is merged
  */
 class RandomRDDsSuite extends SparkFunSuite with MLlibTestSparkContext with Serializable {
 
@@ -43,8 +42,8 @@ class RandomRDDsSuite extends SparkFunSuite with MLlibTestSparkContext with Serializable {
     val stats = rdd.stats()
     assert(expectedSize === stats.count)
     assert(expectedNumPartitions === rdd.partitions.size)
-    assert(math.abs(stats.mean - expectedMean) < epsilon)
-    assert(math.abs(stats.stdev - expectedStddev) < epsilon)
+    assert(stats.mean ~== expectedMean absTol epsilon)
+    assert(stats.stdev ~== expectedStddev absTol epsilon)
   }
 
   // assume test RDDs are small
@@ -63,8 +62,8 @@ class RandomRDDsSuite extends SparkFunSuite with MLlibTestSparkContext with Serializable {
     }}
     assert(expectedRows === values.size / expectedColumns)
     val stats = new StatCounter(values)
-    assert(math.abs(stats.mean - expectedMean) < epsilon)
-    assert(math.abs(stats.stdev - expectedStddev) < epsilon)
+    assert(stats.mean ~== expectedMean absTol epsilon)
+    assert(stats.stdev ~== expectedStddev absTol epsilon)
   }
 
   test("RandomRDD sizes") {
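Both helpers above lean on `StatCounter`: `rdd.stats()` (from `DoubleRDDFunctions`) aggregates count, mean, and standard deviation in one pass over the data. A small usage sketch, assuming a live `SparkContext` named `sc` and the TestingUtils import:

```scala
val rdd = sc.parallelize(Seq(1.0, 2.0, 3.0, 4.0), numSlices = 2)
val stats = rdd.stats()

assert(stats.count == 4)
// StatCounter's stdev is the population form: variance = sum((x - mean)^2) / n = 1.25 here.
assert(stats.mean ~== 2.5 absTol 1e-12)
assert(stats.stdev ~== math.sqrt(1.25) absTol 1e-12)
```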
CorrelationSuite.scala

@@ -26,6 +26,7 @@ import org.apache.spark.mllib.random.RandomRDDs
 import org.apache.spark.mllib.stat.correlation.{Correlations, PearsonCorrelation,
   SpearmanCorrelation}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
+import org.apache.spark.mllib.util.TestingUtils._
 
 class CorrelationSuite extends SparkFunSuite with MLlibTestSparkContext with Logging {
 
@@ -57,15 +58,15 @@ class CorrelationSuite extends SparkFunSuite with MLlibTestSparkContext with Logging {
     val expected = 0.6546537
     val default = Statistics.corr(x, y)
     val p1 = Statistics.corr(x, y, "pearson")
-    assert(approxEqual(expected, default))
-    assert(approxEqual(expected, p1))
+    assert(expected ~== default absTol 1e-6)
+    assert(expected ~== p1 absTol 1e-6)
 
     // numPartitions >= size for input RDDs
     for (numParts <- List(xData.size, xData.size * 2)) {
       val x1 = sc.parallelize(xData, numParts)
       val y1 = sc.parallelize(yData, numParts)
       val p2 = Statistics.corr(x1, y1)
-      assert(approxEqual(expected, p2))
+      assert(expected ~== p2 absTol 1e-6)
     }
 
     // RDD of zero variance
@@ -78,14 +79,14 @@ class CorrelationSuite extends SparkFunSuite with MLlibTestSparkContext with Logging {
     val y = sc.parallelize(yData)
     val expected = 0.5
     val s1 = Statistics.corr(x, y, "spearman")
-    assert(approxEqual(expected, s1))
+    assert(expected ~== s1 absTol 1e-6)
 
     // numPartitions >= size for input RDDs
     for (numParts <- List(xData.size, xData.size * 2)) {
       val x1 = sc.parallelize(xData, numParts)
       val y1 = sc.parallelize(yData, numParts)
       val s2 = Statistics.corr(x1, y1, "spearman")
-      assert(approxEqual(expected, s2))
+      assert(expected ~== s2 absTol 1e-6)
     }
 
     // RDD of zero variance => zero variance in ranks
@@ -141,14 +142,14 @@ class CorrelationSuite extends SparkFunSuite with MLlibTestSparkContext with Logging {
     val a = RandomRDDs.normalRDD(sc, 100000, 10).map(_ + 1000000000.0)
     val b = RandomRDDs.normalRDD(sc, 100000, 10).map(_ + 1000000000.0)
     val p = Statistics.corr(a, b, method = "pearson")
-    assert(approxEqual(p, 0.0, 0.01))
+    assert(p ~== 0.0 absTol 0.01)
   }
 
   def approxEqual(v1: Double, v2: Double, threshold: Double = 1e-6): Boolean = {
     if (v1.isNaN) {
       v2.isNaN
     } else {
-      math.abs(v1 - v2) <= threshold
+      v1 ~== v2 absTol threshold
     }
   }
 
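One behavioral subtlety in the `approxEqual` rewrite is worth flagging: `~==` fails the test on mismatch rather than returning `false` (that is its point, since the failure message carries both values), so the rewritten helper no longer returns `false` to callers that branch on the result, and it also moves the boundary from `<=` to the strict `<` used by the absolute comparison. For call sites wrapped in a positive `assert`, the throwing form still fails the right tests; only a caller that expects `false` back would notice. If Boolean semantics are wanted, the non-asserting form preserves them:

```scala
// Non-throwing variant, assuming TestingUtils semantics as sketched earlier.
def approxEqual(v1: Double, v2: Double, threshold: Double = 1e-6): Boolean = {
  if (v1.isNaN) v2.isNaN else v1 ~= v2 absTol threshold
}
```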
KernelDensitySuite.scala

@@ -21,6 +21,7 @@ import org.apache.commons.math3.distribution.NormalDistribution
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.mllib.util.MLlibTestSparkContext
+import org.apache.spark.mllib.util.TestingUtils._
 
 class KernelDensitySuite extends SparkFunSuite with MLlibTestSparkContext {
   test("kernel density single sample") {
@@ -29,8 +30,8 @@ class KernelDensitySuite extends SparkFunSuite with MLlibTestSparkContext {
     val densities = new KernelDensity().setSample(rdd).setBandwidth(3.0).estimate(evaluationPoints)
     val normal = new NormalDistribution(5.0, 3.0)
     val acceptableErr = 1e-6
-    assert(math.abs(densities(0) - normal.density(5.0)) < acceptableErr)
-    assert(math.abs(densities(1) - normal.density(6.0)) < acceptableErr)
+    assert(densities(0) ~== normal.density(5.0) absTol acceptableErr)
+    assert(densities(1) ~== normal.density(6.0) absTol acceptableErr)
   }
 
   test("kernel density multiple samples") {
@@ -40,9 +41,9 @@ class KernelDensitySuite extends SparkFunSuite with MLlibTestSparkContext {
     val normal1 = new NormalDistribution(5.0, 3.0)
     val normal2 = new NormalDistribution(10.0, 3.0)
     val acceptableErr = 1e-6
-    assert(math.abs(
-      densities(0) - (normal1.density(5.0) + normal2.density(5.0)) / 2) < acceptableErr)
-    assert(math.abs(
-      densities(1) - (normal1.density(6.0) + normal2.density(6.0)) / 2) < acceptableErr)
+    assert(
+      densities(0) ~== ((normal1.density(5.0) + normal2.density(5.0)) / 2) absTol acceptableErr)
+    assert(
+      densities(1) ~== ((normal1.density(6.0) + normal2.density(6.0)) / 2) absTol acceptableErr)
   }
 }
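The expected values in the two-sample test drop out of the definition of a Gaussian kernel density estimate: with bandwidth `h` and samples `x_1 .. x_n`, the estimate at `x` is the average of normal densities centered at the samples, `p(x) = (1 / n) * sum_i N(x; x_i, h)`. Two samples at 5.0 and 10.0 therefore give exactly the mean of the two `NormalDistribution` densities. A self-contained sketch (`gaussianPdf` and `kde` are illustrative helpers, not the suite's API):

```scala
def gaussianPdf(x: Double, mu: Double, sd: Double): Double =
  math.exp(-0.5 * math.pow((x - mu) / sd, 2)) / (sd * math.sqrt(2 * math.Pi))

def kde(x: Double, samples: Seq[Double], bandwidth: Double): Double =
  samples.map(gaussianPdf(x, _, bandwidth)).sum / samples.size

// Matches the test's expectation at evaluation point 5.0.
val expected = (gaussianPdf(5.0, 5.0, 3.0) + gaussianPdf(5.0, 10.0, 3.0)) / 2
assert(math.abs(kde(5.0, Seq(5.0, 10.0), 3.0) - expected) < 1e-12)
```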
EnsembleTestHelper.scala

@@ -22,6 +22,7 @@ import scala.collection.mutable
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.tree.model.TreeEnsembleModel
+import org.apache.spark.mllib.util.TestingUtils._
 import org.apache.spark.util.StatCounter
 
 object EnsembleTestHelper {
@@ -43,8 +44,8 @@ object EnsembleTestHelper {
       values ++= row
     }
     val stats = new StatCounter(values)
-    assert(math.abs(stats.mean - expectedMean) < epsilon)
-    assert(math.abs(stats.stdev - expectedStddev) < epsilon)
+    assert(stats.mean ~== expectedMean absTol epsilon)
+    assert(stats.stdev ~== expectedStddev absTol epsilon)
   }
 
   def validateClassifier(