From 78f37334164a015605d5c23ff7217a131c3ea3a7 Mon Sep 17 00:00:00 2001 From: jiangxingbo Date: Mon, 12 Sep 2016 23:14:28 +0800 Subject: [PATCH 1/5] check the equality of double values with tolerance within percentage range. --- .../ArithmeticExpressionSuite.scala | 8 +++----- .../expressions/ExpressionEvalHelper.scala | 20 +++++++++++++++++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala index 687387507e21..5c9824289b3c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala @@ -170,11 +170,9 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Remainder(positiveLongLit, positiveLongLit), 0L) checkEvaluation(Remainder(negativeLongLit, negativeLongLit), 0L) - // TODO: the following lines would fail the test due to inconsistency result of interpret - // and codegen for remainder between giant values, seems like a numeric stability issue - // DataTypeTestUtils.numericTypeWithoutDecimal.foreach { tpe => - // checkConsistencyBetweenInterpretedAndCodegen(Remainder, tpe, tpe) - // } + DataTypeTestUtils.numericTypeWithoutDecimal.foreach { tpe => + checkConsistencyBetweenInterpretedAndCodegen(Remainder, tpe, tpe) + } } test("Abs") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 668543a28bd3..6e7eaf110bd7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -289,13 +289,29 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { (result, expected) match { case (result: Array[Byte], expected: Array[Byte]) => java.util.Arrays.equals(result, expected) - case (result: Double, expected: Spread[Double @unchecked]) => - expected.asInstanceOf[Spread[Double]].isWithin(result) case (result: Double, expected: Double) if result.isNaN && expected.isNaN => true + case (result: Double, expected: Double) => + compareDoubles(result, expected) case (result: Float, expected: Float) if result.isNaN && expected.isNaN => true case _ => result == expected } } + + /** + * Check the equality of two [[Double]] values, allows a tolerance within a certain percentage + * range. + */ + private def compareDoubles( + result: Double, + expected: Double, + tolerance: Double = 1E-10): Boolean = { + if ((result.isNaN && expected.isNaN) || result == expected) { + return true + } + + val spread = Spread[Double](expected, expected.abs * tolerance) + spread.isWithin(result) + } } From 1721e0c026f2bafbc73cb400f434c37b2921110a Mon Sep 17 00:00:00 2001 From: jiangxingbo Date: Tue, 13 Sep 2016 16:29:33 +0800 Subject: [PATCH 2/5] use relTol to compare double values. --- .../org/apache/spark/util/TestingUtils.scala | 108 ++++++++++++++++++ .../apache/spark/util/TestingUtilsSuite.scala | 101 ++++++++++++++++ .../RandomForestClassifierSuite.scala | 2 +- .../evaluation/RegressionEvaluatorSuite.scala | 2 +- .../LogisticRegressionSuite.scala | 1 + .../classification/NaiveBayesSuite.scala | 2 +- .../StreamingLogisticRegressionSuite.scala | 2 +- .../clustering/BisectingKMeansSuite.scala | 1 + .../clustering/GaussianMixtureSuite.scala | 1 + .../spark/mllib/clustering/LDASuite.scala | 1 + .../PowerIterationClusteringSuite.scala | 2 +- .../clustering/StreamingKMeansSuite.scala | 1 + .../evaluation/AreaUnderCurveSuite.scala | 2 +- .../BinaryClassificationMetricsSuite.scala | 2 +- .../evaluation/RankingMetricsSuite.scala | 2 +- .../evaluation/RegressionMetricsSuite.scala | 2 +- .../apache/spark/mllib/feature/IDFSuite.scala | 1 + .../spark/mllib/feature/NormalizerSuite.scala | 1 + .../apache/spark/mllib/linalg/BLASSuite.scala | 1 + .../spark/mllib/linalg/VectorsSuite.scala | 2 +- .../linalg/distributed/RowMatrixSuite.scala | 1 + .../optimization/GradientDescentSuite.scala | 2 +- .../spark/mllib/optimization/LBFGSSuite.scala | 2 +- .../spark/mllib/optimization/NNLSSuite.scala | 2 +- .../MatrixFactorizationModelSuite.scala | 2 +- .../regression/IsotonicRegressionSuite.scala | 2 +- .../mllib/stat/HypothesisTestSuite.scala | 1 + .../MultivariateOnlineSummarizerSuite.scala | 1 + .../MultivariateGaussianSuite.scala | 2 +- .../spark/mllib/util/MLUtilsSuite.scala | 2 +- .../spark/mllib/util/TestingUtils.scala | 86 +------------- .../spark/mllib/util/TestingUtilsSuite.scala | 75 ------------ .../expressions/ExpressionEvalHelper.scala | 19 +-- 33 files changed, 240 insertions(+), 194 deletions(-) create mode 100644 core/src/test/scala/org/apache/spark/util/TestingUtils.scala create mode 100644 core/src/test/scala/org/apache/spark/util/TestingUtilsSuite.scala diff --git a/core/src/test/scala/org/apache/spark/util/TestingUtils.scala b/core/src/test/scala/org/apache/spark/util/TestingUtils.scala new file mode 100644 index 000000000000..2bc7f982458e --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/TestingUtils.scala @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import org.scalatest.exceptions.TestFailedException + +object TestingUtils { + + val ABS_TOL_MSG = " using absolute tolerance" + val REL_TOL_MSG = " using relative tolerance" + + /** + * Private helper function for comparing two values using relative tolerance. + * Note that if x or y is extremely close to zero, i.e., smaller than Double.MinPositiveValue, + * the relative tolerance is meaningless, so the exception will be raised to warn users. + */ + private def RelativeErrorComparison(x: Double, y: Double, eps: Double): Boolean = { + val absX = math.abs(x) + val absY = math.abs(y) + val diff = math.abs(x - y) + if (x == y) { + true + } else if (absX < Double.MinPositiveValue || absY < Double.MinPositiveValue) { + throw new TestFailedException( + s"$x or $y is extremely close to zero, so the relative tolerance is meaningless.", 0) + } else { + diff < eps * math.min(absX, absY) + } + } + + /** + * Private helper function for comparing two values using absolute tolerance. + */ + private def AbsoluteErrorComparison(x: Double, y: Double, eps: Double): Boolean = { + math.abs(x - y) < eps + } + + case class CompareDoubleRightSide( + fun: (Double, Double, Double) => Boolean, y: Double, eps: Double, method: String) + + /** + * Implicit class for comparing two double values using relative tolerance or absolute tolerance. + */ + implicit class DoubleWithAlmostEquals(val x: Double) { + + /** + * When the difference of two values are within eps, returns true; otherwise, returns false. + */ + def ~=(r: CompareDoubleRightSide): Boolean = r.fun(x, r.y, r.eps) + + /** + * When the difference of two values are within eps, returns false; otherwise, returns true. + */ + def !~=(r: CompareDoubleRightSide): Boolean = !r.fun(x, r.y, r.eps) + + /** + * Throws exception when the difference of two values are NOT within eps; + * otherwise, returns true. + */ + def ~==(r: CompareDoubleRightSide): Boolean = { + if (!r.fun(x, r.y, r.eps)) { + throw new TestFailedException( + s"Expected $x and ${r.y} to be within ${r.eps}${r.method}.", 0) + } + true + } + + /** + * Throws exception when the difference of two values are within eps; otherwise, returns true. + */ + def !~==(r: CompareDoubleRightSide): Boolean = { + if (r.fun(x, r.y, r.eps)) { + throw new TestFailedException( + s"Did not expect $x and ${r.y} to be within ${r.eps}${r.method}.", 0) + } + true + } + + /** + * Comparison using absolute tolerance. + */ + def absTol(eps: Double): CompareDoubleRightSide = + CompareDoubleRightSide(AbsoluteErrorComparison, x, eps, ABS_TOL_MSG) + + /** + * Comparison using relative tolerance. + */ + def relTol(eps: Double): CompareDoubleRightSide = + CompareDoubleRightSide(RelativeErrorComparison, x, eps, REL_TOL_MSG) + + override def toString: String = x.toString + } +} diff --git a/core/src/test/scala/org/apache/spark/util/TestingUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/TestingUtilsSuite.scala new file mode 100644 index 000000000000..0b7b26aa9792 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/TestingUtilsSuite.scala @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.SparkFunSuite +import org.apache.spark.util.TestingUtils._ + +class TestingUtilsSuite extends SparkFunSuite { + + test("Comparing doubles using relative error.") { + + assert(23.1 ~== 23.52 relTol 0.02) + assert(23.1 ~== 22.74 relTol 0.02) + assert(23.1 ~= 23.52 relTol 0.02) + assert(23.1 ~= 22.74 relTol 0.02) + assert(!(23.1 !~= 23.52 relTol 0.02)) + assert(!(23.1 !~= 22.74 relTol 0.02)) + + // Should throw exception with message when test fails. + intercept[TestFailedException](23.1 !~== 23.52 relTol 0.02) + intercept[TestFailedException](23.1 !~== 22.74 relTol 0.02) + intercept[TestFailedException](23.1 ~== 23.63 relTol 0.02) + intercept[TestFailedException](23.1 ~== 22.34 relTol 0.02) + + assert(23.1 !~== 23.63 relTol 0.02) + assert(23.1 !~== 22.34 relTol 0.02) + assert(23.1 !~= 23.63 relTol 0.02) + assert(23.1 !~= 22.34 relTol 0.02) + assert(!(23.1 ~= 23.63 relTol 0.02)) + assert(!(23.1 ~= 22.34 relTol 0.02)) + + // Comparing against zero should fail the test and throw exception with message + // saying that the relative error is meaningless in this situation. + intercept[TestFailedException](0.1 ~== 0.0 relTol 0.032) + intercept[TestFailedException](0.1 ~= 0.0 relTol 0.032) + intercept[TestFailedException](0.1 !~== 0.0 relTol 0.032) + intercept[TestFailedException](0.1 !~= 0.0 relTol 0.032) + intercept[TestFailedException](0.0 ~== 0.1 relTol 0.032) + intercept[TestFailedException](0.0 ~= 0.1 relTol 0.032) + intercept[TestFailedException](0.0 !~== 0.1 relTol 0.032) + intercept[TestFailedException](0.0 !~= 0.1 relTol 0.032) + + // Comparisons of numbers very close to zero. + assert(10 * Double.MinPositiveValue ~== 9.5 * Double.MinPositiveValue relTol 0.01) + assert(10 * Double.MinPositiveValue !~== 11 * Double.MinPositiveValue relTol 0.01) + + assert(-Double.MinPositiveValue ~== 1.18 * -Double.MinPositiveValue relTol 0.012) + assert(-Double.MinPositiveValue ~== 1.38 * -Double.MinPositiveValue relTol 0.012) + } + + test("Comparing doubles using absolute error.") { + + assert(17.8 ~== 17.99 absTol 0.2) + assert(17.8 ~== 17.61 absTol 0.2) + assert(17.8 ~= 17.99 absTol 0.2) + assert(17.8 ~= 17.61 absTol 0.2) + assert(!(17.8 !~= 17.99 absTol 0.2)) + assert(!(17.8 !~= 17.61 absTol 0.2)) + + // Should throw exception with message when test fails. + intercept[TestFailedException](17.8 !~== 17.99 absTol 0.2) + intercept[TestFailedException](17.8 !~== 17.61 absTol 0.2) + intercept[TestFailedException](17.8 ~== 18.01 absTol 0.2) + intercept[TestFailedException](17.8 ~== 17.59 absTol 0.2) + + assert(17.8 !~== 18.01 absTol 0.2) + assert(17.8 !~== 17.59 absTol 0.2) + assert(17.8 !~= 18.01 absTol 0.2) + assert(17.8 !~= 17.59 absTol 0.2) + assert(!(17.8 ~= 18.01 absTol 0.2)) + assert(!(17.8 ~= 17.59 absTol 0.2)) + + // Comparisons of numbers very close to zero, and both side of zeros + assert( + Double.MinPositiveValue ~== 4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) + assert( + Double.MinPositiveValue !~== 6 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) + + assert( + -Double.MinPositiveValue ~== 3 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) + assert( + Double.MinPositiveValue !~== -4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) + } +} diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala index 2e99ee157ae9..9cbba515a394 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala @@ -28,9 +28,9 @@ import org.apache.spark.mllib.regression.{LabeledPoint => OldLabeledPoint} import org.apache.spark.mllib.tree.{EnsembleTestHelper, RandomForest => OldRandomForest} import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo} import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.util.TestingUtils._ /** * Test suite for [[RandomForestClassifier]]. diff --git a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala index 42ff8adf6bd6..3809afb43302 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.ml.param.ParamsSuite import org.apache.spark.ml.regression.LinearRegression import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils} import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext} -import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ class RegressionEvaluatorSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala index 5cf437776851..4cd113d18afa 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala @@ -30,6 +30,7 @@ import org.apache.spark.mllib.regression._ import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext} import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.rdd.RDD +import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala index 5ec4c15387e9..34e3593d746d 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext} -import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils object NaiveBayesSuite { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala index 5f797a60f09e..3ffc02bf6b3d 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala @@ -22,9 +22,9 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint -import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.streaming.{StreamingContext, TestSuiteBase} import org.apache.spark.streaming.dstream.DStream +import org.apache.spark.util.TestingUtils._ class StreamingLogisticRegressionSuite extends SparkFunSuite with TestSuiteBase { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/BisectingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/BisectingKMeansSuite.scala index 35f7932ae822..3bee799a909d 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/BisectingKMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/BisectingKMeansSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils class BisectingKMeansSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/GaussianMixtureSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/GaussianMixtureSuite.scala index 67e680be7330..670f6b21c2c9 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/GaussianMixtureSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/GaussianMixtureSuite.scala @@ -22,6 +22,7 @@ import org.apache.spark.mllib.linalg.{Matrices, Vector, Vectors} import org.apache.spark.mllib.stat.distribution.MultivariateGaussian import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils class GaussianMixtureSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala index 211e2bc026c7..516dcb4d1070 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala @@ -26,6 +26,7 @@ import org.apache.spark.graphx.Edge import org.apache.spark.mllib.linalg.{DenseMatrix, Matrix, Vector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils class LDASuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala index b33b86b39a42..7733492c30df 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala @@ -23,7 +23,7 @@ import scala.util.Random import org.apache.spark.{SparkContext, SparkFunSuite} import org.apache.spark.graphx.{Edge, Graph} import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils class PowerIterationClusteringSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala index fdaa098345d1..8a159c1c8f66 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.streaming.{StreamingContext, TestSuiteBase} import org.apache.spark.streaming.dstream.DStream import org.apache.spark.util.random.XORShiftRandom +import org.apache.spark.util.TestingUtils._ class StreamingKMeansSuite extends SparkFunSuite with TestSuiteBase { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/AreaUnderCurveSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/AreaUnderCurveSuite.scala index 87ccc7eda44e..a2ac142ebd7b 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/AreaUnderCurveSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/AreaUnderCurveSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.evaluation import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ class AreaUnderCurveSuite extends SparkFunSuite with MLlibTestSparkContext { test("auc computation") { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala index 99d52fabc530..9c740d4e1e8f 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.evaluation import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala index 8e9d910e646c..a866ef7ed685 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.evaluation import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ class RankingMetricsSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala index f1d517383643..f848be028246 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.evaluation import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ class RegressionMetricsSuite extends SparkFunSuite with MLlibTestSparkContext { val obs = List[Double](77, 85, 62, 55, 63, 88, 57, 81, 51) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/IDFSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/IDFSuite.scala index 5c938a61ed99..f7f2fce7d66d 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/feature/IDFSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/IDFSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ class IDFSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala index 10f7bafd6cf5..514f8b6b566a 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ class NormalizerSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala index 80da03cc2efe..e99cc3fb725a 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.mllib.linalg import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.BLAS._ import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ class BLASSuite extends SparkFunSuite { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala index 71a3ceac1b94..18fdb6167f83 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala @@ -25,7 +25,7 @@ import org.json4s.jackson.JsonMethods.{parse => parseJson} import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.internal.Logging import org.apache.spark.ml.{linalg => newlinalg} -import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ class VectorsSuite extends SparkFunSuite with Logging { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala index 7c9e14f8cee7..d993470501d2 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala @@ -29,6 +29,7 @@ import org.apache.spark.mllib.linalg.{Matrices, Vector, Vectors} import org.apache.spark.mllib.random.RandomRDDs import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext} import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala index 37eb794b0c5c..cfe7174fe962 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression._ import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext, MLUtils} -import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ object GradientDescentSuite { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala index 75ae0eb32fb7..4ea446e94258 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext} -import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ class LBFGSSuite extends SparkFunSuite with MLlibTestSparkContext with Matchers { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala index 4ec3dc0df03b..691a89637dfb 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala @@ -22,7 +22,7 @@ import scala.util.Random import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV} import org.apache.spark.SparkFunSuite -import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ class NNLSSuite extends SparkFunSuite { /** Generate an NNLS problem whose optimal solution is the all-ones vector. */ diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModelSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModelSuite.scala index 2c8ed057a516..8138b299862a 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModelSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModelSuite.scala @@ -19,8 +19,8 @@ package org.apache.spark.mllib.recommendation import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.rdd.RDD +import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils class MatrixFactorizationModelSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala index 94da626d92eb..6922375c8ba6 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala @@ -21,7 +21,7 @@ import org.scalatest.Matchers import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils class IsotonicRegressionSuite extends SparkFunSuite with MLlibTestSparkContext with Matchers { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala index 46fcebe13274..6c4e0f791c5c 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala @@ -29,6 +29,7 @@ import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.stat.test.ChiSqTest import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ class HypothesisTestSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala index 797e84fcc737..c816807e75d0 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.mllib.stat import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ class MultivariateOnlineSummarizerSuite extends SparkFunSuite { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussianSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussianSuite.scala index 669d44223d71..098d245ce500 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussianSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussianSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.mllib.stat.distribution import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.{Matrices, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.TestingUtils._ class MultivariateGaussianSuite extends SparkFunSuite with MLlibTestSparkContext { test("univariate") { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala index 6aa93c907600..e8dc5d2f6344 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala @@ -29,10 +29,10 @@ import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.mllib.linalg.{DenseVector, Matrices, SparseVector, Vectors} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.MLUtils._ -import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.sql.Row import org.apache.spark.sql.functions.col import org.apache.spark.sql.types.MetadataBuilder +import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtils.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtils.scala index 39a6bc37d963..e54aa0f50e2c 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtils.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtils.scala @@ -20,94 +20,10 @@ package org.apache.spark.mllib.util import org.scalatest.exceptions.TestFailedException import org.apache.spark.mllib.linalg.{Matrix, Vector} +import org.apache.spark.util.TestingUtils._ object TestingUtils { - val ABS_TOL_MSG = " using absolute tolerance" - val REL_TOL_MSG = " using relative tolerance" - - /** - * Private helper function for comparing two values using relative tolerance. - * Note that if x or y is extremely close to zero, i.e., smaller than Double.MinPositiveValue, - * the relative tolerance is meaningless, so the exception will be raised to warn users. - */ - private def RelativeErrorComparison(x: Double, y: Double, eps: Double): Boolean = { - val absX = math.abs(x) - val absY = math.abs(y) - val diff = math.abs(x - y) - if (x == y) { - true - } else if (absX < Double.MinPositiveValue || absY < Double.MinPositiveValue) { - throw new TestFailedException( - s"$x or $y is extremely close to zero, so the relative tolerance is meaningless.", 0) - } else { - diff < eps * math.min(absX, absY) - } - } - - /** - * Private helper function for comparing two values using absolute tolerance. - */ - private def AbsoluteErrorComparison(x: Double, y: Double, eps: Double): Boolean = { - math.abs(x - y) < eps - } - - case class CompareDoubleRightSide( - fun: (Double, Double, Double) => Boolean, y: Double, eps: Double, method: String) - - /** - * Implicit class for comparing two double values using relative tolerance or absolute tolerance. - */ - implicit class DoubleWithAlmostEquals(val x: Double) { - - /** - * When the difference of two values are within eps, returns true; otherwise, returns false. - */ - def ~=(r: CompareDoubleRightSide): Boolean = r.fun(x, r.y, r.eps) - - /** - * When the difference of two values are within eps, returns false; otherwise, returns true. - */ - def !~=(r: CompareDoubleRightSide): Boolean = !r.fun(x, r.y, r.eps) - - /** - * Throws exception when the difference of two values are NOT within eps; - * otherwise, returns true. - */ - def ~==(r: CompareDoubleRightSide): Boolean = { - if (!r.fun(x, r.y, r.eps)) { - throw new TestFailedException( - s"Expected $x and ${r.y} to be within ${r.eps}${r.method}.", 0) - } - true - } - - /** - * Throws exception when the difference of two values are within eps; otherwise, returns true. - */ - def !~==(r: CompareDoubleRightSide): Boolean = { - if (r.fun(x, r.y, r.eps)) { - throw new TestFailedException( - s"Did not expect $x and ${r.y} to be within ${r.eps}${r.method}.", 0) - } - true - } - - /** - * Comparison using absolute tolerance. - */ - def absTol(eps: Double): CompareDoubleRightSide = - CompareDoubleRightSide(AbsoluteErrorComparison, x, eps, ABS_TOL_MSG) - - /** - * Comparison using relative tolerance. - */ - def relTol(eps: Double): CompareDoubleRightSide = - CompareDoubleRightSide(RelativeErrorComparison, x, eps, REL_TOL_MSG) - - override def toString: String = x.toString - } - case class CompareVectorRightSide( fun: (Vector, Vector, Double) => Boolean, y: Vector, eps: Double, method: String) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtilsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtilsSuite.scala index 1aff44480aac..c3f33b5f8d18 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtilsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtilsSuite.scala @@ -25,81 +25,6 @@ import org.apache.spark.mllib.util.TestingUtils._ class TestingUtilsSuite extends SparkFunSuite { - test("Comparing doubles using relative error.") { - - assert(23.1 ~== 23.52 relTol 0.02) - assert(23.1 ~== 22.74 relTol 0.02) - assert(23.1 ~= 23.52 relTol 0.02) - assert(23.1 ~= 22.74 relTol 0.02) - assert(!(23.1 !~= 23.52 relTol 0.02)) - assert(!(23.1 !~= 22.74 relTol 0.02)) - - // Should throw exception with message when test fails. - intercept[TestFailedException](23.1 !~== 23.52 relTol 0.02) - intercept[TestFailedException](23.1 !~== 22.74 relTol 0.02) - intercept[TestFailedException](23.1 ~== 23.63 relTol 0.02) - intercept[TestFailedException](23.1 ~== 22.34 relTol 0.02) - - assert(23.1 !~== 23.63 relTol 0.02) - assert(23.1 !~== 22.34 relTol 0.02) - assert(23.1 !~= 23.63 relTol 0.02) - assert(23.1 !~= 22.34 relTol 0.02) - assert(!(23.1 ~= 23.63 relTol 0.02)) - assert(!(23.1 ~= 22.34 relTol 0.02)) - - // Comparing against zero should fail the test and throw exception with message - // saying that the relative error is meaningless in this situation. - intercept[TestFailedException](0.1 ~== 0.0 relTol 0.032) - intercept[TestFailedException](0.1 ~= 0.0 relTol 0.032) - intercept[TestFailedException](0.1 !~== 0.0 relTol 0.032) - intercept[TestFailedException](0.1 !~= 0.0 relTol 0.032) - intercept[TestFailedException](0.0 ~== 0.1 relTol 0.032) - intercept[TestFailedException](0.0 ~= 0.1 relTol 0.032) - intercept[TestFailedException](0.0 !~== 0.1 relTol 0.032) - intercept[TestFailedException](0.0 !~= 0.1 relTol 0.032) - - // Comparisons of numbers very close to zero. - assert(10 * Double.MinPositiveValue ~== 9.5 * Double.MinPositiveValue relTol 0.01) - assert(10 * Double.MinPositiveValue !~== 11 * Double.MinPositiveValue relTol 0.01) - - assert(-Double.MinPositiveValue ~== 1.18 * -Double.MinPositiveValue relTol 0.012) - assert(-Double.MinPositiveValue ~== 1.38 * -Double.MinPositiveValue relTol 0.012) - } - - test("Comparing doubles using absolute error.") { - - assert(17.8 ~== 17.99 absTol 0.2) - assert(17.8 ~== 17.61 absTol 0.2) - assert(17.8 ~= 17.99 absTol 0.2) - assert(17.8 ~= 17.61 absTol 0.2) - assert(!(17.8 !~= 17.99 absTol 0.2)) - assert(!(17.8 !~= 17.61 absTol 0.2)) - - // Should throw exception with message when test fails. - intercept[TestFailedException](17.8 !~== 17.99 absTol 0.2) - intercept[TestFailedException](17.8 !~== 17.61 absTol 0.2) - intercept[TestFailedException](17.8 ~== 18.01 absTol 0.2) - intercept[TestFailedException](17.8 ~== 17.59 absTol 0.2) - - assert(17.8 !~== 18.01 absTol 0.2) - assert(17.8 !~== 17.59 absTol 0.2) - assert(17.8 !~= 18.01 absTol 0.2) - assert(17.8 !~= 17.59 absTol 0.2) - assert(!(17.8 ~= 18.01 absTol 0.2)) - assert(!(17.8 ~= 17.59 absTol 0.2)) - - // Comparisons of numbers very close to zero, and both side of zeros - assert( - Double.MinPositiveValue ~== 4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) - assert( - Double.MinPositiveValue !~== 6 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) - - assert( - -Double.MinPositiveValue ~== 3 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) - assert( - Double.MinPositiveValue !~== -4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) - } - test("Comparing vectors using relative error.") { // Comparisons of two dense vectors diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 6e7eaf110bd7..65801b429656 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.optimizer.SimpleTestOptimizer import org.apache.spark.sql.catalyst.plans.logical.{OneRowRelation, Project} import org.apache.spark.sql.catalyst.util.MapData import org.apache.spark.sql.types.DataType +import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils /** @@ -292,26 +293,10 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { case (result: Double, expected: Double) if result.isNaN && expected.isNaN => true case (result: Double, expected: Double) => - compareDoubles(result, expected) + result ~== expected relTol 1E-10 case (result: Float, expected: Float) if result.isNaN && expected.isNaN => true case _ => result == expected } } - - /** - * Check the equality of two [[Double]] values, allows a tolerance within a certain percentage - * range. - */ - private def compareDoubles( - result: Double, - expected: Double, - tolerance: Double = 1E-10): Boolean = { - if ((result.isNaN && expected.isNaN) || result == expected) { - return true - } - - val spread = Spread[Double](expected, expected.abs * tolerance) - spread.isWithin(result) - } } From b4ad8e3ec3635be48e37220fd8f9dba1ef583ac3 Mon Sep 17 00:00:00 2001 From: jiangxingbo Date: Wed, 14 Sep 2016 16:47:08 +0800 Subject: [PATCH 3/5] Revert "use relTol to compare double values." This reverts commit 1721e0c026f2bafbc73cb400f434c37b2921110a. --- .../org/apache/spark/util/TestingUtils.scala | 108 ------------------ .../apache/spark/util/TestingUtilsSuite.scala | 101 ---------------- .../RandomForestClassifierSuite.scala | 2 +- .../evaluation/RegressionEvaluatorSuite.scala | 2 +- .../LogisticRegressionSuite.scala | 1 - .../classification/NaiveBayesSuite.scala | 2 +- .../StreamingLogisticRegressionSuite.scala | 2 +- .../clustering/BisectingKMeansSuite.scala | 1 - .../clustering/GaussianMixtureSuite.scala | 1 - .../spark/mllib/clustering/LDASuite.scala | 1 - .../PowerIterationClusteringSuite.scala | 2 +- .../clustering/StreamingKMeansSuite.scala | 1 - .../evaluation/AreaUnderCurveSuite.scala | 2 +- .../BinaryClassificationMetricsSuite.scala | 2 +- .../evaluation/RankingMetricsSuite.scala | 2 +- .../evaluation/RegressionMetricsSuite.scala | 2 +- .../apache/spark/mllib/feature/IDFSuite.scala | 1 - .../spark/mllib/feature/NormalizerSuite.scala | 1 - .../apache/spark/mllib/linalg/BLASSuite.scala | 1 - .../spark/mllib/linalg/VectorsSuite.scala | 2 +- .../linalg/distributed/RowMatrixSuite.scala | 1 - .../optimization/GradientDescentSuite.scala | 2 +- .../spark/mllib/optimization/LBFGSSuite.scala | 2 +- .../spark/mllib/optimization/NNLSSuite.scala | 2 +- .../MatrixFactorizationModelSuite.scala | 2 +- .../regression/IsotonicRegressionSuite.scala | 2 +- .../mllib/stat/HypothesisTestSuite.scala | 1 - .../MultivariateOnlineSummarizerSuite.scala | 1 - .../MultivariateGaussianSuite.scala | 2 +- .../spark/mllib/util/MLUtilsSuite.scala | 2 +- .../spark/mllib/util/TestingUtils.scala | 86 +++++++++++++- .../spark/mllib/util/TestingUtilsSuite.scala | 75 ++++++++++++ .../expressions/ExpressionEvalHelper.scala | 19 ++- 33 files changed, 194 insertions(+), 240 deletions(-) delete mode 100644 core/src/test/scala/org/apache/spark/util/TestingUtils.scala delete mode 100644 core/src/test/scala/org/apache/spark/util/TestingUtilsSuite.scala diff --git a/core/src/test/scala/org/apache/spark/util/TestingUtils.scala b/core/src/test/scala/org/apache/spark/util/TestingUtils.scala deleted file mode 100644 index 2bc7f982458e..000000000000 --- a/core/src/test/scala/org/apache/spark/util/TestingUtils.scala +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.util - -import org.scalatest.exceptions.TestFailedException - -object TestingUtils { - - val ABS_TOL_MSG = " using absolute tolerance" - val REL_TOL_MSG = " using relative tolerance" - - /** - * Private helper function for comparing two values using relative tolerance. - * Note that if x or y is extremely close to zero, i.e., smaller than Double.MinPositiveValue, - * the relative tolerance is meaningless, so the exception will be raised to warn users. - */ - private def RelativeErrorComparison(x: Double, y: Double, eps: Double): Boolean = { - val absX = math.abs(x) - val absY = math.abs(y) - val diff = math.abs(x - y) - if (x == y) { - true - } else if (absX < Double.MinPositiveValue || absY < Double.MinPositiveValue) { - throw new TestFailedException( - s"$x or $y is extremely close to zero, so the relative tolerance is meaningless.", 0) - } else { - diff < eps * math.min(absX, absY) - } - } - - /** - * Private helper function for comparing two values using absolute tolerance. - */ - private def AbsoluteErrorComparison(x: Double, y: Double, eps: Double): Boolean = { - math.abs(x - y) < eps - } - - case class CompareDoubleRightSide( - fun: (Double, Double, Double) => Boolean, y: Double, eps: Double, method: String) - - /** - * Implicit class for comparing two double values using relative tolerance or absolute tolerance. - */ - implicit class DoubleWithAlmostEquals(val x: Double) { - - /** - * When the difference of two values are within eps, returns true; otherwise, returns false. - */ - def ~=(r: CompareDoubleRightSide): Boolean = r.fun(x, r.y, r.eps) - - /** - * When the difference of two values are within eps, returns false; otherwise, returns true. - */ - def !~=(r: CompareDoubleRightSide): Boolean = !r.fun(x, r.y, r.eps) - - /** - * Throws exception when the difference of two values are NOT within eps; - * otherwise, returns true. - */ - def ~==(r: CompareDoubleRightSide): Boolean = { - if (!r.fun(x, r.y, r.eps)) { - throw new TestFailedException( - s"Expected $x and ${r.y} to be within ${r.eps}${r.method}.", 0) - } - true - } - - /** - * Throws exception when the difference of two values are within eps; otherwise, returns true. - */ - def !~==(r: CompareDoubleRightSide): Boolean = { - if (r.fun(x, r.y, r.eps)) { - throw new TestFailedException( - s"Did not expect $x and ${r.y} to be within ${r.eps}${r.method}.", 0) - } - true - } - - /** - * Comparison using absolute tolerance. - */ - def absTol(eps: Double): CompareDoubleRightSide = - CompareDoubleRightSide(AbsoluteErrorComparison, x, eps, ABS_TOL_MSG) - - /** - * Comparison using relative tolerance. - */ - def relTol(eps: Double): CompareDoubleRightSide = - CompareDoubleRightSide(RelativeErrorComparison, x, eps, REL_TOL_MSG) - - override def toString: String = x.toString - } -} diff --git a/core/src/test/scala/org/apache/spark/util/TestingUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/TestingUtilsSuite.scala deleted file mode 100644 index 0b7b26aa9792..000000000000 --- a/core/src/test/scala/org/apache/spark/util/TestingUtilsSuite.scala +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.util - -import org.scalatest.exceptions.TestFailedException - -import org.apache.spark.SparkFunSuite -import org.apache.spark.util.TestingUtils._ - -class TestingUtilsSuite extends SparkFunSuite { - - test("Comparing doubles using relative error.") { - - assert(23.1 ~== 23.52 relTol 0.02) - assert(23.1 ~== 22.74 relTol 0.02) - assert(23.1 ~= 23.52 relTol 0.02) - assert(23.1 ~= 22.74 relTol 0.02) - assert(!(23.1 !~= 23.52 relTol 0.02)) - assert(!(23.1 !~= 22.74 relTol 0.02)) - - // Should throw exception with message when test fails. - intercept[TestFailedException](23.1 !~== 23.52 relTol 0.02) - intercept[TestFailedException](23.1 !~== 22.74 relTol 0.02) - intercept[TestFailedException](23.1 ~== 23.63 relTol 0.02) - intercept[TestFailedException](23.1 ~== 22.34 relTol 0.02) - - assert(23.1 !~== 23.63 relTol 0.02) - assert(23.1 !~== 22.34 relTol 0.02) - assert(23.1 !~= 23.63 relTol 0.02) - assert(23.1 !~= 22.34 relTol 0.02) - assert(!(23.1 ~= 23.63 relTol 0.02)) - assert(!(23.1 ~= 22.34 relTol 0.02)) - - // Comparing against zero should fail the test and throw exception with message - // saying that the relative error is meaningless in this situation. - intercept[TestFailedException](0.1 ~== 0.0 relTol 0.032) - intercept[TestFailedException](0.1 ~= 0.0 relTol 0.032) - intercept[TestFailedException](0.1 !~== 0.0 relTol 0.032) - intercept[TestFailedException](0.1 !~= 0.0 relTol 0.032) - intercept[TestFailedException](0.0 ~== 0.1 relTol 0.032) - intercept[TestFailedException](0.0 ~= 0.1 relTol 0.032) - intercept[TestFailedException](0.0 !~== 0.1 relTol 0.032) - intercept[TestFailedException](0.0 !~= 0.1 relTol 0.032) - - // Comparisons of numbers very close to zero. - assert(10 * Double.MinPositiveValue ~== 9.5 * Double.MinPositiveValue relTol 0.01) - assert(10 * Double.MinPositiveValue !~== 11 * Double.MinPositiveValue relTol 0.01) - - assert(-Double.MinPositiveValue ~== 1.18 * -Double.MinPositiveValue relTol 0.012) - assert(-Double.MinPositiveValue ~== 1.38 * -Double.MinPositiveValue relTol 0.012) - } - - test("Comparing doubles using absolute error.") { - - assert(17.8 ~== 17.99 absTol 0.2) - assert(17.8 ~== 17.61 absTol 0.2) - assert(17.8 ~= 17.99 absTol 0.2) - assert(17.8 ~= 17.61 absTol 0.2) - assert(!(17.8 !~= 17.99 absTol 0.2)) - assert(!(17.8 !~= 17.61 absTol 0.2)) - - // Should throw exception with message when test fails. - intercept[TestFailedException](17.8 !~== 17.99 absTol 0.2) - intercept[TestFailedException](17.8 !~== 17.61 absTol 0.2) - intercept[TestFailedException](17.8 ~== 18.01 absTol 0.2) - intercept[TestFailedException](17.8 ~== 17.59 absTol 0.2) - - assert(17.8 !~== 18.01 absTol 0.2) - assert(17.8 !~== 17.59 absTol 0.2) - assert(17.8 !~= 18.01 absTol 0.2) - assert(17.8 !~= 17.59 absTol 0.2) - assert(!(17.8 ~= 18.01 absTol 0.2)) - assert(!(17.8 ~= 17.59 absTol 0.2)) - - // Comparisons of numbers very close to zero, and both side of zeros - assert( - Double.MinPositiveValue ~== 4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) - assert( - Double.MinPositiveValue !~== 6 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) - - assert( - -Double.MinPositiveValue ~== 3 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) - assert( - Double.MinPositiveValue !~== -4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) - } -} diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala index 9cbba515a394..2e99ee157ae9 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala @@ -28,9 +28,9 @@ import org.apache.spark.mllib.regression.{LabeledPoint => OldLabeledPoint} import org.apache.spark.mllib.tree.{EnsembleTestHelper, RandomForest => OldRandomForest} import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo} import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Row} -import org.apache.spark.util.TestingUtils._ /** * Test suite for [[RandomForestClassifier]]. diff --git a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala index 3809afb43302..42ff8adf6bd6 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.ml.param.ParamsSuite import org.apache.spark.ml.regression.LinearRegression import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils} import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext} -import org.apache.spark.util.TestingUtils._ +import org.apache.spark.mllib.util.TestingUtils._ class RegressionEvaluatorSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala index 4cd113d18afa..5cf437776851 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala @@ -30,7 +30,6 @@ import org.apache.spark.mllib.regression._ import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext} import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.rdd.RDD -import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala index 34e3593d746d..5ec4c15387e9 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext} -import org.apache.spark.util.TestingUtils._ +import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.util.Utils object NaiveBayesSuite { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala index 3ffc02bf6b3d..5f797a60f09e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala @@ -22,9 +22,9 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.streaming.{StreamingContext, TestSuiteBase} import org.apache.spark.streaming.dstream.DStream -import org.apache.spark.util.TestingUtils._ class StreamingLogisticRegressionSuite extends SparkFunSuite with TestSuiteBase { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/BisectingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/BisectingKMeansSuite.scala index 3bee799a909d..35f7932ae822 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/BisectingKMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/BisectingKMeansSuite.scala @@ -21,7 +21,6 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils class BisectingKMeansSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/GaussianMixtureSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/GaussianMixtureSuite.scala index 670f6b21c2c9..67e680be7330 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/GaussianMixtureSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/GaussianMixtureSuite.scala @@ -22,7 +22,6 @@ import org.apache.spark.mllib.linalg.{Matrices, Vector, Vectors} import org.apache.spark.mllib.stat.distribution.MultivariateGaussian import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils class GaussianMixtureSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala index 516dcb4d1070..211e2bc026c7 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala @@ -26,7 +26,6 @@ import org.apache.spark.graphx.Edge import org.apache.spark.mllib.linalg.{DenseMatrix, Matrix, Vector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils class LDASuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala index 7733492c30df..b33b86b39a42 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala @@ -23,7 +23,7 @@ import scala.util.Random import org.apache.spark.{SparkContext, SparkFunSuite} import org.apache.spark.graphx.{Edge, Graph} import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.util.TestingUtils._ +import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.util.Utils class PowerIterationClusteringSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala index 8a159c1c8f66..fdaa098345d1 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala @@ -23,7 +23,6 @@ import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.streaming.{StreamingContext, TestSuiteBase} import org.apache.spark.streaming.dstream.DStream import org.apache.spark.util.random.XORShiftRandom -import org.apache.spark.util.TestingUtils._ class StreamingKMeansSuite extends SparkFunSuite with TestSuiteBase { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/AreaUnderCurveSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/AreaUnderCurveSuite.scala index a2ac142ebd7b..87ccc7eda44e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/AreaUnderCurveSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/AreaUnderCurveSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.evaluation import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.util.TestingUtils._ +import org.apache.spark.mllib.util.TestingUtils._ class AreaUnderCurveSuite extends SparkFunSuite with MLlibTestSparkContext { test("auc computation") { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala index 9c740d4e1e8f..99d52fabc530 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetricsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.evaluation import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.util.TestingUtils._ +import org.apache.spark.mllib.util.TestingUtils._ class BinaryClassificationMetricsSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala index a866ef7ed685..8e9d910e646c 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RankingMetricsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.evaluation import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.util.TestingUtils._ +import org.apache.spark.mllib.util.TestingUtils._ class RankingMetricsSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala index f848be028246..f1d517383643 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.evaluation import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.util.TestingUtils._ +import org.apache.spark.mllib.util.TestingUtils._ class RegressionMetricsSuite extends SparkFunSuite with MLlibTestSparkContext { val obs = List[Double](77, 85, 62, 55, 63, 88, 57, 81, 51) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/IDFSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/IDFSuite.scala index f7f2fce7d66d..5c938a61ed99 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/feature/IDFSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/IDFSuite.scala @@ -21,7 +21,6 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.util.TestingUtils._ class IDFSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala index 514f8b6b566a..10f7bafd6cf5 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala @@ -23,7 +23,6 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.util.TestingUtils._ class NormalizerSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala index e99cc3fb725a..80da03cc2efe 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala @@ -20,7 +20,6 @@ package org.apache.spark.mllib.linalg import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.BLAS._ import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.util.TestingUtils._ class BLASSuite extends SparkFunSuite { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala index 18fdb6167f83..71a3ceac1b94 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala @@ -25,7 +25,7 @@ import org.json4s.jackson.JsonMethods.{parse => parseJson} import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.internal.Logging import org.apache.spark.ml.{linalg => newlinalg} -import org.apache.spark.util.TestingUtils._ +import org.apache.spark.mllib.util.TestingUtils._ class VectorsSuite extends SparkFunSuite with Logging { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala index d993470501d2..7c9e14f8cee7 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala @@ -29,7 +29,6 @@ import org.apache.spark.mllib.linalg.{Matrices, Vector, Vectors} import org.apache.spark.mllib.random.RandomRDDs import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext} import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.util.TestingUtils._ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala index cfe7174fe962..37eb794b0c5c 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression._ import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext, MLUtils} -import org.apache.spark.util.TestingUtils._ +import org.apache.spark.mllib.util.TestingUtils._ object GradientDescentSuite { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala index 4ea446e94258..75ae0eb32fb7 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext} -import org.apache.spark.util.TestingUtils._ +import org.apache.spark.mllib.util.TestingUtils._ class LBFGSSuite extends SparkFunSuite with MLlibTestSparkContext with Matchers { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala index 691a89637dfb..4ec3dc0df03b 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala @@ -22,7 +22,7 @@ import scala.util.Random import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV} import org.apache.spark.SparkFunSuite -import org.apache.spark.util.TestingUtils._ +import org.apache.spark.mllib.util.TestingUtils._ class NNLSSuite extends SparkFunSuite { /** Generate an NNLS problem whose optimal solution is the all-ones vector. */ diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModelSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModelSuite.scala index 8138b299862a..2c8ed057a516 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModelSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModelSuite.scala @@ -19,8 +19,8 @@ package org.apache.spark.mllib.recommendation import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.rdd.RDD -import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils class MatrixFactorizationModelSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala index 6922375c8ba6..94da626d92eb 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala @@ -21,7 +21,7 @@ import org.scalatest.Matchers import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.util.TestingUtils._ +import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.util.Utils class IsotonicRegressionSuite extends SparkFunSuite with MLlibTestSparkContext with Matchers { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala index 6c4e0f791c5c..46fcebe13274 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala @@ -29,7 +29,6 @@ import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.stat.test.ChiSqTest import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.util.TestingUtils._ class HypothesisTestSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala index c816807e75d0..797e84fcc737 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala @@ -20,7 +20,6 @@ package org.apache.spark.mllib.stat import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.util.TestingUtils._ class MultivariateOnlineSummarizerSuite extends SparkFunSuite { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussianSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussianSuite.scala index 098d245ce500..669d44223d71 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussianSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussianSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.mllib.stat.distribution import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.{Matrices, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.util.TestingUtils._ +import org.apache.spark.mllib.util.TestingUtils._ class MultivariateGaussianSuite extends SparkFunSuite with MLlibTestSparkContext { test("univariate") { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala index e8dc5d2f6344..6aa93c907600 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala @@ -29,10 +29,10 @@ import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.mllib.linalg.{DenseVector, Matrices, SparseVector, Vectors} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.MLUtils._ +import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.sql.Row import org.apache.spark.sql.functions.col import org.apache.spark.sql.types.MetadataBuilder -import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtils.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtils.scala index e54aa0f50e2c..39a6bc37d963 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtils.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtils.scala @@ -20,10 +20,94 @@ package org.apache.spark.mllib.util import org.scalatest.exceptions.TestFailedException import org.apache.spark.mllib.linalg.{Matrix, Vector} -import org.apache.spark.util.TestingUtils._ object TestingUtils { + val ABS_TOL_MSG = " using absolute tolerance" + val REL_TOL_MSG = " using relative tolerance" + + /** + * Private helper function for comparing two values using relative tolerance. + * Note that if x or y is extremely close to zero, i.e., smaller than Double.MinPositiveValue, + * the relative tolerance is meaningless, so the exception will be raised to warn users. + */ + private def RelativeErrorComparison(x: Double, y: Double, eps: Double): Boolean = { + val absX = math.abs(x) + val absY = math.abs(y) + val diff = math.abs(x - y) + if (x == y) { + true + } else if (absX < Double.MinPositiveValue || absY < Double.MinPositiveValue) { + throw new TestFailedException( + s"$x or $y is extremely close to zero, so the relative tolerance is meaningless.", 0) + } else { + diff < eps * math.min(absX, absY) + } + } + + /** + * Private helper function for comparing two values using absolute tolerance. + */ + private def AbsoluteErrorComparison(x: Double, y: Double, eps: Double): Boolean = { + math.abs(x - y) < eps + } + + case class CompareDoubleRightSide( + fun: (Double, Double, Double) => Boolean, y: Double, eps: Double, method: String) + + /** + * Implicit class for comparing two double values using relative tolerance or absolute tolerance. + */ + implicit class DoubleWithAlmostEquals(val x: Double) { + + /** + * When the difference of two values are within eps, returns true; otherwise, returns false. + */ + def ~=(r: CompareDoubleRightSide): Boolean = r.fun(x, r.y, r.eps) + + /** + * When the difference of two values are within eps, returns false; otherwise, returns true. + */ + def !~=(r: CompareDoubleRightSide): Boolean = !r.fun(x, r.y, r.eps) + + /** + * Throws exception when the difference of two values are NOT within eps; + * otherwise, returns true. + */ + def ~==(r: CompareDoubleRightSide): Boolean = { + if (!r.fun(x, r.y, r.eps)) { + throw new TestFailedException( + s"Expected $x and ${r.y} to be within ${r.eps}${r.method}.", 0) + } + true + } + + /** + * Throws exception when the difference of two values are within eps; otherwise, returns true. + */ + def !~==(r: CompareDoubleRightSide): Boolean = { + if (r.fun(x, r.y, r.eps)) { + throw new TestFailedException( + s"Did not expect $x and ${r.y} to be within ${r.eps}${r.method}.", 0) + } + true + } + + /** + * Comparison using absolute tolerance. + */ + def absTol(eps: Double): CompareDoubleRightSide = + CompareDoubleRightSide(AbsoluteErrorComparison, x, eps, ABS_TOL_MSG) + + /** + * Comparison using relative tolerance. + */ + def relTol(eps: Double): CompareDoubleRightSide = + CompareDoubleRightSide(RelativeErrorComparison, x, eps, REL_TOL_MSG) + + override def toString: String = x.toString + } + case class CompareVectorRightSide( fun: (Vector, Vector, Double) => Boolean, y: Vector, eps: Double, method: String) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtilsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtilsSuite.scala index c3f33b5f8d18..1aff44480aac 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtilsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtilsSuite.scala @@ -25,6 +25,81 @@ import org.apache.spark.mllib.util.TestingUtils._ class TestingUtilsSuite extends SparkFunSuite { + test("Comparing doubles using relative error.") { + + assert(23.1 ~== 23.52 relTol 0.02) + assert(23.1 ~== 22.74 relTol 0.02) + assert(23.1 ~= 23.52 relTol 0.02) + assert(23.1 ~= 22.74 relTol 0.02) + assert(!(23.1 !~= 23.52 relTol 0.02)) + assert(!(23.1 !~= 22.74 relTol 0.02)) + + // Should throw exception with message when test fails. + intercept[TestFailedException](23.1 !~== 23.52 relTol 0.02) + intercept[TestFailedException](23.1 !~== 22.74 relTol 0.02) + intercept[TestFailedException](23.1 ~== 23.63 relTol 0.02) + intercept[TestFailedException](23.1 ~== 22.34 relTol 0.02) + + assert(23.1 !~== 23.63 relTol 0.02) + assert(23.1 !~== 22.34 relTol 0.02) + assert(23.1 !~= 23.63 relTol 0.02) + assert(23.1 !~= 22.34 relTol 0.02) + assert(!(23.1 ~= 23.63 relTol 0.02)) + assert(!(23.1 ~= 22.34 relTol 0.02)) + + // Comparing against zero should fail the test and throw exception with message + // saying that the relative error is meaningless in this situation. + intercept[TestFailedException](0.1 ~== 0.0 relTol 0.032) + intercept[TestFailedException](0.1 ~= 0.0 relTol 0.032) + intercept[TestFailedException](0.1 !~== 0.0 relTol 0.032) + intercept[TestFailedException](0.1 !~= 0.0 relTol 0.032) + intercept[TestFailedException](0.0 ~== 0.1 relTol 0.032) + intercept[TestFailedException](0.0 ~= 0.1 relTol 0.032) + intercept[TestFailedException](0.0 !~== 0.1 relTol 0.032) + intercept[TestFailedException](0.0 !~= 0.1 relTol 0.032) + + // Comparisons of numbers very close to zero. + assert(10 * Double.MinPositiveValue ~== 9.5 * Double.MinPositiveValue relTol 0.01) + assert(10 * Double.MinPositiveValue !~== 11 * Double.MinPositiveValue relTol 0.01) + + assert(-Double.MinPositiveValue ~== 1.18 * -Double.MinPositiveValue relTol 0.012) + assert(-Double.MinPositiveValue ~== 1.38 * -Double.MinPositiveValue relTol 0.012) + } + + test("Comparing doubles using absolute error.") { + + assert(17.8 ~== 17.99 absTol 0.2) + assert(17.8 ~== 17.61 absTol 0.2) + assert(17.8 ~= 17.99 absTol 0.2) + assert(17.8 ~= 17.61 absTol 0.2) + assert(!(17.8 !~= 17.99 absTol 0.2)) + assert(!(17.8 !~= 17.61 absTol 0.2)) + + // Should throw exception with message when test fails. + intercept[TestFailedException](17.8 !~== 17.99 absTol 0.2) + intercept[TestFailedException](17.8 !~== 17.61 absTol 0.2) + intercept[TestFailedException](17.8 ~== 18.01 absTol 0.2) + intercept[TestFailedException](17.8 ~== 17.59 absTol 0.2) + + assert(17.8 !~== 18.01 absTol 0.2) + assert(17.8 !~== 17.59 absTol 0.2) + assert(17.8 !~= 18.01 absTol 0.2) + assert(17.8 !~= 17.59 absTol 0.2) + assert(!(17.8 ~= 18.01 absTol 0.2)) + assert(!(17.8 ~= 17.59 absTol 0.2)) + + // Comparisons of numbers very close to zero, and both side of zeros + assert( + Double.MinPositiveValue ~== 4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) + assert( + Double.MinPositiveValue !~== 6 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) + + assert( + -Double.MinPositiveValue ~== 3 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) + assert( + Double.MinPositiveValue !~== -4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue) + } + test("Comparing vectors using relative error.") { // Comparisons of two dense vectors diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 65801b429656..6e7eaf110bd7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -28,7 +28,6 @@ import org.apache.spark.sql.catalyst.optimizer.SimpleTestOptimizer import org.apache.spark.sql.catalyst.plans.logical.{OneRowRelation, Project} import org.apache.spark.sql.catalyst.util.MapData import org.apache.spark.sql.types.DataType -import org.apache.spark.util.TestingUtils._ import org.apache.spark.util.Utils /** @@ -293,10 +292,26 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { case (result: Double, expected: Double) if result.isNaN && expected.isNaN => true case (result: Double, expected: Double) => - result ~== expected relTol 1E-10 + compareDoubles(result, expected) case (result: Float, expected: Float) if result.isNaN && expected.isNaN => true case _ => result == expected } } + + /** + * Check the equality of two [[Double]] values, allows a tolerance within a certain percentage + * range. + */ + private def compareDoubles( + result: Double, + expected: Double, + tolerance: Double = 1E-10): Boolean = { + if ((result.isNaN && expected.isNaN) || result == expected) { + return true + } + + val spread = Spread[Double](expected, expected.abs * tolerance) + spread.isWithin(result) + } } From f4ef207603f5754bfa14506139c1e5d13ecac60f Mon Sep 17 00:00:00 2001 From: jiangxingbo Date: Wed, 14 Sep 2016 17:31:16 +0800 Subject: [PATCH 4/5] inline the relative tolerance logic. --- .../expressions/ExpressionEvalHelper.scala | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 6e7eaf110bd7..9bf6fd887804 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.scalacheck.Gen import org.scalactic.TripleEqualsSupport.Spread +import org.scalatest.exceptions.TestFailedException import org.scalatest.prop.GeneratorDrivenPropertyChecks import org.apache.spark.SparkFunSuite @@ -292,7 +293,7 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { case (result: Double, expected: Double) if result.isNaN && expected.isNaN => true case (result: Double, expected: Double) => - compareDoubles(result, expected) + relativeErrorComparison(result, expected) case (result: Float, expected: Float) if result.isNaN && expected.isNaN => true case _ => result == expected @@ -300,18 +301,21 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { } /** - * Check the equality of two [[Double]] values, allows a tolerance within a certain percentage - * range. + * Private helper function for comparing two values using relative tolerance. + * Note that if x or y is extremely close to zero, i.e., smaller than Double.MinPositiveValue, + * the relative tolerance is meaningless, so the exception will be raised to warn users. */ - private def compareDoubles( - result: Double, - expected: Double, - tolerance: Double = 1E-10): Boolean = { - if ((result.isNaN && expected.isNaN) || result == expected) { - return true + private def relativeErrorComparison(x: Double, y: Double, eps: Double = 1E-8): Boolean = { + val absX = math.abs(x) + val absY = math.abs(y) + val diff = math.abs(x - y) + if (x == y) { + true + } else if (absX < Double.MinPositiveValue || absY < Double.MinPositiveValue) { + throw new TestFailedException( + s"$x or $y is extremely close to zero, so the relative tolerance is meaningless.", 0) + } else { + diff < eps * math.min(absX, absY) } - - val spread = Spread[Double](expected, expected.abs * tolerance) - spread.isWithin(result) } } From 6f91656db006647705f86b5eef7e3d02727b99a4 Mon Sep 17 00:00:00 2001 From: jiangxingbo Date: Wed, 14 Sep 2016 18:44:12 +0800 Subject: [PATCH 5/5] add comment. --- .../sql/catalyst/expressions/ExpressionEvalHelper.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 9bf6fd887804..f0c149c02b9a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -304,6 +304,11 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { * Private helper function for comparing two values using relative tolerance. * Note that if x or y is extremely close to zero, i.e., smaller than Double.MinPositiveValue, * the relative tolerance is meaningless, so the exception will be raised to warn users. + * + * TODO: this duplicates functions in spark.ml.util.TestingUtils.relTol and + * spark.mllib.util.TestingUtils.relTol, they could be moved to common utils sub module for the + * whole spark project which does not depend on other modules. See more detail in discussion: + * https://github.com/apache/spark/pull/15059#issuecomment-246940444 */ private def relativeErrorComparison(x: Double, y: Double, eps: Double = 1E-8): Boolean = { val absX = math.abs(x)