
Commit ad9d7ac

Add some more tests
1 parent 30ae934 commit ad9d7ac

8 files changed, +40 -33 lines changed

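All eight diffs apply the same two-step pattern: bring testImplicits into scope in the suite, then replace spark.createDataFrame(seq).toDF(...) with a direct seq.toDF(...) call. As a hedged aside (not part of the commit), here is a minimal, self-contained sketch of why that works; it uses spark.implicits._, which provides the same implicit conversions as the suites' testImplicits, and the object and column names here are invented for the example:

    import org.apache.spark.sql.SparkSession

    object ToDFSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .master("local[2]")
          .appName("toDF sketch")
          .getOrCreate()
        // The implicits add toDF/toDS to any Seq whose element type has an
        // Encoder, e.g. tuples of primitives.
        import spark.implicits._

        val data = Array(-0.5, -0.3, 0.0, 0.2)
        val expected = data.map(x => if (x > 0.0) 1.0 else 0.0)

        // Arrays are not Seqs, so the implicit conversion does not fire on
        // them directly; hence the .toSeq that appears throughout the diffs.
        val df = data.zip(expected).toSeq.toDF("feature", "expected")
        df.show()

        spark.stop()
      }
    }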

mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala

Lines changed: 8 additions & 8 deletions
@@ -26,6 +26,8 @@ import org.apache.spark.sql.{DataFrame, Row}
 
 class BinarizerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   @transient var data: Array[Double] = _
 
   override def beforeAll(): Unit = {
@@ -39,8 +41,7 @@ class BinarizerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
   test("Binarize continuous features with default parameter") {
     val defaultBinarized: Array[Double] = data.map(x => if (x > 0.0) 1.0 else 0.0)
-    val dataFrame: DataFrame = spark.createDataFrame(
-      data.zip(defaultBinarized)).toDF("feature", "expected")
+    val dataFrame: DataFrame = data.zip(defaultBinarized).toSeq.toDF("feature", "expected")
 
     val binarizer: Binarizer = new Binarizer()
       .setInputCol("feature")
@@ -55,8 +56,7 @@ class BinarizerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
   test("Binarize continuous features with setter") {
     val threshold: Double = 0.2
     val thresholdBinarized: Array[Double] = data.map(x => if (x > threshold) 1.0 else 0.0)
-    val dataFrame: DataFrame = spark.createDataFrame(
-      data.zip(thresholdBinarized)).toDF("feature", "expected")
+    val dataFrame: DataFrame = data.zip(thresholdBinarized).toSeq.toDF("feature", "expected")
 
     val binarizer: Binarizer = new Binarizer()
       .setInputCol("feature")
@@ -71,9 +71,9 @@ class BinarizerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
   test("Binarize vector of continuous features with default parameter") {
     val defaultBinarized: Array[Double] = data.map(x => if (x > 0.0) 1.0 else 0.0)
-    val dataFrame: DataFrame = spark.createDataFrame(Seq(
+    val dataFrame: DataFrame = Seq(
       (Vectors.dense(data), Vectors.dense(defaultBinarized))
-    )).toDF("feature", "expected")
+    ).toDF("feature", "expected")
 
     val binarizer: Binarizer = new Binarizer()
       .setInputCol("feature")
@@ -88,9 +88,9 @@ class BinarizerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
   test("Binarize vector of continuous features with setter") {
     val threshold: Double = 0.2
     val defaultBinarized: Array[Double] = data.map(x => if (x > threshold) 1.0 else 0.0)
-    val dataFrame: DataFrame = spark.createDataFrame(Seq(
+    val dataFrame: DataFrame = Seq(
       (Vectors.dense(data), Vectors.dense(defaultBinarized))
-    )).toDF("feature", "expected")
+    ).toDF("feature", "expected")
 
     val binarizer: Binarizer = new Binarizer()
       .setInputCol("feature")

mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala

Lines changed: 7 additions & 8 deletions
@@ -29,6 +29,8 @@ import org.apache.spark.sql.{DataFrame, Row}
 
 class BucketizerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   test("params") {
     ParamsSuite.checkParams(new Bucketizer)
   }
@@ -38,8 +40,7 @@ class BucketizerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
     val splits = Array(-0.5, 0.0, 0.5)
     val validData = Array(-0.5, -0.3, 0.0, 0.2)
     val expectedBuckets = Array(0.0, 0.0, 1.0, 1.0)
-    val dataFrame: DataFrame =
-      spark.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected")
+    val dataFrame: DataFrame = validData.zip(expectedBuckets).toSeq.toDF("feature", "expected")
 
     val bucketizer: Bucketizer = new Bucketizer()
       .setInputCol("feature")
@@ -55,13 +56,13 @@ class BucketizerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
     // Check for exceptions when using a set of invalid feature values.
     val invalidData1: Array[Double] = Array(-0.9) ++ validData
     val invalidData2 = Array(0.51) ++ validData
-    val badDF1 = spark.createDataFrame(invalidData1.zipWithIndex).toDF("feature", "idx")
+    val badDF1 = invalidData1.zipWithIndex.toSeq.toDF("feature", "idx")
     withClue("Invalid feature value -0.9 was not caught as an invalid feature!") {
       intercept[SparkException] {
         bucketizer.transform(badDF1).collect()
       }
     }
-    val badDF2 = spark.createDataFrame(invalidData2.zipWithIndex).toDF("feature", "idx")
+    val badDF2 = invalidData2.zipWithIndex.toSeq.toDF("feature", "idx")
     withClue("Invalid feature value 0.51 was not caught as an invalid feature!") {
       intercept[SparkException] {
         bucketizer.transform(badDF2).collect()
@@ -73,8 +74,7 @@ class BucketizerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
     val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity)
     val validData = Array(-0.9, -0.5, -0.3, 0.0, 0.2, 0.5, 0.9)
     val expectedBuckets = Array(0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0)
-    val dataFrame: DataFrame =
-      spark.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected")
+    val dataFrame: DataFrame = validData.zip(expectedBuckets).toSeq.toDF("feature", "expected")
 
     val bucketizer: Bucketizer = new Bucketizer()
       .setInputCol("feature")
@@ -92,8 +92,7 @@ class BucketizerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
     val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity)
     val validData = Array(-0.9, -0.5, -0.3, 0.0, 0.2, 0.5, 0.9, Double.NaN, Double.NaN, Double.NaN)
     val expectedBuckets = Array(0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 4.0)
-    val dataFrame: DataFrame =
-      spark.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected")
+    val dataFrame: DataFrame = validData.zip(expectedBuckets).toSeq.toDF("feature", "expected")
 
     val bucketizer: Bucketizer = new Bucketizer()
       .setInputCol("feature")

mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala

Lines changed: 4 additions & 2 deletions
@@ -29,6 +29,8 @@ import org.apache.spark.sql.Row
 
 class IDFSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   def scaleDataWithIDF(dataSet: Array[Vector], model: Vector): Array[Vector] = {
     dataSet.map {
       case data: DenseVector =>
@@ -61,7 +63,7 @@ class IDFSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
     })
     val expected = scaleDataWithIDF(data, idf)
 
-    val df = spark.createDataFrame(data.zip(expected)).toDF("features", "expected")
+    val df = data.zip(expected).toSeq.toDF("features", "expected")
 
     val idfModel = new IDF()
       .setInputCol("features")
@@ -87,7 +89,7 @@ class IDFSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
     })
     val expected = scaleDataWithIDF(data, idf)
 
-    val df = spark.createDataFrame(data.zip(expected)).toDF("features", "expected")
+    val df = data.zip(expected).toSeq.toDF("features", "expected")
 
     val idfModel = new IDF()
      .setInputCol("features")

mllib/src/test/scala/org/apache/spark/ml/feature/MaxAbsScalerSuite.scala

Lines changed: 4 additions & 1 deletion
@@ -23,6 +23,9 @@ import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.sql.Row
 
 class MaxAbsScalerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
+
+  import testImplicits._
+
   test("MaxAbsScaler fit basic case") {
     val data = Array(
       Vectors.dense(1, 0, 100),
@@ -36,7 +39,7 @@ class MaxAbsScalerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
       Vectors.sparse(3, Array(0, 2), Array(-1, -1)),
       Vectors.sparse(3, Array(0), Array(-0.75)))
 
-    val df = spark.createDataFrame(data.zip(expected)).toDF("features", "expected")
+    val df = data.zip(expected).toSeq.toDF("features", "expected")
     val scaler = new MaxAbsScaler()
       .setInputCol("features")
       .setOutputCol("scaled")

mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala

Lines changed: 2 additions & 2 deletions
@@ -39,7 +39,7 @@ class MinMaxScalerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
       Vectors.sparse(3, Array(0, 2), Array(5, 5)),
       Vectors.sparse(3, Array(0), Array(-2.5)))
 
-    val df = spark.createDataFrame(data.zip(expected)).toDF("features", "expected")
+    val df = data.zip(expected).toSeq.toDF("features", "expected")
     val scaler = new MinMaxScaler()
       .setInputCol("features")
       .setOutputCol("scaled")
@@ -104,7 +104,7 @@ class MinMaxScalerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
       Vectors.dense(-1.0, Double.NaN, -5.0, -5.0),
       Vectors.dense(5.0, 0.0, 5.0, Double.NaN))
 
-    val df = spark.createDataFrame(data.zip(expected)).toDF("features", "expected")
+    val df = data.zip(expected).toSeq.toDF("features", "expected")
     val scaler = new MinMaxScaler()
       .setInputCol("features")
       .setOutputCol("scaled")

mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala

Lines changed: 6 additions & 5 deletions
@@ -30,6 +30,8 @@ import org.apache.spark.sql.Row
 class PolynomialExpansionSuite
   extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   test("params") {
     ParamsSuite.checkParams(new PolynomialExpansion)
   }
@@ -59,7 +61,7 @@ class PolynomialExpansionSuite
     Vectors.sparse(19, Array.empty, Array.empty))
 
   test("Polynomial expansion with default parameter") {
-    val df = spark.createDataFrame(data.zip(twoDegreeExpansion)).toDF("features", "expected")
+    val df = data.zip(twoDegreeExpansion).toSeq.toDF("features", "expected")
 
     val polynomialExpansion = new PolynomialExpansion()
       .setInputCol("features")
@@ -76,7 +78,7 @@ class PolynomialExpansionSuite
   }
 
   test("Polynomial expansion with setter") {
-    val df = spark.createDataFrame(data.zip(threeDegreeExpansion)).toDF("features", "expected")
+    val df = data.zip(threeDegreeExpansion).toSeq.toDF("features", "expected")
 
     val polynomialExpansion = new PolynomialExpansion()
       .setInputCol("features")
@@ -94,7 +96,7 @@ class PolynomialExpansionSuite
   }
 
   test("Polynomial expansion with degree 1 is identity on vectors") {
-    val df = spark.createDataFrame(data.zip(data)).toDF("features", "expected")
+    val df = data.zip(data).toSeq.toDF("features", "expected")
 
     val polynomialExpansion = new PolynomialExpansion()
       .setInputCol("features")
@@ -124,8 +126,7 @@ class PolynomialExpansionSuite
       (Vectors.dense(1.0, 2.0, 3.0, 4.0, 5.0, 6.0), 8007, 12375)
     )
 
-    val df = spark.createDataFrame(data)
-      .toDF("features", "expectedPoly10size", "expectedPoly11size")
+    val df = data.toSeq.toDF("features", "expectedPoly10size", "expectedPoly11size")
 
     val t = new PolynomialExpansion()
       .setInputCol("features")

mllib/src/test/scala/org/apache/spark/ml/feature/StandardScalerSuite.scala

Lines changed: 7 additions & 5 deletions
@@ -28,6 +28,8 @@ import org.apache.spark.sql.{DataFrame, Row}
 class StandardScalerSuite extends SparkFunSuite with MLlibTestSparkContext
   with DefaultReadWriteTest {
 
+  import testImplicits._
+
   @transient var data: Array[Vector] = _
   @transient var resWithStd: Array[Vector] = _
   @transient var resWithMean: Array[Vector] = _
@@ -73,7 +75,7 @@ class StandardScalerSuite extends SparkFunSuite with MLlibTestSparkContext
   }
 
   test("Standardization with default parameter") {
-    val df0 = spark.createDataFrame(data.zip(resWithStd)).toDF("features", "expected")
+    val df0 = data.zip(resWithStd).toSeq.toDF("features", "expected")
 
     val standardScaler0 = new StandardScaler()
       .setInputCol("features")
@@ -84,9 +86,9 @@ class StandardScalerSuite extends SparkFunSuite with MLlibTestSparkContext
   }
 
   test("Standardization with setter") {
-    val df1 = spark.createDataFrame(data.zip(resWithBoth)).toDF("features", "expected")
-    val df2 = spark.createDataFrame(data.zip(resWithMean)).toDF("features", "expected")
-    val df3 = spark.createDataFrame(data.zip(data)).toDF("features", "expected")
+    val df1 = data.zip(resWithBoth).toSeq.toDF("features", "expected")
+    val df2 = data.zip(resWithMean).toSeq.toDF("features", "expected")
+    val df3 = data.zip(data).toSeq.toDF("features", "expected")
 
     val standardScaler1 = new StandardScaler()
       .setInputCol("features")
@@ -120,7 +122,7 @@ class StandardScalerSuite extends SparkFunSuite with MLlibTestSparkContext
       Vectors.sparse(3, Array(1, 2), Array(-5.1, 1.0)),
       Vectors.dense(1.7, -0.6, 3.3)
     )
-    val df = spark.createDataFrame(someSparseData.zip(resWithMean)).toDF("features", "expected")
+    val df = someSparseData.zip(resWithMean).toSeq.toDF("features", "expected")
     val standardScaler = new StandardScaler()
       .setInputCol("features")
       .setOutputCol("standardized_features")
.setOutputCol("standardized_features")

mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala

Lines changed: 2 additions & 2 deletions
@@ -1046,12 +1046,12 @@ class GeneralizedLinearRegressionSuite
       [1] 12.92681
       [1] 13.32836
      */
-    val dataset = spark.createDataFrame(Seq(
+    val dataset = Seq(
      LabeledPoint(1, Vectors.dense(5, 0)),
      LabeledPoint(0, Vectors.dense(2, 1)),
      LabeledPoint(1, Vectors.dense(1, 2)),
      LabeledPoint(0, Vectors.dense(3, 3))
-    ))
+    ).toDF()
     val expected = Seq(12.88188, 12.92681, 13.32836)
 
     var idx = 0
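A hedged note on this last hunk: unlike the other diffs, no column names are passed to toDF(). LabeledPoint is a case class, so the product encoder brought in by testImplicits derives the schema from its fields, yielding "label" and "features" columns. A minimal sketch, assuming a SparkSession named spark is in scope (as MLlibTestSparkContext provides in these suites):

    import org.apache.spark.ml.feature.LabeledPoint
    import org.apache.spark.ml.linalg.Vectors
    import spark.implicits._

    // Case-class field names become the column names: "label" and "features".
    val dataset = Seq(
      LabeledPoint(1, Vectors.dense(5, 0)),
      LabeledPoint(0, Vectors.dense(2, 1))
    ).toDF()

    dataset.printSchema()  // label: double, features: vector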
