@@ -109,15 +109,6 @@ class BucketizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
109109 s " The feature value is not correct after bucketing. Expected $y but found $x" )
110110 }
111111
test("Bucket should only drop NaN in input columns, with handleInvalid=skip") {
  // Column "b" also contains a NaN, but with handleInvalid=skip only NaNs in
  // the configured *input* column ("a") should cause rows to be dropped.
  val rows = Seq((2.3, 3.0), (Double.NaN, 3.0), (6.7, Double.NaN))
  val df = spark.createDataFrame(rows).toDF("a", "b")
  val splits = Array(Double.NegativeInfinity, 3.0, Double.PositiveInfinity)
  val bucketizer = new Bucketizer()
    .setInputCol("a")
    .setOutputCol("x")
    .setSplits(splits)
  bucketizer.setHandleInvalid("skip")
  // Of the three rows, exactly one has NaN in "a"; the NaN in "b" must not
  // trigger a drop, so two rows survive the transform.
  assert(bucketizer.transform(df).count() == 2)
}
121112 bucketizer.setHandleInvalid(" skip" )
122113 val skipResults : Array [Double ] = bucketizer.transform(dataFrame)
123114 .select(" result" ).as[Double ].collect()
@@ -132,6 +123,15 @@ class BucketizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
132123 }
133124 }
134125
test("Bucketizer should only drop NaN in input columns, with handleInvalid=skip") {
  // Build a two-column frame where both columns contain a NaN somewhere;
  // only the input column "a" is bucketized, so only its NaN row is skipped.
  val frame = spark
    .createDataFrame(Seq((2.3, 3.0), (Double.NaN, 3.0), (6.7, Double.NaN)))
    .toDF("a", "b")
  val boundaries = Array(Double.NegativeInfinity, 3.0, Double.PositiveInfinity)
  val model = new Bucketizer()
    .setInputCol("a")
    .setOutputCol("x")
    .setSplits(boundaries)
  model.setHandleInvalid("skip")
  // Row 2 (NaN in "a") is dropped; row 3 (NaN only in "b") is kept.
  assert(model.transform(frame).count() == 2)
}
134+
135135 test(" Bucket continuous features, with NaN splits" ) {
136136 val splits = Array (Double .NegativeInfinity , - 0.5 , 0.0 , 0.5 , Double .PositiveInfinity , Double .NaN )
137137 withClue(" Invalid NaN split was not caught during Bucketizer initialization" ) {
0 commit comments