Commit fc62406

fixed test suites after last commit

1 parent bcb9549

8 files changed, 20 additions and 22 deletions

examples/src/main/scala/org/apache/spark/examples/ml/CrossValidatorExample.scala

Lines changed: 1 addition & 1 deletion
@@ -101,7 +101,7 @@ object CrossValidatorExample {
 
     // Make predictions on test documents. cvModel uses the best model found (lrModel).
     cvModel.transform(test)
-      .select('id, 'text, 'probability, 'prediction)
+      .select("id", "text", "probability", "prediction")
      .collect()
      .foreach { case Row(id: Long, text: String, prob: Vector, prediction: Double) =>
        println(s"($id, $text) --> prob=$prob, prediction=$prediction")

examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala

Lines changed: 5 additions & 3 deletions
@@ -22,7 +22,7 @@ import org.apache.spark.ml.classification.{Classifier, ClassifierParams, Classif
 import org.apache.spark.ml.param.{Params, IntParam, ParamMap}
 import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
 import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.sql.{SchemaRDD, Row, SQLContext}
+import org.apache.spark.sql.{DataFrame, Row, SQLContext}
 
 
 /**
@@ -68,13 +68,15 @@ object DeveloperApiExample {
 
     // Make predictions on test data.
     val sumPredictions: Double = model.transform(test)
-      .select('features, 'label, 'prediction)
+      .select("features", "label", "prediction")
      .collect()
      .map { case Row(features: Vector, label: Double, prediction: Double) =>
        prediction
      }.sum
    assert(sumPredictions == 0.0,
      "MyLogisticRegression predicted something other than 0, even though all weights are 0!")
+
+    sc.stop()
  }
 }
 
@@ -113,7 +115,7 @@ private class MyLogisticRegression
 
   // This method is used by fit()
   override protected def train(
-      dataset: SchemaRDD,
+      dataset: DataFrame,
      paramMap: ParamMap): MyLogisticRegressionModel = {
    // Extract columns from data using helper method.
    val oldDataset = extractLabeledPoints(dataset, paramMap)
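This file also picks up the SchemaRDD-to-DataFrame rename: APIs that took a SchemaRDD now take a DataFrame, and only the type name changes at the call sites. A hedged sketch of the signature shape (TrainSketch and the column names are made-up placeholders):

import org.apache.spark.ml.param.ParamMap
import org.apache.spark.sql.DataFrame

object TrainSketch {
  // Before this commit the parameter was typed SchemaRDD; now DataFrame.
  // The method body is unchanged -- only the type name moves.
  def train(dataset: DataFrame, paramMap: ParamMap): Unit = {
    dataset.select("label", "features").collect()
  }
}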

examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala

Lines changed: 1 addition & 1 deletion
@@ -91,7 +91,7 @@ object SimpleParamsExample {
     // Note that model2.transform() outputs a 'myProbability' column instead of the usual
     // 'probability' column since we renamed the lr.probabilityCol parameter previously.
     model2.transform(test)
-      .select('features, 'label, 'myProbability, 'prediction)
+      .select("features", "label", "myProbability", "prediction")
      .collect()
      .foreach { case Row(features: Vector, label: Double, prob: Vector, prediction: Double) =>
        println("($features, $label) -> prob=$prob, prediction=$prediction")

examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala

Lines changed: 1 addition & 1 deletion
@@ -80,7 +80,7 @@ object SimpleTextClassificationPipeline {
 
     // Make predictions on test documents.
     model.transform(test)
-      .select('id, 'text, 'probability, 'prediction)
+      .select("id", "text", "probability", "prediction")
      .collect()
      .foreach { case Row(id: Long, text: String, prob: Vector, prediction: Double) =>
        println("($id, $text) --> prob=$prob, prediction=$prediction")

mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala

Lines changed: 3 additions & 4 deletions
@@ -175,23 +175,22 @@ private[ml] object ClassificationModel {
       val features2raw: FeaturesType => Vector = model.predictRaw
       tmpData = tmpData.select($"*",
         callUDF(features2raw, new VectorUDT,
-          tmpData(map(model.featuresCol))).as(map(model.rawPredictionCol)))
+          col(map(model.featuresCol))).as(map(model.rawPredictionCol)))
      numColsOutput += 1
      if (map(model.predictionCol) != "") {
        val raw2pred: Vector => Double = (rawPred) => {
          rawPred.toArray.zipWithIndex.maxBy(_._1)._2
        }
        tmpData = tmpData.select($"*",
-          callUDF(raw2pred, DoubleType,
-            tmpData(map(model.rawPredictionCol))).as(map(model.predictionCol)))
+          callUDF(raw2pred, col(map(model.rawPredictionCol))).as(map(model.predictionCol)))
        numColsOutput += 1
      }
    } else if (map(model.predictionCol) != "") {
      // output prediction
      val features2pred: FeaturesType => Double = model.predict
      tmpData = tmpData.select($"*",
        callUDF(features2pred, DoubleType,
-          tmpData(map(model.featuresCol))).as(map(model.predictionCol)))
+          col(map(model.featuresCol))).as(map(model.predictionCol)))
      numColsOutput += 1
    }
    (numColsOutput, tmpData)
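Aside on the pattern being rewritten here, as it appears in this diff: tmpData(name) resolves a column against that specific DataFrame instance, while col(name) produces an unresolved column that the enclosing select resolves, which suits these generic transform() helpers. The diff also shows two callUDF shapes: one passing an explicit return DataType (kept where FeaturesType is generic and nothing concrete can be inferred) and one that infers the return type from the function's concrete types. A minimal sketch, assuming a hypothetical DataFrame df with a Double column "x":

import org.apache.spark.sql.Dsl._ // brings col, callUDF and the $ interpolator used above
import org.apache.spark.sql.types.DoubleType

val double: Double => Double = _ * 2.0
// Inferred: the return type comes from the function's concrete types.
val inferred = df.select($"*", callUDF(double, col("x")).as("x2"))
// Explicit: the return DataType is passed by hand, as kept above where
// the function's argument type (FeaturesType) is generic.
val explicit = df.select($"*", callUDF(double, DoubleType, col("x")).as("x2"))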

mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala

Lines changed: 7 additions & 8 deletions
@@ -20,10 +20,9 @@ package org.apache.spark.ml.classification
 import org.apache.spark.annotation.AlphaComponent
 import org.apache.spark.ml.param._
 import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
-import org.apache.spark.mllib.linalg.{VectorUDT, BLAS, Vector, Vectors}
+import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
 import org.apache.spark.sql.DataFrame
 import org.apache.spark.sql.Dsl._
-import org.apache.spark.sql.types.DoubleType
 import org.apache.spark.storage.StorageLevel
 
 
@@ -120,7 +119,7 @@ class LogisticRegressionModel private[ml] (
     if (map(rawPredictionCol) != "") {
       val features2raw: Vector => Vector = predictRaw
       tmpData = tmpData.select($"*",
-        callUDF(features2raw, new VectorUDT, tmpData(map(featuresCol))).as(map(rawPredictionCol)))
+        callUDF(features2raw, col(map(featuresCol))).as(map(rawPredictionCol)))
      numColsOutput += 1
    }
    if (map(probabilityCol) != "") {
@@ -130,11 +129,11 @@ class LogisticRegressionModel private[ml] (
         Vectors.dense(1.0 - prob1, prob1)
       }
       tmpData = tmpData.select($"*",
-        callUDF(raw2prob, new VectorUDT, tmpData(map(rawPredictionCol))).as(map(probabilityCol)))
+        callUDF(raw2prob, col(map(rawPredictionCol))).as(map(probabilityCol)))
    } else {
      val features2prob: Vector => Vector = predictProbabilities
      tmpData = tmpData.select($"*",
-        callUDF(features2prob, new VectorUDT, tmpData(map(featuresCol))).as(map(probabilityCol)))
+        callUDF(features2prob, col(map(featuresCol))).as(map(probabilityCol)))
    }
    numColsOutput += 1
  }
@@ -145,18 +144,18 @@ class LogisticRegressionModel private[ml] (
         if (probs(1) > t) 1.0 else 0.0
       }
       tmpData = tmpData.select($"*",
-        callUDF(predict, DoubleType, tmpData(map(probabilityCol))).as(map(predictionCol)))
+        callUDF(predict, col(map(probabilityCol))).as(map(predictionCol)))
    } else if (map(rawPredictionCol) != "") {
      val predict: Vector => Double = (rawPreds) => {
        val prob1 = 1.0 / (1.0 + math.exp(-rawPreds(1)))
        if (prob1 > t) 1.0 else 0.0
      }
      tmpData = tmpData.select($"*",
-        callUDF(predict, DoubleType, tmpData(map(rawPredictionCol))).as(map(predictionCol)))
+        callUDF(predict, col(map(rawPredictionCol))).as(map(predictionCol)))
    } else {
      val predict: Vector => Double = this.predict
      tmpData = tmpData.select($"*",
-        callUDF(predict, DoubleType, tmpData(map(featuresCol))).as(map(predictionCol)))
+        callUDF(predict, col(map(featuresCol))).as(map(predictionCol)))
    }
    numColsOutput += 1
  }

mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala

Lines changed: 1 addition & 2 deletions
@@ -117,8 +117,7 @@ abstract class ProbabilisticClassificationModel[
         tmpModel.predictProbabilities(features)
       }
       outputData.select($"*",
-        callUDF(features2probs, new VectorUDT,
-          outputData(map(featuresCol))).as(map(probabilityCol)))
+        callUDF(features2probs, new VectorUDT, col(map(featuresCol))).as(map(probabilityCol)))
    } else {
      if (numColsOutput == 0) {
        this.logWarning(s"$uid: ProbabilisticClassificationModel.transform() was called as NOOP" +

mllib/src/main/scala/org/apache/spark/ml/impl/estimator/Predictor.scala

Lines changed: 1 addition & 2 deletions
@@ -203,8 +203,7 @@ abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType,
       val pred: FeaturesType => Double = (features) => {
         tmpModel.predict(features)
       }
-      dataset.select($"*",
-        callUDF(pred, DoubleType, dataset(map(featuresCol))).as(map(predictionCol)))
+      dataset.select($"*", callUDF(pred, DoubleType, col(map(featuresCol))).as(map(predictionCol)))
    } else {
      this.logWarning(s"$uid: Predictor.transform() was called as NOOP" +
        " since no output columns were set.")
