@@ -22,7 +22,6 @@ import org.apache.spark.ml.param._
2222import org .apache .spark .mllib .classification .LogisticRegressionWithLBFGS
2323import org .apache .spark .mllib .linalg .{BLAS , Vector , Vectors }
2424import org .apache .spark .sql .DataFrame
25- import org .apache .spark .sql .Dsl ._
2625import org .apache .spark .storage .StorageLevel
2726
2827
@@ -103,69 +102,6 @@ class LogisticRegressionModel private[ml] (
103102 1.0 / (1.0 + math.exp(- m))
104103 }
105104
106- override def transform (dataset : DataFrame , paramMap : ParamMap ): DataFrame = {
107- // Check schema
108- transformSchema(dataset.schema, paramMap, logging = true )
109-
110- val map = this .paramMap ++ paramMap
111-
112- // Output selected columns only.
113- // This is a bit complicated since it tries to avoid repeated computation.
114- // rawPrediction (-margin, margin)
115- // probability (1.0-score, score)
116- // prediction (max margin)
117- var tmpData = dataset
118- var numColsOutput = 0
119- if (map(rawPredictionCol) != " " ) {
120- val features2raw : Vector => Vector = predictRaw
121- tmpData = tmpData.select($" *" ,
122- callUDF(features2raw, col(map(featuresCol))).as(map(rawPredictionCol)))
123- numColsOutput += 1
124- }
125- if (map(probabilityCol) != " " ) {
126- if (map(rawPredictionCol) != " " ) {
127- val raw2prob : Vector => Vector = (rawPreds) => {
128- val prob1 = 1.0 / (1.0 + math.exp(- rawPreds(1 )))
129- Vectors .dense(1.0 - prob1, prob1)
130- }
131- tmpData = tmpData.select($" *" ,
132- callUDF(raw2prob, col(map(rawPredictionCol))).as(map(probabilityCol)))
133- } else {
134- val features2prob : Vector => Vector = predictProbabilities
135- tmpData = tmpData.select($" *" ,
136- callUDF(features2prob, col(map(featuresCol))).as(map(probabilityCol)))
137- }
138- numColsOutput += 1
139- }
140- if (map(predictionCol) != " " ) {
141- val t = map(threshold)
142- if (map(probabilityCol) != " " ) {
143- val predict : Vector => Double = (probs) => {
144- if (probs(1 ) > t) 1.0 else 0.0
145- }
146- tmpData = tmpData.select($" *" ,
147- callUDF(predict, col(map(probabilityCol))).as(map(predictionCol)))
148- } else if (map(rawPredictionCol) != " " ) {
149- val predict : Vector => Double = (rawPreds) => {
150- val prob1 = 1.0 / (1.0 + math.exp(- rawPreds(1 )))
151- if (prob1 > t) 1.0 else 0.0
152- }
153- tmpData = tmpData.select($" *" ,
154- callUDF(predict, col(map(rawPredictionCol))).as(map(predictionCol)))
155- } else {
156- val predict : Vector => Double = this .predict
157- tmpData = tmpData.select($" *" ,
158- callUDF(predict, col(map(featuresCol))).as(map(predictionCol)))
159- }
160- numColsOutput += 1
161- }
162- if (numColsOutput == 0 ) {
163- this .logWarning(s " $uid: LogisticRegressionModel.transform() was called as NOOP " +
164- " since no output columns were set." )
165- }
166- tmpData
167- }
168-
169105 override val numClasses : Int = 2
170106
171107 /**
0 commit comments