fixes after rebasing on master

jkbradley · jkbradley · commit 8316d5ed8a9e · 2015-02-05T13:29:56.000-08:00
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
@@ -40,10 +40,10 @@ object DeveloperApiExample {
     val conf = new SparkConf().setAppName("DeveloperApiExample")
     val sc = new SparkContext(conf)
     val sqlContext = new SQLContext(sc)
-    import sqlContext._
+    import sqlContext.implicits._
 
     // Prepare training data.
-    val training = sparkContext.parallelize(Seq(
+    val training = sc.parallelize(Seq(
       LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
       LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)),
       LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)),
@@ -61,7 +61,7 @@ object DeveloperApiExample {
     val model = lr.fit(training)
 
     // Prepare test data.
-    val test = sparkContext.parallelize(Seq(
+    val test = sc.parallelize(Seq(
       LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)),
       LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)),
       LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5))))
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala
@@ -81,7 +81,7 @@ object SimpleParamsExample {
     println("Model 2 was fit using parameters: " + model2.fittingParamMap)
 
     // Prepare test data.
-    val test = sparkContext.parallelize(Seq(
+    val test = sc.parallelize(Seq(
       LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)),
       LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)),
       LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5))))
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala
@@ -181,8 +181,8 @@ private[ml] object ClassificationModel {
         val raw2pred: Vector => Double = (rawPred) => {
           rawPred.toArray.zipWithIndex.maxBy(_._1)._2
         }
-        tmpData = tmpData.select($"*",
-          callUDF(raw2pred, col(map(model.rawPredictionCol))).as(map(model.predictionCol)))
+        tmpData = tmpData.select($"*", callUDF(raw2pred, DoubleType,
+          col(map(model.rawPredictionCol))).as(map(model.predictionCol)))
         numColsOutput += 1
       }
     } else if (map(model.predictionCol) != "") {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -22,7 +22,6 @@ import org.apache.spark.ml.param._
 import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
 import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
 import org.apache.spark.sql.DataFrame
-import org.apache.spark.sql.Dsl._
 import org.apache.spark.storage.StorageLevel
 
 
@@ -103,69 +102,6 @@ class LogisticRegressionModel private[ml] (
     1.0 / (1.0 + math.exp(-m))
   }
 
-  override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = {
-    // Check schema
-    transformSchema(dataset.schema, paramMap, logging = true)
-
-    val map = this.paramMap ++ paramMap
-
-    // Output selected columns only.
-    // This is a bit complicated since it tries to avoid repeated computation.
-    //   rawPrediction (-margin, margin)
-    //   probability (1.0-score, score)
-    //   prediction (max margin)
-    var tmpData = dataset
-    var numColsOutput = 0
-    if (map(rawPredictionCol) != "") {
-      val features2raw: Vector => Vector = predictRaw
-      tmpData = tmpData.select($"*",
-        callUDF(features2raw, col(map(featuresCol))).as(map(rawPredictionCol)))
-      numColsOutput += 1
-    }
-    if (map(probabilityCol) != "") {
-      if (map(rawPredictionCol) != "") {
-        val raw2prob: Vector => Vector = (rawPreds) => {
-          val prob1 = 1.0 / (1.0 + math.exp(-rawPreds(1)))
-          Vectors.dense(1.0 - prob1, prob1)
-        }
-        tmpData = tmpData.select($"*",
-          callUDF(raw2prob, col(map(rawPredictionCol))).as(map(probabilityCol)))
-      } else {
-        val features2prob: Vector => Vector = predictProbabilities
-        tmpData = tmpData.select($"*",
-          callUDF(features2prob, col(map(featuresCol))).as(map(probabilityCol)))
-      }
-      numColsOutput += 1
-    }
-    if (map(predictionCol) != "") {
-      val t = map(threshold)
-      if (map(probabilityCol) != "") {
-        val predict: Vector => Double = (probs) => {
-          if (probs(1) > t) 1.0 else 0.0
-        }
-        tmpData = tmpData.select($"*",
-          callUDF(predict, col(map(probabilityCol))).as(map(predictionCol)))
-      } else if (map(rawPredictionCol) != "") {
-        val predict: Vector => Double = (rawPreds) => {
-          val prob1 = 1.0 / (1.0 + math.exp(-rawPreds(1)))
-          if (prob1 > t) 1.0 else 0.0
-        }
-        tmpData = tmpData.select($"*",
-          callUDF(predict, col(map(rawPredictionCol))).as(map(predictionCol)))
-      } else {
-        val predict: Vector => Double = this.predict
-        tmpData = tmpData.select($"*",
-          callUDF(predict, col(map(featuresCol))).as(map(predictionCol)))
-      }
-      numColsOutput += 1
-    }
-    if (numColsOutput == 0) {
-      this.logWarning(s"$uid: LogisticRegressionModel.transform() was called as NOOP" +
-        " since no output columns were set.")
-    }
-    tmpData
-  }
-
   override val numClasses: Int = 2
 
   /**

Original file line number	Diff line number	Diff line change
`@@ -181,8 +181,8 @@ private[ml] object ClassificationModel {`
`181`	`181`	`val raw2pred: Vector => Double = (rawPred) => {`
`182`	`182`	`rawPred.toArray.zipWithIndex.maxBy(_._1)._2`
`183`	`183`	`}`
`184`		`- tmpData = tmpData.select($"*",`
`185`		`- callUDF(raw2pred, col(map(model.rawPredictionCol))).as(map(model.predictionCol)))`
	`184`	`+ tmpData = tmpData.select($"*", callUDF(raw2pred, DoubleType,`
	`185`	`+ col(map(model.rawPredictionCol))).as(map(model.predictionCol)))`
`186`	`186`	`numColsOutput += 1`
`187`	`187`	`}`
`188`	`188`	`} else if (map(model.predictionCol) != "") {`