Skip to content

Commit 2178b63

Browse files
author
DB Tsai
committed
add comments
1 parent 9988ca8 commit 2178b63

File tree

1 file changed

+23
-1
lines changed

1 file changed

+23
-1
lines changed

mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,23 @@ class LinearRegressionModel private[ml] (
191191
}
192192
}
193193

194+
/**
195+
* LeastSquaresAggregator computes the gradient and loss for a Least-squared loss function,
196+
 * as used in linear regression for samples in sparse or dense vectors in an online fashion.
197+
*
198+
 * Two LeastSquaresAggregators can be merged together to obtain a summary of the loss and gradient of
199+
* the corresponding joint dataset.
200+
*
201+
202+
 * Compute gradient and loss for a Least-squared loss function, as used in linear regression.
203+
* This is correct for the averaged least squares loss function (mean squared error)
204+
 * L = 1/(2n) * ||A weights - y||^2
205+
* See also the documentation for the precise formulation.
206+
*
207+
* @param weights weights/coefficients corresponding to features
208+
*
209+
* @param updater Updater to be used to update weights after every iteration.
210+
*/
194211
private class LeastSquaresAggregator(
195212
weights: Vector,
196213
labelStd: Double,
@@ -302,6 +319,11 @@ private class LeastSquaresAggregator(
302319
}
303320
}
304321

322+
/**
323+
* LeastSquaresCostFun implements Breeze's DiffFunction[T] for Least Squares cost.
324+
* It returns the loss and gradient with L2 regularization at a particular point (weights).
325+
* It's used in Breeze's convex optimization routines.
326+
*/
305327
private class LeastSquaresCostFun(
306328
data: RDD[(Double, Vector)],
307329
labelStd: Double,
@@ -322,7 +344,7 @@ private class LeastSquaresCostFun(
322344
case (aggregator1, aggregator2) => aggregator1.merge(aggregator2)
323345
})
324346

325-
// regVal is sum of weight squares for L2 regularization
347+
// regVal is the sum of weight squares for L2 regularization
326348
val norm = brzNorm(weights, 2.0)
327349
val regVal = 0.5 * effectiveL2regParam * norm * norm
328350

0 commit comments

Comments
 (0)