From 146c2a8b340f2269612a11469205aa10414cbb13 Mon Sep 17 00:00:00 2001
From: vijaykiran <mail@vijaykiran.com>
Date: Tue, 5 Jan 2016 12:08:34 +0100
Subject: [PATCH 1/6] [SPARK-12633][DOC] Update param descriptions

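Reformat the :param: descriptions in the pyspark.mllib.regression
docstrings to a consistent layout: the parameter name on its own line,
the description indented below it, and the default value on a trailing
"(default: ...)" line. See [SPARK-11219] for more details.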
---
 python/pyspark/mllib/regression.py | 209 +++++++++++++++++------------
 1 file changed, 121 insertions(+), 88 deletions(-)

diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 13b3397501c0b..26342f5abfc07 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -37,10 +37,11 @@ class LabeledPoint(object):
     """
     Class that represents the features and labels of a data point.
 
-    :param label: Label for this data point.
-    :param features: Vector of features for this point (NumPy array,
-            list, pyspark.mllib.linalg.SparseVector, or scipy.sparse
-            column matrix)
+    :param label:
+      Label for this data point.
+    :param features:
+      Vector of features for this point (NumPy array, list,
+      pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix)
 
     Note: 'label' and 'features' are accessible as class attributes.
 
@@ -66,8 +67,10 @@ class LinearModel(object):
     """
     A linear model that has a vector of coefficients and an intercept.
 
-    :param weights: Weights computed for every feature.
-    :param intercept: Intercept computed for this model.
+    :param weights:
+      Weights computed for every feature.
+    :param intercept:
+      Intercept computed for this model.
 
     .. versionadded:: 0.9.0
     """
@@ -245,37 +248,45 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
         set of rows of A, each with its corresponding right hand side
         label y. See also the documentation for the precise formulation.
 
-        :param data:              The training data, an RDD of
-                                  LabeledPoint.
-        :param iterations:        The number of iterations
-                                  (default: 100).
-        :param step:              The step parameter used in SGD
-                                  (default: 1.0).
-        :param miniBatchFraction: Fraction of data to be used for each
-                                  SGD iteration (default: 1.0).
-        :param initialWeights:    The initial weights (default: None).
-        :param regParam:          The regularizer parameter
-                                  (default: 0.0).
-        :param regType:           The type of regularizer used for
-                                  training our model.
-
-                                  :Allowed values:
-                                     - "l1" for using L1 regularization (lasso),
-                                     - "l2" for using L2 regularization (ridge),
-                                     - None for no regularization
-
-                                     (default: None)
-
-        :param intercept:         Boolean parameter which indicates the
-                                  use or not of the augmented representation
-                                  for training data (i.e. whether bias
-                                  features are activated or not,
-                                  default: False).
-        :param validateData:      Boolean parameter which indicates if
-                                  the algorithm should validate data
-                                  before training. (default: True)
-        :param convergenceTol:    A condition which decides iteration termination.
-                                  (default: 0.001)
+        :param data:
+          The training data, an RDD of LabeledPoint.
+        :param iterations:
+          The number of iterations.
+          (default: 100)
+        :param step:
+          The step parameter used in SGD.
+          (default: 1.0)
+        :param miniBatchFraction:
+          Fraction of data to be used for each SGD iteration.
+          (default: 1.0)
+        :param initialWeights:
+          The initial weights.
+          (default: None)
+        :param regParam:
+          The regularizer parameter.
+          (default: 0.0)
+        :param regType:
+          The type of regularizer used for training our model.
+
+          :Allowed values:
+          - "l1" for using L1 regularization (lasso),
+          - "l2" for using L2 regularization (ridge),
+          - None for no regularization
+
+          (default: None)
+
+        :param intercept:
+          Boolean parameter which indicates the use or not of the augmented
+          representation for training data (i.e., whether bias features are
+          activated or not).
+          (default: False)
+        :param validateData:
+          Boolean parameter which indicates if the algorithm should validate data
+          before training.
+          (default: True)
+        :param convergenceTol:
+          A condition which decides iteration termination.
+          (default: 0.001)
         """
         def train(rdd, i):
             return callMLlibFunc("trainLinearRegressionModelWithSGD", rdd, int(iterations),
@@ -393,27 +404,35 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
         set of rows of A, each with its corresponding right hand side
         label y. See also the documentation for the precise formulation.
 
-        :param data:              The training data, an RDD of
-                                  LabeledPoint.
-        :param iterations:        The number of iterations
-                                  (default: 100).
-        :param step:              The step parameter used in SGD
-                                  (default: 1.0).
-        :param regParam:          The regularizer parameter
-                                  (default: 0.01).
-        :param miniBatchFraction: Fraction of data to be used for each
-                                  SGD iteration (default: 1.0).
-        :param initialWeights:    The initial weights (default: None).
-        :param intercept:         Boolean parameter which indicates the
-                                  use or not of the augmented representation
-                                  for training data (i.e. whether bias
-                                  features are activated or not,
-                                  default: False).
-        :param validateData:      Boolean parameter which indicates if
-                                  the algorithm should validate data
-                                  before training. (default: True)
-        :param convergenceTol:    A condition which decides iteration termination.
-                                  (default: 0.001)
+        :param data:
+          The training data, an RDD of LabeledPoint.
+        :param iterations:
+          The number of iterations.
+          (default: 100)
+        :param step:
+          The step parameter used in SGD.
+          (default: 1.0)
+        :param regParam:
+          The regularizer parameter.
+          (default: 0.01)
+        :param miniBatchFraction:
+          Fraction of data to be used for each SGD iteration.
+          (default: 1.0)
+        :param initialWeights:
+          The initial weights.
+          (default: None)
+        :param intercept:
+          Boolean parameter which indicates the use or not of the augmented
+          representation for training data (i.e. whether bias features are
+          activated or not).
+         (default: False)
+        :param validateData:
+          Boolean parameter which indicates if the algorithm should validate
+          data before training.
+          (default: True)
+        :param convergenceTol:
+          A condition which decides iteration termination.
+          (default: 0.001)
         """
         def train(rdd, i):
             return callMLlibFunc("trainLassoModelWithSGD", rdd, int(iterations), float(step),
@@ -531,27 +550,35 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
         set of rows of A, each with its corresponding right hand side
         label y. See also the documentation for the precise formulation.
 
-        :param data:              The training data, an RDD of
-                                  LabeledPoint.
-        :param iterations:        The number of iterations
-                                  (default: 100).
-        :param step:              The step parameter used in SGD
-                                  (default: 1.0).
-        :param regParam:          The regularizer parameter
-                                  (default: 0.01).
-        :param miniBatchFraction: Fraction of data to be used for each
-                                  SGD iteration (default: 1.0).
-        :param initialWeights:    The initial weights (default: None).
-        :param intercept:         Boolean parameter which indicates the
-                                  use or not of the augmented representation
-                                  for training data (i.e. whether bias
-                                  features are activated or not,
-                                  default: False).
-        :param validateData:      Boolean parameter which indicates if
-                                  the algorithm should validate data
-                                  before training. (default: True)
-        :param convergenceTol:    A condition which decides iteration termination.
-                                  (default: 0.001)
+        :param data:
+          The training data, an RDD of LabeledPoint.
+        :param iterations:
+          The number of iterations.
+          (default: 100)
+        :param step:
+          The step parameter used in SGD.
+          (default: 1.0)
+        :param regParam:
+          The regularizer parameter.
+          (default: 0.01)
+        :param miniBatchFraction:
+          Fraction of data to be used for each SGD iteration.
+          (default: 1.0)
+        :param initialWeights:
+          The initial weights.
+          (default: None)
+        :param intercept:
+          Boolean parameter which indicates the use or not of the augmented
+          representation for training data (i.e. whether bias features are
+          activated or not).
+          (default: False)
+        :param validateData:
+          Boolean parameter which indicates if the algorithm should validate
+          data before training.
+          (default: True)
+        :param convergenceTol:
+          A condition which decides iteration termination.
+          (default: 0.001)
         """
         def train(rdd, i):
             return callMLlibFunc("trainRidgeModelWithSGD", rdd, int(iterations), float(step),
@@ -566,12 +593,14 @@ class IsotonicRegressionModel(Saveable, Loader):
     """
     Regression model for isotonic regression.
 
-    :param boundaries: Array of boundaries for which predictions are
-            known. Boundaries must be sorted in increasing order.
-    :param predictions: Array of predictions associated to the
-            boundaries at the same index. Results of isotonic
-            regression and therefore monotone.
-    :param isotonic: indicates whether this is isotonic or antitonic.
+    :param boundaries:
+      Array of boundaries for which predictions are known. Boundaries must be
+      sorted in increasing order.
+    :param predictions:
+      Array of predictions associated to the boundaries at the same index.
+      Results of isotonic regression and therefore monotone.
+    :param isotonic:
+      Indicates whether this is isotonic or antitonic.
 
     >>> data = [(1, 0, 1), (2, 1, 1), (3, 2, 1), (1, 3, 1), (6, 4, 1), (17, 5, 1), (16, 6, 1)]
     >>> irm = IsotonicRegression.train(sc.parallelize(data))
@@ -622,7 +651,8 @@ def predict(self, x):
         values with the same boundary then the same rules as in 2)
         are used.
 
-        :param x: Feature or RDD of Features to be labeled.
+        :param x:
+          Feature or RDD of Features to be labeled.
         """
         if isinstance(x, RDD):
             return x.map(lambda v: self.predict(v))
@@ -676,8 +706,11 @@ def train(cls, data, isotonic=True):
         """
         Train a isotonic regression model on the given data.
 
-        :param data: RDD of (label, feature, weight) tuples.
-        :param isotonic: Whether this is isotonic or antitonic.
+        :param data:
+          RDD of (label, feature, weight) tuples.
+        :param isotonic:
+          Whether this is isotonic or antitonic.
+          (default: True)
         """
         boundaries, predictions = callMLlibFunc("trainIsotonicRegressionModel",
                                                 data.map(_convert_to_vector), bool(isotonic))

From d361d70806a9e758a9ee2986c144a89f6a0c7b63 Mon Sep 17 00:00:00 2001
From: vijaykiran <mail@vijaykiran.com>
Date: Wed, 6 Jan 2016 11:30:18 +0100
Subject: [PATCH 2/6] Style Fixes

Re-wrap the parameter descriptions to a fill-column of 100 and remove
the blank lines around the regType allowed-values list.
---
 python/pyspark/mllib/regression.py | 39 ++++++++++++------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 26342f5abfc07..c6b52c9fbd64b 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -40,8 +40,8 @@ class LabeledPoint(object):
     :param label:
       Label for this data point.
     :param features:
-      Vector of features for this point (NumPy array, list,
-      pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix)
+      Vector of features for this point (NumPy array, list, pyspark.mllib.linalg.SparseVector, or
+      scipy.sparse column matrix)
 
     Note: 'label' and 'features' are accessible as class attributes.
 
@@ -267,22 +267,17 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
           (default: 0.0)
         :param regType:
           The type of regularizer used for training our model.
-
           :Allowed values:
           - "l1" for using L1 regularization (lasso),
           - "l2" for using L2 regularization (ridge),
           - None for no regularization
-
           (default: None)
-
         :param intercept:
-          Boolean parameter which indicates the use or not of the augmented
-          representation for training data (i.e., whether bias features are
-          activated or not).
+          Boolean parameter which indicates the use or not of the augmented representation for
+          training data (i.e., whether bias features are activated or not).
           (default: False)
         :param validateData:
-          Boolean parameter which indicates if the algorithm should validate data
-          before training.
+          Boolean parameter which indicates if the algorithm should validate data before training.
           (default: True)
         :param convergenceTol:
           A condition which decides iteration termination.
@@ -422,13 +417,11 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
           The initial weights.
           (default: None)
         :param intercept:
-          Boolean parameter which indicates the use or not of the augmented
-          representation for training data (i.e. whether bias features are
-          activated or not).
+          Boolean parameter which indicates the use or not of the augmented representation for
+          training data (i.e. whether bias features are activated or not).
          (default: False)
         :param validateData:
-          Boolean parameter which indicates if the algorithm should validate
-          data before training.
+          Boolean parameter which indicates if the algorithm should validate data before training.
           (default: True)
         :param convergenceTol:
           A condition which decides iteration termination.
@@ -568,13 +561,11 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
           The initial weights.
           (default: None)
         :param intercept:
-          Boolean parameter which indicates the use or not of the augmented
-          representation for training data (i.e. whether bias features are
-          activated or not).
+          Boolean parameter which indicates the use or not of the augmented representation for
+          training data (i.e. whether bias features are activated or not).
           (default: False)
         :param validateData:
-          Boolean parameter which indicates if the algorithm should validate
-          data before training.
+          Boolean parameter which indicates if the algorithm should validate data before training.
           (default: True)
         :param convergenceTol:
           A condition which decides iteration termination.
@@ -594,11 +585,11 @@ class IsotonicRegressionModel(Saveable, Loader):
     Regression model for isotonic regression.
 
     :param boundaries:
-      Array of boundaries for which predictions are known. Boundaries must be
-      sorted in increasing order.
+      Array of boundaries for which predictions are known. Boundaries must be sorted in increasing
+      order.
     :param predictions:
-      Array of predictions associated to the boundaries at the same index.
-      Results of isotonic regression and therefore monotone.
+      Array of predictions associated to the boundaries at the same index. Results of isotonic
+      regression and therefore monotone.
     :param isotonic:
       Indicates whether this is isotonic or antitonic.
 

From 45bec55b2f6bb165a0491e71bff6f2341a58b744 Mon Sep 17 00:00:00 2001
From: vijaykiran <mail@vijaykiran.com>
Date: Fri, 22 Jan 2016 15:21:51 +0100
Subject: [PATCH 3/6] Limit parameter descriptions to col 74

---
 python/pyspark/mllib/regression.py | 36 +++++++++++++++++-------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index c6b52c9fbd64b..de97ba515fb7b 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -40,8 +40,8 @@ class LabeledPoint(object):
     :param label:
       Label for this data point.
     :param features:
-      Vector of features for this point (NumPy array, list, pyspark.mllib.linalg.SparseVector, or
-      scipy.sparse column matrix)
+      Vector of features for this point (NumPy array, list,
+      pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix)
 
     Note: 'label' and 'features' are accessible as class attributes.
 
@@ -273,11 +273,13 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
           - None for no regularization
           (default: None)
         :param intercept:
-          Boolean parameter which indicates the use or not of the augmented representation for
-          training data (i.e., whether bias features are activated or not).
+          Boolean parameter which indicates the use or not of the
+          augmented representation for training data (i.e., whether bias
+          features are activated or not).
           (default: False)
         :param validateData:
-          Boolean parameter which indicates if the algorithm should validate data before training.
+          Boolean parameter which indicates if the algorithm should
+          validate data before training.
           (default: True)
         :param convergenceTol:
           A condition which decides iteration termination.
@@ -417,11 +419,13 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
           The initial weights.
           (default: None)
         :param intercept:
-          Boolean parameter which indicates the use or not of the augmented representation for
-          training data (i.e. whether bias features are activated or not).
+          Boolean parameter which indicates the use or not of the
+          augmented representation for training data (i.e. whether bias
+          features are activated or not).
          (default: False)
         :param validateData:
-          Boolean parameter which indicates if the algorithm should validate data before training.
+          Boolean parameter which indicates if the algorithm should
+          validate data before training.
           (default: True)
         :param convergenceTol:
           A condition which decides iteration termination.
@@ -561,11 +565,13 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
           The initial weights.
           (default: None)
         :param intercept:
-          Boolean parameter which indicates the use or not of the augmented representation for
-          training data (i.e. whether bias features are activated or not).
+          Boolean parameter which indicates the use or not of the
+          augmented representation for training data (i.e. whether bias
+          features are activated or not).
           (default: False)
         :param validateData:
-          Boolean parameter which indicates if the algorithm should validate data before training.
+          Boolean parameter which indicates if the algorithm should
+          validate data before training.
           (default: True)
         :param convergenceTol:
           A condition which decides iteration termination.
@@ -585,11 +591,11 @@ class IsotonicRegressionModel(Saveable, Loader):
     Regression model for isotonic regression.
 
     :param boundaries:
-      Array of boundaries for which predictions are known. Boundaries must be sorted in increasing
-      order.
+      Array of boundaries for which predictions are known. Boundaries must
+      be sorted in increasing order.
     :param predictions:
-      Array of predictions associated to the boundaries at the same index. Results of isotonic
-      regression and therefore monotone.
+      Array of predictions associated to the boundaries at the same index.
+      Results of isotonic regression and therefore monotone.
     :param isotonic:
       Indicates whether this is isotonic or antitonic.
 

From 5feecbad219895696709d804facfb8c575d1d5b4 Mon Sep 17 00:00:00 2001
From: vijaykiran <mail@vijaykiran.com>
Date: Sat, 23 Jan 2016 08:13:15 +0100
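Subject: [PATCH 4/6] Fix indentation

Re-indent the misaligned "(default: False)" line in the
LassoWithSGD.train docstring and end the LabeledPoint `features`
description with a period.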

---
 python/pyspark/mllib/regression.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index de97ba515fb7b..d5e3adb5ac9e2 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -41,7 +41,7 @@ class LabeledPoint(object):
       Label for this data point.
     :param features:
       Vector of features for this point (NumPy array, list,
-      pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix)
+      pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix).
 
     Note: 'label' and 'features' are accessible as class attributes.
 
@@ -422,7 +422,7 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
           Boolean parameter which indicates the use or not of the
           augmented representation for training data (i.e. whether bias
           features are activated or not).
-         (default: False)
+          (default: False)
         :param validateData:
           Boolean parameter which indicates if the algorithm should
           validate data before training.

From 2e535424dae80fad627c6c23965046f8680139f6 Mon Sep 17 00:00:00 2001
From: Bryan Cutler <cutlerb@gmail.com>
Date: Fri, 26 Feb 2016 16:19:28 -0800
Subject: [PATCH 5/6] [SPARK-12633] Fixed allowed values, cleanup, and sync
 with Scala API

---
 python/pyspark/mllib/regression.py | 120 +++++++++++------------------
 1 file changed, 46 insertions(+), 74 deletions(-)

diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index ecd50f0735197..3b77a6200054f 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -220,19 +220,8 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
 
 class LinearRegressionWithSGD(object):
     """
-    Train a linear regression model with no regularization using Stochastic Gradient Descent.
-    This solves the least squares regression formulation
-
-        f(weights) = 1/n ||A weights-y||^2
-
-    which is the mean squared error.
-    Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with
-    its corresponding right hand side label y.
-    See also the documentation for the precise formulation.
-
     .. versionadded:: 0.9.0
     """
-
     @classmethod
     @since("0.9.0")
     def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
@@ -240,15 +229,15 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
               validateData=True, convergenceTol=0.001):
         """
         Train a linear regression model using Stochastic Gradient
-        Descent (SGD).
-        This solves the least squares regression formulation
+        Descent (SGD). This solves the least squares regression
+        formulation
 
-            f(weights) = 1/(2n) ||A weights - y||^2,
+            f(weights) = 1/(2n) ||A weights - y||^2
 
-        which is the mean squared error.
-        Here the data matrix has n rows, and the input RDD holds the
-        set of rows of A, each with its corresponding right hand side
-        label y. See also the documentation for the precise formulation.
+        which is the mean squared error. Here the data matrix has n rows,
+        and the input RDD holds the set of rows of A, each with its
+        corresponding right hand side label y.
+        See also the documentation for the precise formulation.
 
         :param data:
           The training data, an RDD of LabeledPoint.
@@ -269,11 +258,11 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
           (default: 0.0)
         :param regType:
           The type of regularizer used for training our model.
-          :Allowed values:
-          - "l1" for using L1 regularization (lasso),
-          - "l2" for using L2 regularization (ridge),
-          - None for no regularization
-          (default: None)
+          Supported values:
+
+            - "l1" for using L1 regularization
+            - "l2" for using L2 regularization
+            - None for no regularization (default)
         :param intercept:
           Boolean parameter which indicates the use or not of the
           augmented representation for training data (i.e., whether bias
@@ -376,34 +365,23 @@ def load(cls, sc, path):
 
 class LassoWithSGD(object):
     """
-    Train a regression model with L1-regularization using Stochastic Gradient Descent.
-    This solves the L1-regularized least squares regression formulation
-
-        f(weights) = 1/2n ||A weights-y||^2  + regParam ||weights||_1
-
-    Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with
-    its corresponding right hand side label y.
-    See also the documentation for the precise formulation.
-
     .. versionadded:: 0.9.0
     """
-
     @classmethod
     @since("0.9.0")
     def train(cls, data, iterations=100, step=1.0, regParam=0.01,
               miniBatchFraction=1.0, initialWeights=None, intercept=False,
               validateData=True, convergenceTol=0.001):
         """
-        Train a regression model with L1-regularization using
-        Stochastic Gradient Descent.
-        This solves the l1-regularized least squares regression
-        formulation
+        Train a regression model with L1-regularization using Stochastic
+        Gradient Descent. This solves the L1-regularized least squares
+        regression formulation
 
-            f(weights) = 1/(2n) ||A weights - y||^2  + regParam ||weights||_1.
+            f(weights) = 1/(2n) ||A weights - y||^2  + regParam ||weights||_1
 
-        Here the data matrix has n rows, and the input RDD holds the
-        set of rows of A, each with its corresponding right hand side
-        label y. See also the documentation for the precise formulation.
+        Here the data matrix has n rows, and the input RDD holds the set
+        of rows of A, each with its corresponding right hand side label y.
+        See also the documentation for the precise formulation.
 
         :param data:
           The training data, an RDD of LabeledPoint.
@@ -524,34 +502,23 @@ def load(cls, sc, path):
 
 class RidgeRegressionWithSGD(object):
     """
-    Train a regression model with L2-regularization using Stochastic Gradient Descent.
-    This solves the L2-regularized least squares regression formulation
-
-          f(weights) = 1/2n ||A weights-y||^2  + regParam/2 ||weights||^2
-
-    Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with
-    its corresponding right hand side label y.
-    See also the documentation for the precise formulation.
-
     .. versionadded:: 0.9.0
     """
-
     @classmethod
     @since("0.9.0")
     def train(cls, data, iterations=100, step=1.0, regParam=0.01,
               miniBatchFraction=1.0, initialWeights=None, intercept=False,
               validateData=True, convergenceTol=0.001):
         """
-        Train a regression model with L2-regularization using
-        Stochastic Gradient Descent.
-        This solves the l2-regularized least squares regression
-        formulation
+        Train a regression model with L2-regularization using Stochastic
+        Gradient Descent. This solves the L2-regularized least squares
+        regression formulation
 
-            f(weights) = 1/(2n) ||A weights - y||^2 + regParam/2 ||weights||^2.
+            f(weights) = 1/(2n) ||A weights - y||^2 + regParam/2 ||weights||^2
 
-        Here the data matrix has n rows, and the input RDD holds the
-        set of rows of A, each with its corresponding right hand side
-        label y. See also the documentation for the precise formulation.
+        Here the data matrix has n rows, and the input RDD holds the set
+        of rows of A, each with its corresponding right hand side label y.
+        See also the documentation for the precise formulation.
 
         :param data:
           The training data, an RDD of LabeledPoint.
@@ -597,11 +564,11 @@ class IsotonicRegressionModel(Saveable, Loader):
     Regression model for isotonic regression.
 
     :param boundaries:
-      Array of boundaries for which predictions are known. Boundaries must
-      be sorted in increasing order.
+      Array of boundaries for which predictions are known. Boundaries
+      must be sorted in increasing order.
     :param predictions:
-      Array of predictions associated to the boundaries at the same index.
-      Results of isotonic regression and therefore monotone.
+      Array of predictions associated to the boundaries at the same
+      index. Results of isotonic regression and therefore monotone.
     :param isotonic:
       Indicates whether this is isotonic or antitonic.
 
@@ -684,8 +651,8 @@ def load(cls, sc, path):
 class IsotonicRegression(object):
     """
     Isotonic regression.
-    Currently implemented using parallelized pool adjacent violators algorithm.
-    Only univariate (single feature) algorithm supported.
+    Currently implemented using parallelized pool adjacent violators
+    algorithm. Only univariate (single feature) algorithm supported.
 
     Sequential PAV implementation based on:
 
@@ -751,9 +718,11 @@ def _validate(self, dstream):
     @since("1.5.0")
     def predictOn(self, dstream):
         """
-        Make predictions on a dstream.
+        Use the model to make predictions on batches of data from a
+        DStream.
 
-        :return: Transformed dstream object.
+        :return:
+          DStream containing predictions.
         """
         self._validate(dstream)
         return dstream.map(lambda x: self._model.predict(x))
@@ -761,9 +730,11 @@ def predictOn(self, dstream):
     @since("1.5.0")
     def predictOnValues(self, dstream):
         """
-        Make predictions on a keyed dstream.
+        Use the model to make predictions on the values of a DStream and
+        carry over its keys.
 
-        :return: Transformed dstream object.
+        :return:
+          DStream containing the input keys and the predictions as values.
         """
         self._validate(dstream)
         return dstream.mapValues(lambda x: self._model.predict(x))
@@ -772,14 +743,15 @@ def predictOnValues(self, dstream):
 @inherit_doc
 class StreamingLinearRegressionWithSGD(StreamingLinearAlgorithm):
     """
-    Train or predict a linear regression model on streaming data. Training uses
-    Stochastic Gradient Descent to update the model based on each new batch of
-    incoming data from a DStream (see `LinearRegressionWithSGD` for model equation).
+    Train or predict a linear regression model on streaming data.
+    Training uses Stochastic Gradient Descent to update the model
+    based on each new batch of incoming data from a DStream
+    (see `LinearRegressionWithSGD` for model equation).
 
     Each batch of data is assumed to be an RDD of LabeledPoints.
     The number of data points per batch can vary, but the number
-    of features must be constant. An initial weight
-    vector must be provided.
+    of features must be constant. An initial weight vector must
+    be provided.
 
     :param stepSize:
       Step size for each iteration of gradient descent.

From 94d532dbb410f2a5b96a563f38e543edea66eb98 Mon Sep 17 00:00:00 2001
From: Bryan Cutler <cutlerb@gmail.com>
Date: Fri, 26 Feb 2016 16:20:09 -0800
Subject: [PATCH 6/6] Changed 'Allowed values:' -> 'Supported values:' to be
 consistent

---
 python/pyspark/mllib/classification.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index b4d54ef61b0e6..13cf8b6b3899b 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -294,7 +294,7 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
           (default: 0.01)
         :param regType:
           The type of regularizer used for training our model.
-          Allowed values:
+          Supported values:
 
             - "l1" for using L1 regularization
             - "l2" for using L2 regularization (default)
@@ -344,7 +344,7 @@ def train(cls, data, iterations=100, initialWeights=None, regParam=0.01, regType
           (default: 0.01)
         :param regType:
           The type of regularizer used for training our model.
-          Allowed values:
+          Supported values:
 
             - "l1" for using L1 regularization
             - "l2" for using L2 regularization (default)