From 146c2a8b340f2269612a11469205aa10414cbb13 Mon Sep 17 00:00:00 2001
From: vijaykiran <mail@vijaykiran.com>
Date: Tue, 5 Jan 2016 12:08:34 +0100
Subject: [PATCH 1/4] [SPARK-12633][DOC] Update param descriptions

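Reformat the parameter descriptions in the pyspark.mllib.regression
docstrings so they follow one consistent layout: each description starts
on its own indented line below the `:param name:` field, and the default
value is listed on a separate line. See [SPARK-11219] for more details.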
---
 python/pyspark/mllib/regression.py | 209 +++++++++++++++++------------
 1 file changed, 121 insertions(+), 88 deletions(-)

diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 13b3397501c0b..26342f5abfc07 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -37,10 +37,11 @@ class LabeledPoint(object):
     """
     Class that represents the features and labels of a data point.
 
-    :param label: Label for this data point.
-    :param features: Vector of features for this point (NumPy array,
-            list, pyspark.mllib.linalg.SparseVector, or scipy.sparse
-            column matrix)
+    :param label:
+      Label for this data point.
+    :param features:
+      Vector of features for this point (NumPy array, list,
+      pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix)
 
     Note: 'label' and 'features' are accessible as class attributes.
 
@@ -66,8 +67,10 @@ class LinearModel(object):
     """
     A linear model that has a vector of coefficients and an intercept.
 
-    :param weights: Weights computed for every feature.
-    :param intercept: Intercept computed for this model.
+    :param weights:
+      Weights computed for every feature.
+    :param intercept:
+      Intercept computed for this model.
 
     .. versionadded:: 0.9.0
     """
@@ -245,37 +248,45 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
         set of rows of A, each with its corresponding right hand side
         label y. See also the documentation for the precise formulation.
 
-        :param data:              The training data, an RDD of
-                                  LabeledPoint.
-        :param iterations:        The number of iterations
-                                  (default: 100).
-        :param step:              The step parameter used in SGD
-                                  (default: 1.0).
-        :param miniBatchFraction: Fraction of data to be used for each
-                                  SGD iteration (default: 1.0).
-        :param initialWeights:    The initial weights (default: None).
-        :param regParam:          The regularizer parameter
-                                  (default: 0.0).
-        :param regType:           The type of regularizer used for
-                                  training our model.
-
-                                  :Allowed values:
-                                     - "l1" for using L1 regularization (lasso),
-                                     - "l2" for using L2 regularization (ridge),
-                                     - None for no regularization
-
-                                     (default: None)
-
-        :param intercept:         Boolean parameter which indicates the
-                                  use or not of the augmented representation
-                                  for training data (i.e. whether bias
-                                  features are activated or not,
-                                  default: False).
-        :param validateData:      Boolean parameter which indicates if
-                                  the algorithm should validate data
-                                  before training. (default: True)
-        :param convergenceTol:    A condition which decides iteration termination.
-                                  (default: 0.001)
+        :param data:
+          The training data, an RDD of LabeledPoint.
+        :param iterations:
+          The number of iterations.
+          (default: 100)
+        :param step:
+          The step parameter used in SGD.
+          (default: 1.0)
+        :param miniBatchFraction:
+          Fraction of data to be used for each SGD iteration.
+          (default: 1.0)
+        :param initialWeights:
+          The initial weights.
+          (default: None)
+        :param regParam:
+          The regularizer parameter.
+          (default: 0.0)
+        :param regType:
+          The type of regularizer used for training our model.
+
+          :Allowed values:
+          - "l1" for using L1 regularization (lasso),
+          - "l2" for using L2 regularization (ridge),
+          - None for no regularization
+
+          (default: None)
+
+        :param intercept:
+          Boolean parameter which indicates the use or not of the augmented
+          representation for training data (i.e., whether bias features are
+          activated or not).
+          (default: False)
+        :param validateData:
+          Boolean parameter which indicates if the algorithm should validate data
+          before training.
+          (default: True)
+        :param convergenceTol:
+          A condition which decides iteration termination.
+          (default: 0.001)
         """
         def train(rdd, i):
             return callMLlibFunc("trainLinearRegressionModelWithSGD", rdd, int(iterations),
@@ -393,27 +404,35 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
         set of rows of A, each with its corresponding right hand side
         label y. See also the documentation for the precise formulation.
 
-        :param data:              The training data, an RDD of
-                                  LabeledPoint.
-        :param iterations:        The number of iterations
-                                  (default: 100).
-        :param step:              The step parameter used in SGD
-                                  (default: 1.0).
-        :param regParam:          The regularizer parameter
-                                  (default: 0.01).
-        :param miniBatchFraction: Fraction of data to be used for each
-                                  SGD iteration (default: 1.0).
-        :param initialWeights:    The initial weights (default: None).
-        :param intercept:         Boolean parameter which indicates the
-                                  use or not of the augmented representation
-                                  for training data (i.e. whether bias
-                                  features are activated or not,
-                                  default: False).
-        :param validateData:      Boolean parameter which indicates if
-                                  the algorithm should validate data
-                                  before training. (default: True)
-        :param convergenceTol:    A condition which decides iteration termination.
-                                  (default: 0.001)
+        :param data:
+          The training data, an RDD of LabeledPoint.
+        :param iterations:
+          The number of iterations.
+          (default: 100)
+        :param step:
+          The step parameter used in SGD.
+          (default: 1.0)
+        :param regParam:
+          The regularizer parameter.
+          (default: 0.01)
+        :param miniBatchFraction:
+          Fraction of data to be used for each SGD iteration.
+          (default: 1.0)
+        :param initialWeights:
+          The initial weights.
+          (default: None)
+        :param intercept:
+          Boolean parameter which indicates the use or not of the augmented
+          representation for training data (i.e. whether bias features are
+          activated or not).
+         (default: False)
+        :param validateData:
+          Boolean parameter which indicates if the algorithm should validate
+          data before training.
+          (default: True)
+        :param convergenceTol:
+          A condition which decides iteration termination.
+          (default: 0.001)
         """
         def train(rdd, i):
             return callMLlibFunc("trainLassoModelWithSGD", rdd, int(iterations), float(step),
@@ -531,27 +550,35 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
         set of rows of A, each with its corresponding right hand side
         label y. See also the documentation for the precise formulation.
 
-        :param data:              The training data, an RDD of
-                                  LabeledPoint.
-        :param iterations:        The number of iterations
-                                  (default: 100).
-        :param step:              The step parameter used in SGD
-                                  (default: 1.0).
-        :param regParam:          The regularizer parameter
-                                  (default: 0.01).
-        :param miniBatchFraction: Fraction of data to be used for each
-                                  SGD iteration (default: 1.0).
-        :param initialWeights:    The initial weights (default: None).
-        :param intercept:         Boolean parameter which indicates the
-                                  use or not of the augmented representation
-                                  for training data (i.e. whether bias
-                                  features are activated or not,
-                                  default: False).
-        :param validateData:      Boolean parameter which indicates if
-                                  the algorithm should validate data
-                                  before training. (default: True)
-        :param convergenceTol:    A condition which decides iteration termination.
-                                  (default: 0.001)
+        :param data:
+          The training data, an RDD of LabeledPoint.
+        :param iterations:
+          The number of iterations.
+          (default: 100)
+        :param step:
+          The step parameter used in SGD.
+          (default: 1.0)
+        :param regParam:
+          The regularizer parameter.
+          (default: 0.01)
+        :param miniBatchFraction:
+          Fraction of data to be used for each SGD iteration.
+          (default: 1.0)
+        :param initialWeights:
+          The initial weights.
+          (default: None)
+        :param intercept:
+          Boolean parameter which indicates the use or not of the augmented
+          representation for training data (i.e. whether bias features are
+          activated or not).
+          (default: False)
+        :param validateData:
+          Boolean parameter which indicates if the algorithm should validate
+          data before training.
+          (default: True)
+        :param convergenceTol:
+          A condition which decides iteration termination.
+          (default: 0.001)
         """
         def train(rdd, i):
             return callMLlibFunc("trainRidgeModelWithSGD", rdd, int(iterations), float(step),
@@ -566,12 +593,14 @@ class IsotonicRegressionModel(Saveable, Loader):
     """
     Regression model for isotonic regression.
 
-    :param boundaries: Array of boundaries for which predictions are
-            known. Boundaries must be sorted in increasing order.
-    :param predictions: Array of predictions associated to the
-            boundaries at the same index. Results of isotonic
-            regression and therefore monotone.
-    :param isotonic: indicates whether this is isotonic or antitonic.
+    :param boundaries:
+      Array of boundaries for which predictions are known. Boundaries must be
+      sorted in increasing order.
+    :param predictions:
+      Array of predictions associated to the boundaries at the same index.
+      Results of isotonic regression and therefore monotone.
+    :param isotonic:
+      Indicates whether this is isotonic or antitonic.
 
     >>> data = [(1, 0, 1), (2, 1, 1), (3, 2, 1), (1, 3, 1), (6, 4, 1), (17, 5, 1), (16, 6, 1)]
     >>> irm = IsotonicRegression.train(sc.parallelize(data))
@@ -622,7 +651,8 @@ def predict(self, x):
         values with the same boundary then the same rules as in 2)
         are used.
 
-        :param x: Feature or RDD of Features to be labeled.
+        :param x:
+          Feature or RDD of Features to be labeled.
         """
         if isinstance(x, RDD):
             return x.map(lambda v: self.predict(v))
@@ -676,8 +706,11 @@ def train(cls, data, isotonic=True):
         """
         Train a isotonic regression model on the given data.
 
-        :param data: RDD of (label, feature, weight) tuples.
-        :param isotonic: Whether this is isotonic or antitonic.
+        :param data:
+          RDD of (label, feature, weight) tuples.
+        :param isotonic:
+          Whether this is isotonic (True) or antitonic (False).
+          (default: True)
         """
         boundaries, predictions = callMLlibFunc("trainIsotonicRegressionModel",
                                                 data.map(_convert_to_vector), bool(isotonic))

From d361d70806a9e758a9ee2986c144a89f6a0c7b63 Mon Sep 17 00:00:00 2001
From: vijaykiran <mail@vijaykiran.com>
Date: Wed, 6 Jan 2016 11:30:18 +0100
Subject: [PATCH 2/4] Style Fixes

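Rewrap the docstring parameter descriptions to a fill-column of 100 and
remove the blank lines inside the regType description.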
---
 python/pyspark/mllib/regression.py | 39 ++++++++++++------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 26342f5abfc07..c6b52c9fbd64b 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -40,8 +40,8 @@ class LabeledPoint(object):
     :param label:
       Label for this data point.
     :param features:
-      Vector of features for this point (NumPy array, list,
-      pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix)
+      Vector of features for this point (NumPy array, list, pyspark.mllib.linalg.SparseVector, or
+      scipy.sparse column matrix)
 
     Note: 'label' and 'features' are accessible as class attributes.
 
@@ -267,22 +267,17 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
           (default: 0.0)
         :param regType:
           The type of regularizer used for training our model.
-
           :Allowed values:
           - "l1" for using L1 regularization (lasso),
           - "l2" for using L2 regularization (ridge),
           - None for no regularization
-
           (default: None)
-
         :param intercept:
-          Boolean parameter which indicates the use or not of the augmented
-          representation for training data (i.e., whether bias features are
-          activated or not).
+          Boolean parameter which indicates the use or not of the augmented representation for
+          training data (i.e., whether bias features are activated or not).
           (default: False)
         :param validateData:
-          Boolean parameter which indicates if the algorithm should validate data
-          before training.
+          Boolean parameter which indicates if the algorithm should validate data before training.
           (default: True)
         :param convergenceTol:
           A condition which decides iteration termination.
@@ -422,13 +417,11 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
           The initial weights.
           (default: None)
         :param intercept:
-          Boolean parameter which indicates the use or not of the augmented
-          representation for training data (i.e. whether bias features are
-          activated or not).
+          Boolean parameter which indicates the use or not of the augmented representation for
+          training data (i.e. whether bias features are activated or not).
          (default: False)
         :param validateData:
-          Boolean parameter which indicates if the algorithm should validate
-          data before training.
+          Boolean parameter which indicates if the algorithm should validate data before training.
           (default: True)
         :param convergenceTol:
           A condition which decides iteration termination.
@@ -568,13 +561,11 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
           The initial weights.
           (default: None)
         :param intercept:
-          Boolean parameter which indicates the use or not of the augmented
-          representation for training data (i.e. whether bias features are
-          activated or not).
+          Boolean parameter which indicates the use or not of the augmented representation for
+          training data (i.e. whether bias features are activated or not).
           (default: False)
         :param validateData:
-          Boolean parameter which indicates if the algorithm should validate
-          data before training.
+          Boolean parameter which indicates if the algorithm should validate data before training.
           (default: True)
         :param convergenceTol:
           A condition which decides iteration termination.
@@ -594,11 +585,11 @@ class IsotonicRegressionModel(Saveable, Loader):
     Regression model for isotonic regression.
 
     :param boundaries:
-      Array of boundaries for which predictions are known. Boundaries must be
-      sorted in increasing order.
+      Array of boundaries for which predictions are known. Boundaries must be sorted in increasing
+      order.
     :param predictions:
-      Array of predictions associated to the boundaries at the same index.
-      Results of isotonic regression and therefore monotone.
+      Array of predictions associated to the boundaries at the same index. Results of isotonic
+      regression and therefore monotone.
     :param isotonic:
       Indicates whether this is isotonic or antitonic.
 

From 45bec55b2f6bb165a0491e71bff6f2341a58b744 Mon Sep 17 00:00:00 2001
From: vijaykiran <mail@vijaykiran.com>
Date: Fri, 22 Jan 2016 15:21:51 +0100
Subject: [PATCH 3/4] Limit parameter descriptions to col 74

---
 python/pyspark/mllib/regression.py | 36 +++++++++++++++++-------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index c6b52c9fbd64b..de97ba515fb7b 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -40,8 +40,8 @@ class LabeledPoint(object):
     :param label:
       Label for this data point.
     :param features:
-      Vector of features for this point (NumPy array, list, pyspark.mllib.linalg.SparseVector, or
-      scipy.sparse column matrix)
+      Vector of features for this point (NumPy array, list,
+      pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix)
 
     Note: 'label' and 'features' are accessible as class attributes.
 
@@ -273,11 +273,13 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
           - None for no regularization
           (default: None)
         :param intercept:
-          Boolean parameter which indicates the use or not of the augmented representation for
-          training data (i.e., whether bias features are activated or not).
+          Boolean parameter which indicates the use or not of the
+          augmented representation for training data (i.e., whether bias
+          features are activated or not).
           (default: False)
         :param validateData:
-          Boolean parameter which indicates if the algorithm should validate data before training.
+          Boolean parameter which indicates if the algorithm should
+          validate data before training.
           (default: True)
         :param convergenceTol:
           A condition which decides iteration termination.
@@ -417,11 +419,13 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
           The initial weights.
           (default: None)
         :param intercept:
-          Boolean parameter which indicates the use or not of the augmented representation for
-          training data (i.e. whether bias features are activated or not).
+          Boolean parameter which indicates the use or not of the
+          augmented representation for training data (i.e. whether bias
+          features are activated or not).
          (default: False)
         :param validateData:
-          Boolean parameter which indicates if the algorithm should validate data before training.
+          Boolean parameter which indicates if the algorithm should
+          validate data before training.
           (default: True)
         :param convergenceTol:
           A condition which decides iteration termination.
@@ -561,11 +565,13 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
           The initial weights.
           (default: None)
         :param intercept:
-          Boolean parameter which indicates the use or not of the augmented representation for
-          training data (i.e. whether bias features are activated or not).
+          Boolean parameter which indicates the use or not of the
+          augmented representation for training data (i.e. whether bias
+          features are activated or not).
           (default: False)
         :param validateData:
-          Boolean parameter which indicates if the algorithm should validate data before training.
+          Boolean parameter which indicates if the algorithm should
+          validate data before training.
           (default: True)
         :param convergenceTol:
           A condition which decides iteration termination.
@@ -585,11 +591,11 @@ class IsotonicRegressionModel(Saveable, Loader):
     Regression model for isotonic regression.
 
     :param boundaries:
-      Array of boundaries for which predictions are known. Boundaries must be sorted in increasing
-      order.
+      Array of boundaries for which predictions are known. Boundaries must
+      be sorted in increasing order.
     :param predictions:
-      Array of predictions associated to the boundaries at the same index. Results of isotonic
-      regression and therefore monotone.
+      Array of predictions associated to the boundaries at the same index.
+      Results of isotonic regression and therefore monotone.
     :param isotonic:
       Indicates whether this is isotonic or antitonic.
 

From 5feecbad219895696709d804facfb8c575d1d5b4 Mon Sep 17 00:00:00 2001
From: vijaykiran <mail@vijaykiran.com>
Date: Sat, 23 Jan 2016 08:13:15 +0100
Subject: [PATCH 4/4] Fix indentation and add missing period

---
 python/pyspark/mllib/regression.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index de97ba515fb7b..d5e3adb5ac9e2 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -41,7 +41,7 @@ class LabeledPoint(object):
       Label for this data point.
     :param features:
       Vector of features for this point (NumPy array, list,
-      pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix)
+      pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix).
 
     Note: 'label' and 'features' are accessible as class attributes.
 
@@ -422,7 +422,7 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
           Boolean parameter which indicates the use or not of the
           augmented representation for training data (i.e. whether bias
           features are activated or not).
-         (default: False)
+          (default: False)
         :param validateData:
           Boolean parameter which indicates if the algorithm should
           validate data before training.