apache · vijaykiran · Jan 5, 2016 · Jan 6, 2016 · Jan 22, 2016 · Jan 23, 2016
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
@@ -37,10 +37,11 @@ class LabeledPoint(object):
     """
     Class that represents the features and labels of a data point.
 
-    :param label: Label for this data point.
-    :param features: Vector of features for this point (NumPy array,
-            list, pyspark.mllib.linalg.SparseVector, or scipy.sparse
-            column matrix)
+    :param label:
+      Label for this data point.
+    :param features:
+      Vector of features for this point (NumPy array, list,
+      pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix).
 
     Note: 'label' and 'features' are accessible as class attributes.
 
@@ -66,8 +67,10 @@ class LinearModel(object):
     """
     A linear model that has a vector of coefficients and an intercept.
 
-    :param weights: Weights computed for every feature.
-    :param intercept: Intercept computed for this model.
+    :param weights:
+      Weights computed for every feature.
+    :param intercept:
+      Intercept computed for this model.
 
     .. versionadded:: 0.9.0
     """
@@ -245,37 +248,42 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
         set of rows of A, each with its corresponding right hand side
         label y. See also the documentation for the precise formulation.
 
-        :param data:              The training data, an RDD of
-                                  LabeledPoint.
-        :param iterations:        The number of iterations
-                                  (default: 100).
-        :param step:              The step parameter used in SGD
-                                  (default: 1.0).
-        :param miniBatchFraction: Fraction of data to be used for each
-                                  SGD iteration (default: 1.0).
-        :param initialWeights:    The initial weights (default: None).
-        :param regParam:          The regularizer parameter
-                                  (default: 0.0).
-        :param regType:           The type of regularizer used for
-                                  training our model.
-
-                                  :Allowed values:
-                                     - "l1" for using L1 regularization (lasso),
-                                     - "l2" for using L2 regularization (ridge),
-                                     - None for no regularization
-
-                                     (default: None)
-
-        :param intercept:         Boolean parameter which indicates the
-                                  use or not of the augmented representation
-                                  for training data (i.e. whether bias
-                                  features are activated or not,
-                                  default: False).
-        :param validateData:      Boolean parameter which indicates if
-                                  the algorithm should validate data
-                                  before training. (default: True)
-        :param convergenceTol:    A condition which decides iteration termination.
-                                  (default: 0.001)
+        :param data:
+          The training data, an RDD of LabeledPoint.
+        :param iterations:
+          The number of iterations.
+          (default: 100)
+        :param step:
+          The step parameter used in SGD.
+          (default: 1.0)
+        :param miniBatchFraction:
+          Fraction of data to be used for each SGD iteration.
+          (default: 1.0)
+        :param initialWeights:
+          The initial weights.
+          (default: None)
+        :param regParam:
+          The regularizer parameter.
+          (default: 0.0)
+        :param regType:
+          The type of regularizer used for training our model.
+          Allowed values are
+          "l1" for using L1 regularization (lasso),
+          "l2" for using L2 regularization (ridge), or
+          None for no regularization.
+          (default: None)
+        :param intercept:
+          Boolean parameter which indicates whether to use the
+          augmented representation for training data (i.e., whether
+          bias features are activated or not).
+          (default: False)
+        :param validateData:
+          Boolean parameter which indicates if the algorithm should
+          validate data before training.
+          (default: True)
+        :param convergenceTol:
+          A condition which decides iteration termination.
+          (default: 0.001)
         """
         def train(rdd, i):
             return callMLlibFunc("trainLinearRegressionModelWithSGD", rdd, int(iterations),
@@ -393,27 +401,35 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
         set of rows of A, each with its corresponding right hand side
         label y. See also the documentation for the precise formulation.
 
-        :param data:              The training data, an RDD of
-                                  LabeledPoint.
-        :param iterations:        The number of iterations
-                                  (default: 100).
-        :param step:              The step parameter used in SGD
-                                  (default: 1.0).
-        :param regParam:          The regularizer parameter
-                                  (default: 0.01).
-        :param miniBatchFraction: Fraction of data to be used for each
-                                  SGD iteration (default: 1.0).
-        :param initialWeights:    The initial weights (default: None).
-        :param intercept:         Boolean parameter which indicates the
-                                  use or not of the augmented representation
-                                  for training data (i.e. whether bias
-                                  features are activated or not,
-                                  default: False).
-        :param validateData:      Boolean parameter which indicates if
-                                  the algorithm should validate data
-                                  before training. (default: True)
-        :param convergenceTol:    A condition which decides iteration termination.
-                                  (default: 0.001)
+        :param data:
+          The training data, an RDD of LabeledPoint.
+        :param iterations:
+          The number of iterations.
+          (default: 100)
+        :param step:
+          The step parameter used in SGD.
+          (default: 1.0)
+        :param regParam:
+          The regularizer parameter.
+          (default: 0.01)
+        :param miniBatchFraction:
+          Fraction of data to be used for each SGD iteration.
+          (default: 1.0)
+        :param initialWeights:
+          The initial weights.
+          (default: None)
+        :param intercept:
+          Boolean parameter which indicates whether to use the
+          augmented representation for training data (i.e., whether
+          bias features are activated or not).
+          (default: False)
+        :param validateData:
+          Boolean parameter which indicates if the algorithm should
+          validate data before training.
+          (default: True)
+        :param convergenceTol:
+          A condition which decides iteration termination.
+          (default: 0.001)
         """
         def train(rdd, i):
             return callMLlibFunc("trainLassoModelWithSGD", rdd, int(iterations), float(step),
@@ -531,27 +547,35 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01,
         set of rows of A, each with its corresponding right hand side
         label y. See also the documentation for the precise formulation.
 
-        :param data:              The training data, an RDD of
-                                  LabeledPoint.
-        :param iterations:        The number of iterations
-                                  (default: 100).
-        :param step:              The step parameter used in SGD
-                                  (default: 1.0).
-        :param regParam:          The regularizer parameter
-                                  (default: 0.01).
-        :param miniBatchFraction: Fraction of data to be used for each
-                                  SGD iteration (default: 1.0).
-        :param initialWeights:    The initial weights (default: None).
-        :param intercept:         Boolean parameter which indicates the
-                                  use or not of the augmented representation
-                                  for training data (i.e. whether bias
-                                  features are activated or not,
-                                  default: False).
-        :param validateData:      Boolean parameter which indicates if
-                                  the algorithm should validate data
-                                  before training. (default: True)
-        :param convergenceTol:    A condition which decides iteration termination.
-                                  (default: 0.001)
+        :param data:
+          The training data, an RDD of LabeledPoint.
+        :param iterations:
+          The number of iterations.
+          (default: 100)
+        :param step:
+          The step parameter used in SGD.
+          (default: 1.0)
+        :param regParam:
+          The regularizer parameter.
+          (default: 0.01)
+        :param miniBatchFraction:
+          Fraction of data to be used for each SGD iteration.
+          (default: 1.0)
+        :param initialWeights:
+          The initial weights.
+          (default: None)
+        :param intercept:
+          Boolean parameter which indicates whether to use the
+          augmented representation for training data (i.e., whether
+          bias features are activated or not).
+          (default: False)
+        :param validateData:
+          Boolean parameter which indicates if the algorithm should
+          validate data before training.
+          (default: True)
+        :param convergenceTol:
+          A condition which decides iteration termination.
+          (default: 0.001)
         """
         def train(rdd, i):
             return callMLlibFunc("trainRidgeModelWithSGD", rdd, int(iterations), float(step),
@@ -566,12 +590,14 @@ class IsotonicRegressionModel(Saveable, Loader):
     """
     Regression model for isotonic regression.
 
-    :param boundaries: Array of boundaries for which predictions are
-            known. Boundaries must be sorted in increasing order.
-    :param predictions: Array of predictions associated to the
-            boundaries at the same index. Results of isotonic
-            regression and therefore monotone.
-    :param isotonic: indicates whether this is isotonic or antitonic.
+    :param boundaries:
+      Array of boundaries for which predictions are known. Boundaries must
+      be sorted in increasing order.
+    :param predictions:
+      Array of predictions associated with the boundaries at the same index.
+      Results of isotonic regression and therefore monotone.
+    :param isotonic:
+      Indicates whether this is isotonic or antitonic.
 
     >>> data = [(1, 0, 1), (2, 1, 1), (3, 2, 1), (1, 3, 1), (6, 4, 1), (17, 5, 1), (16, 6, 1)]
     >>> irm = IsotonicRegression.train(sc.parallelize(data))
@@ -622,7 +648,8 @@ def predict(self, x):
         values with the same boundary then the same rules as in 2)
         are used.
 
-        :param x: Feature or RDD of Features to be labeled.
+        :param x:
+          Feature or RDD of Features to be labeled.
         """
         if isinstance(x, RDD):
             return x.map(lambda v: self.predict(v))
@@ -676,8 +703,11 @@ def train(cls, data, isotonic=True):
         """
         Train a isotonic regression model on the given data.
 
-        :param data: RDD of (label, feature, weight) tuples.
-        :param isotonic: Whether this is isotonic or antitonic.
+        :param data:
+          RDD of (label, feature, weight) tuples.
+        :param isotonic:
+          Whether this is isotonic (True) or antitonic (False).
+          (default: True)
         """
         boundaries, predictions = callMLlibFunc("trainIsotonicRegressionModel",
                                                 data.map(_convert_to_vector), bool(isotonic))