From 146c2a8b340f2269612a11469205aa10414cbb13 Mon Sep 17 00:00:00 2001 From: vijaykiran Date: Tue, 5 Jan 2016 12:08:34 +0100 Subject: [PATCH 1/6] [SPARK-12633][DOC] Update param descriptions Updates the param descriptions to be consistent. See [SPARK-11219] for more details. --- python/pyspark/mllib/regression.py | 209 +++++++++++++++++------------ 1 file changed, 121 insertions(+), 88 deletions(-) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 13b3397501c0b..26342f5abfc07 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -37,10 +37,11 @@ class LabeledPoint(object): """ Class that represents the features and labels of a data point. - :param label: Label for this data point. - :param features: Vector of features for this point (NumPy array, - list, pyspark.mllib.linalg.SparseVector, or scipy.sparse - column matrix) + :param label: + Label for this data point. + :param features: + Vector of features for this point (NumPy array, list, + pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix) Note: 'label' and 'features' are accessible as class attributes. @@ -66,8 +67,10 @@ class LinearModel(object): """ A linear model that has a vector of coefficients and an intercept. - :param weights: Weights computed for every feature. - :param intercept: Intercept computed for this model. + :param weights: + Weights computed for every feature. + :param intercept: + Intercept computed for this model. .. versionadded:: 0.9.0 """ @@ -245,37 +248,45 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0, set of rows of A, each with its corresponding right hand side label y. See also the documentation for the precise formulation. - :param data: The training data, an RDD of - LabeledPoint. - :param iterations: The number of iterations - (default: 100). - :param step: The step parameter used in SGD - (default: 1.0). 
- :param miniBatchFraction: Fraction of data to be used for each - SGD iteration (default: 1.0). - :param initialWeights: The initial weights (default: None). - :param regParam: The regularizer parameter - (default: 0.0). - :param regType: The type of regularizer used for - training our model. - - :Allowed values: - - "l1" for using L1 regularization (lasso), - - "l2" for using L2 regularization (ridge), - - None for no regularization - - (default: None) - - :param intercept: Boolean parameter which indicates the - use or not of the augmented representation - for training data (i.e. whether bias - features are activated or not, - default: False). - :param validateData: Boolean parameter which indicates if - the algorithm should validate data - before training. (default: True) - :param convergenceTol: A condition which decides iteration termination. - (default: 0.001) + :param data: + The training data, an RDD of LabeledPoint. + :param iterations: + The number of iterations. + (default: 100) + :param step: + The step parameter used in SGD. + (default: 1.0) + :param miniBatchFraction: + Fraction of data to be used for each SGD iteration. + (default: 1.0) + :param initialWeights: + The initial weights. + (default: None) + :param regParam: + The regularizer parameter. + (default: 0.0) + :param regType: + The type of regularizer used for training our model. + + :Allowed values: + - "l1" for using L1 regularization (lasso), + - "l2" for using L2 regularization (ridge), + - None for no regularization + + (default: None) + + :param intercept: + Boolean parameter which indicates the use or not of the augmented + representation for training data (i.e., whether bias features are + activated or not). + (default: False) + :param validateData: + Boolean parameter which indicates if the algorithm should validate data + before training. + (default: True) + :param convergenceTol: + A condition which decides iteration termination. 
+ (default: 0.001) """ def train(rdd, i): return callMLlibFunc("trainLinearRegressionModelWithSGD", rdd, int(iterations), @@ -393,27 +404,35 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01, set of rows of A, each with its corresponding right hand side label y. See also the documentation for the precise formulation. - :param data: The training data, an RDD of - LabeledPoint. - :param iterations: The number of iterations - (default: 100). - :param step: The step parameter used in SGD - (default: 1.0). - :param regParam: The regularizer parameter - (default: 0.01). - :param miniBatchFraction: Fraction of data to be used for each - SGD iteration (default: 1.0). - :param initialWeights: The initial weights (default: None). - :param intercept: Boolean parameter which indicates the - use or not of the augmented representation - for training data (i.e. whether bias - features are activated or not, - default: False). - :param validateData: Boolean parameter which indicates if - the algorithm should validate data - before training. (default: True) - :param convergenceTol: A condition which decides iteration termination. - (default: 0.001) + :param data: + The training data, an RDD of LabeledPoint. + :param iterations: + The number of iterations. + (default: 100) + :param step: + The step parameter used in SGD. + (default: 1.0) + :param regParam: + The regularizer parameter. + (default: 0.01) + :param miniBatchFraction: + Fraction of data to be used for each SGD iteration. + (default: 1.0) + :param initialWeights: + The initial weights. + (default: None) + :param intercept: + Boolean parameter which indicates the use or not of the augmented + representation for training data (i.e. whether bias features are + activated or not). + (default: False) + :param validateData: + Boolean parameter which indicates if the algorithm should validate + data before training. + (default: True) + :param convergenceTol: + A condition which decides iteration termination. 
+ (default: 0.001) """ def train(rdd, i): return callMLlibFunc("trainLassoModelWithSGD", rdd, int(iterations), float(step), @@ -531,27 +550,35 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01, set of rows of A, each with its corresponding right hand side label y. See also the documentation for the precise formulation. - :param data: The training data, an RDD of - LabeledPoint. - :param iterations: The number of iterations - (default: 100). - :param step: The step parameter used in SGD - (default: 1.0). - :param regParam: The regularizer parameter - (default: 0.01). - :param miniBatchFraction: Fraction of data to be used for each - SGD iteration (default: 1.0). - :param initialWeights: The initial weights (default: None). - :param intercept: Boolean parameter which indicates the - use or not of the augmented representation - for training data (i.e. whether bias - features are activated or not, - default: False). - :param validateData: Boolean parameter which indicates if - the algorithm should validate data - before training. (default: True) - :param convergenceTol: A condition which decides iteration termination. - (default: 0.001) + :param data: + The training data, an RDD of LabeledPoint. + :param iterations: + The number of iterations. + (default: 100) + :param step: + The step parameter used in SGD. + (default: 1.0) + :param regParam: + The regularizer parameter. + (default: 0.01) + :param miniBatchFraction: + Fraction of data to be used for each SGD iteration. + (default: 1.0) + :param initialWeights: + The initial weights. + (default: None) + :param intercept: + Boolean parameter which indicates the use or not of the augmented + representation for training data (i.e. whether bias features are + activated or not). + (default: False) + :param validateData: + Boolean parameter which indicates if the algorithm should validate + data before training. + (default: True) + :param convergenceTol: + A condition which decides iteration termination. 
+ (default: 0.001) """ def train(rdd, i): return callMLlibFunc("trainRidgeModelWithSGD", rdd, int(iterations), float(step), @@ -566,12 +593,14 @@ class IsotonicRegressionModel(Saveable, Loader): """ Regression model for isotonic regression. - :param boundaries: Array of boundaries for which predictions are - known. Boundaries must be sorted in increasing order. - :param predictions: Array of predictions associated to the - boundaries at the same index. Results of isotonic - regression and therefore monotone. - :param isotonic: indicates whether this is isotonic or antitonic. + :param boundaries: + Array of boundaries for which predictions are known. Boundaries must be + sorted in increasing order. + :param predictions: + Array of predictions associated to the boundaries at the same index. + Results of isotonic regression and therefore monotone. + :param isotonic: + Indicates whether this is isotonic or antitonic. >>> data = [(1, 0, 1), (2, 1, 1), (3, 2, 1), (1, 3, 1), (6, 4, 1), (17, 5, 1), (16, 6, 1)] >>> irm = IsotonicRegression.train(sc.parallelize(data)) @@ -622,7 +651,8 @@ def predict(self, x): values with the same boundary then the same rules as in 2) are used. - :param x: Feature or RDD of Features to be labeled. + :param x: + Feature or RDD of Features to be labeled. """ if isinstance(x, RDD): return x.map(lambda v: self.predict(v)) @@ -676,8 +706,11 @@ def train(cls, data, isotonic=True): """ Train a isotonic regression model on the given data. - :param data: RDD of (label, feature, weight) tuples. - :param isotonic: Whether this is isotonic or antitonic. + :param data: + RDD of (label, feature, weight) tuples. + :param isotonic: + Whether this is isotonic or antitonic. 
+ (default: True) """ boundaries, predictions = callMLlibFunc("trainIsotonicRegressionModel", data.map(_convert_to_vector), bool(isotonic)) From d361d70806a9e758a9ee2986c144a89f6a0c7b63 Mon Sep 17 00:00:00 2001 From: vijaykiran Date: Wed, 6 Jan 2016 11:30:18 +0100 Subject: [PATCH 2/6] Style Fixes Change fill-column to 100. --- python/pyspark/mllib/regression.py | 39 ++++++++++++------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 26342f5abfc07..c6b52c9fbd64b 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -40,8 +40,8 @@ class LabeledPoint(object): :param label: Label for this data point. :param features: - Vector of features for this point (NumPy array, list, - pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix) + Vector of features for this point (NumPy array, list, pyspark.mllib.linalg.SparseVector, or + scipy.sparse column matrix) Note: 'label' and 'features' are accessible as class attributes. @@ -267,22 +267,17 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0, (default: 0.0) :param regType: The type of regularizer used for training our model. - :Allowed values: - "l1" for using L1 regularization (lasso), - "l2" for using L2 regularization (ridge), - None for no regularization - (default: None) - :param intercept: - Boolean parameter which indicates the use or not of the augmented - representation for training data (i.e., whether bias features are - activated or not). + Boolean parameter which indicates the use or not of the augmented representation for + training data (i.e., whether bias features are activated or not). (default: False) :param validateData: - Boolean parameter which indicates if the algorithm should validate data - before training. + Boolean parameter which indicates if the algorithm should validate data before training. 
(default: True) :param convergenceTol: A condition which decides iteration termination. @@ -422,13 +417,11 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01, The initial weights. (default: None) :param intercept: - Boolean parameter which indicates the use or not of the augmented - representation for training data (i.e. whether bias features are - activated or not). + Boolean parameter which indicates the use or not of the augmented representation for + training data (i.e. whether bias features are activated or not). (default: False) :param validateData: - Boolean parameter which indicates if the algorithm should validate - data before training. + Boolean parameter which indicates if the algorithm should validate data before training. (default: True) :param convergenceTol: A condition which decides iteration termination. @@ -568,13 +561,11 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01, The initial weights. (default: None) :param intercept: - Boolean parameter which indicates the use or not of the augmented - representation for training data (i.e. whether bias features are - activated or not). + Boolean parameter which indicates the use or not of the augmented representation for + training data (i.e. whether bias features are activated or not). (default: False) :param validateData: - Boolean parameter which indicates if the algorithm should validate - data before training. + Boolean parameter which indicates if the algorithm should validate data before training. (default: True) :param convergenceTol: A condition which decides iteration termination. @@ -594,11 +585,11 @@ class IsotonicRegressionModel(Saveable, Loader): Regression model for isotonic regression. :param boundaries: - Array of boundaries for which predictions are known. Boundaries must be - sorted in increasing order. + Array of boundaries for which predictions are known. Boundaries must be sorted in increasing + order. 
:param predictions: - Array of predictions associated to the boundaries at the same index. - Results of isotonic regression and therefore monotone. + Array of predictions associated to the boundaries at the same index. Results of isotonic + regression and therefore monotone. :param isotonic: Indicates whether this is isotonic or antitonic. From 45bec55b2f6bb165a0491e71bff6f2341a58b744 Mon Sep 17 00:00:00 2001 From: vijaykiran Date: Fri, 22 Jan 2016 15:21:51 +0100 Subject: [PATCH 3/6] Limit parameter descriptions to col 74 --- python/pyspark/mllib/regression.py | 36 +++++++++++++++++------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index c6b52c9fbd64b..de97ba515fb7b 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -40,8 +40,8 @@ class LabeledPoint(object): :param label: Label for this data point. :param features: - Vector of features for this point (NumPy array, list, pyspark.mllib.linalg.SparseVector, or - scipy.sparse column matrix) + Vector of features for this point (NumPy array, list, + pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix) Note: 'label' and 'features' are accessible as class attributes. @@ -273,11 +273,13 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0, - None for no regularization (default: None) :param intercept: - Boolean parameter which indicates the use or not of the augmented representation for - training data (i.e., whether bias features are activated or not). + Boolean parameter which indicates the use or not of the + augmented representation for training data (i.e., whether bias + features are activated or not). (default: False) :param validateData: - Boolean parameter which indicates if the algorithm should validate data before training. + Boolean parameter which indicates if the algorithm should + validate data before training. 
(default: True) :param convergenceTol: A condition which decides iteration termination. @@ -417,11 +419,13 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01, The initial weights. (default: None) :param intercept: - Boolean parameter which indicates the use or not of the augmented representation for - training data (i.e. whether bias features are activated or not). + Boolean parameter which indicates the use or not of the + augmented representation for training data (i.e. whether bias + features are activated or not). (default: False) :param validateData: - Boolean parameter which indicates if the algorithm should validate data before training. + Boolean parameter which indicates if the algorithm should + validate data before training. (default: True) :param convergenceTol: A condition which decides iteration termination. @@ -561,11 +565,13 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01, The initial weights. (default: None) :param intercept: - Boolean parameter which indicates the use or not of the augmented representation for - training data (i.e. whether bias features are activated or not). + Boolean parameter which indicates the use or not of the + augmented representation for training data (i.e. whether bias + features are activated or not). (default: False) :param validateData: - Boolean parameter which indicates if the algorithm should validate data before training. + Boolean parameter which indicates if the algorithm should + validate data before training. (default: True) :param convergenceTol: A condition which decides iteration termination. @@ -585,11 +591,11 @@ class IsotonicRegressionModel(Saveable, Loader): Regression model for isotonic regression. :param boundaries: - Array of boundaries for which predictions are known. Boundaries must be sorted in increasing - order. + Array of boundaries for which predictions are known. Boundaries must + be sorted in increasing order. 
:param predictions: - Array of predictions associated to the boundaries at the same index. Results of isotonic - regression and therefore monotone. + Array of predictions associated to the boundaries at the same index. + Results of isotonic regression and therefore monotone. :param isotonic: Indicates whether this is isotonic or antitonic. From 5feecbad219895696709d804facfb8c575d1d5b4 Mon Sep 17 00:00:00 2001 From: vijaykiran Date: Sat, 23 Jan 2016 08:13:15 +0100 Subject: [PATCH 4/6] Fix indentation --- python/pyspark/mllib/regression.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index de97ba515fb7b..d5e3adb5ac9e2 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -41,7 +41,7 @@ class LabeledPoint(object): Label for this data point. :param features: Vector of features for this point (NumPy array, list, - pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix) + pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix). Note: 'label' and 'features' are accessible as class attributes. @@ -422,7 +422,7 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01, Boolean parameter which indicates the use or not of the augmented representation for training data (i.e. whether bias features are activated or not). - (default: False) + (default: False) :param validateData: Boolean parameter which indicates if the algorithm should validate data before training. 
From 2e535424dae80fad627c6c23965046f8680139f6 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Fri, 26 Feb 2016 16:19:28 -0800 Subject: [PATCH 5/6] [SPARK-12633] Fixed allowed values, cleanup, and sync with Scala API --- python/pyspark/mllib/regression.py | 120 +++++++++++------------------ 1 file changed, 46 insertions(+), 74 deletions(-) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index ecd50f0735197..3b77a6200054f 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -220,19 +220,8 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights): class LinearRegressionWithSGD(object): """ - Train a linear regression model with no regularization using Stochastic Gradient Descent. - This solves the least squares regression formulation - - f(weights) = 1/n ||A weights-y||^2 - - which is the mean squared error. - Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with - its corresponding right hand side label y. - See also the documentation for the precise formulation. - .. versionadded:: 0.9.0 """ - @classmethod @since("0.9.0") def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0, @@ -240,15 +229,15 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0, validateData=True, convergenceTol=0.001): """ Train a linear regression model using Stochastic Gradient - Descent (SGD). - This solves the least squares regression formulation + Descent (SGD). This solves the least squares regression + formulation - f(weights) = 1/(2n) ||A weights - y||^2, + f(weights) = 1/(2n) ||A weights - y||^2 - which is the mean squared error. - Here the data matrix has n rows, and the input RDD holds the - set of rows of A, each with its corresponding right hand side - label y. See also the documentation for the precise formulation. + which is the mean squared error. 
Here the data matrix has n rows, + and the input RDD holds the set of rows of A, each with its + corresponding right hand side label y. + See also the documentation for the precise formulation. :param data: The training data, an RDD of LabeledPoint. @@ -269,11 +258,11 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0, (default: 0.0) :param regType: The type of regularizer used for training our model. - :Allowed values: - - "l1" for using L1 regularization (lasso), - - "l2" for using L2 regularization (ridge), - - None for no regularization - (default: None) + Supported values: + + - "l1" for using L1 regularization + - "l2" for using L2 regularization + - None for no regularization (default) :param intercept: Boolean parameter which indicates the use or not of the augmented representation for training data (i.e., whether bias @@ -376,34 +365,23 @@ def load(cls, sc, path): class LassoWithSGD(object): """ - Train a regression model with L1-regularization using Stochastic Gradient Descent. - This solves the L1-regularized least squares regression formulation - - f(weights) = 1/2n ||A weights-y||^2 + regParam ||weights||_1 - - Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with - its corresponding right hand side label y. - See also the documentation for the precise formulation. - .. versionadded:: 0.9.0 """ - @classmethod @since("0.9.0") def train(cls, data, iterations=100, step=1.0, regParam=0.01, miniBatchFraction=1.0, initialWeights=None, intercept=False, validateData=True, convergenceTol=0.001): """ - Train a regression model with L1-regularization using - Stochastic Gradient Descent. - This solves the l1-regularized least squares regression - formulation + Train a regression model with L1-regularization using Stochastic + Gradient Descent. This solves the l1-regularized least squares + regression formulation - f(weights) = 1/(2n) ||A weights - y||^2 + regParam ||weights||_1. 
+ f(weights) = 1/(2n) ||A weights - y||^2 + regParam ||weights||_1 - Here the data matrix has n rows, and the input RDD holds the - set of rows of A, each with its corresponding right hand side - label y. See also the documentation for the precise formulation. + Here the data matrix has n rows, and the input RDD holds the set + of rows of A, each with its corresponding right hand side label y. + See also the documentation for the precise formulation. :param data: The training data, an RDD of LabeledPoint. @@ -524,34 +502,23 @@ def load(cls, sc, path): class RidgeRegressionWithSGD(object): """ - Train a regression model with L2-regularization using Stochastic Gradient Descent. - This solves the L2-regularized least squares regression formulation - - f(weights) = 1/2n ||A weights-y||^2 + regParam/2 ||weights||^2 - - Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with - its corresponding right hand side label y. - See also the documentation for the precise formulation. - .. versionadded:: 0.9.0 """ - @classmethod @since("0.9.0") def train(cls, data, iterations=100, step=1.0, regParam=0.01, miniBatchFraction=1.0, initialWeights=None, intercept=False, validateData=True, convergenceTol=0.001): """ - Train a regression model with L2-regularization using - Stochastic Gradient Descent. - This solves the l2-regularized least squares regression - formulation + Train a regression model with L2-regularization using Stochastic + Gradient Descent. This solves the l2-regularized least squares + regression formulation - f(weights) = 1/(2n) ||A weights - y||^2 + regParam/2 ||weights||^2. + f(weights) = 1/(2n) ||A weights - y||^2 + regParam/2 ||weights||^2 - Here the data matrix has n rows, and the input RDD holds the - set of rows of A, each with its corresponding right hand side - label y. See also the documentation for the precise formulation. 
+ Here the data matrix has n rows, and the input RDD holds the set + of rows of A, each with its corresponding right hand side label y. + See also the documentation for the precise formulation. :param data: The training data, an RDD of LabeledPoint. @@ -597,11 +564,11 @@ class IsotonicRegressionModel(Saveable, Loader): Regression model for isotonic regression. :param boundaries: - Array of boundaries for which predictions are known. Boundaries must - be sorted in increasing order. + Array of boundaries for which predictions are known. Boundaries + must be sorted in increasing order. :param predictions: - Array of predictions associated to the boundaries at the same index. - Results of isotonic regression and therefore monotone. + Array of predictions associated to the boundaries at the same + index. Results of isotonic regression and therefore monotone. :param isotonic: Indicates whether this is isotonic or antitonic. @@ -684,8 +651,8 @@ def load(cls, sc, path): class IsotonicRegression(object): """ Isotonic regression. - Currently implemented using parallelized pool adjacent violators algorithm. - Only univariate (single feature) algorithm supported. + Currently implemented using parallelized pool adjacent violators + algorithm. Only univariate (single feature) algorithm supported. Sequential PAV implementation based on: @@ -751,9 +718,11 @@ def _validate(self, dstream): @since("1.5.0") def predictOn(self, dstream): """ - Make predictions on a dstream. + Use the model to make predictions on batches of data from a + DStream. - :return: Transformed dstream object. + :return: + DStream containing predictions. """ self._validate(dstream) return dstream.map(lambda x: self._model.predict(x)) @@ -761,9 +730,11 @@ def predictOn(self, dstream): @since("1.5.0") def predictOnValues(self, dstream): """ - Make predictions on a keyed dstream. + Use the model to make predictions on the values of a DStream and + carry over its keys. - :return: Transformed dstream object. 
+ :return: + DStream containing the input keys and the predictions as values. """ self._validate(dstream) return dstream.mapValues(lambda x: self._model.predict(x)) @@ -772,14 +743,15 @@ def predictOnValues(self, dstream): @inherit_doc class StreamingLinearRegressionWithSGD(StreamingLinearAlgorithm): """ - Train or predict a linear regression model on streaming data. Training uses - Stochastic Gradient Descent to update the model based on each new batch of - incoming data from a DStream (see `LinearRegressionWithSGD` for model equation). + Train or predict a linear regression model on streaming data. + Training uses Stochastic Gradient Descent to update the model + based on each new batch of incoming data from a DStream + (see `LinearRegressionWithSGD` for model equation). Each batch of data is assumed to be an RDD of LabeledPoints. The number of data points per batch can vary, but the number - of features must be constant. An initial weight - vector must be provided. + of features must be constant. An initial weight vector must + be provided. :param stepSize: Step size for each iteration of gradient descent. From 94d532dbb410f2a5b96a563f38e543edea66eb98 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Fri, 26 Feb 2016 16:20:09 -0800 Subject: [PATCH 6/6] Changed 'Allowed values:' -> 'Supported values:' to be consistent --- python/pyspark/mllib/classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index b4d54ef61b0e6..13cf8b6b3899b 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -294,7 +294,7 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0, (default: 0.01) :param regType: The type of regularizer used for training our model. 
- Allowed values: + Supported values: - "l1" for using L1 regularization - "l2" for using L2 regularization (default) @@ -344,7 +344,7 @@ def train(cls, data, iterations=100, initialWeights=None, regParam=0.01, regType (default: 0.01) :param regType: The type of regularizer used for training our model. - Allowed values: + Supported values: - "l1" for using L1 regularization - "l2" for using L2 regularization (default)