Skip to content
20 changes: 13 additions & 7 deletions python/pyspark/ml/classification.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,6 +20,7 @@
from pyspark import since
from pyspark.ml.util import *
from pyspark.ml.wrapper import JavaEstimator, JavaModel
from pyspark.ml.param import TypeConverters
from pyspark.ml.param.shared import *
from pyspark.ml.regression import (
RandomForestParams, TreeEnsembleParams, DecisionTreeModel, TreeEnsembleModels)
Expand Down Expand Up @@ -87,7 +88,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti

threshold = Param(Params._dummy(), "threshold",
"Threshold in binary classification prediction, in range [0, 1]." +
" If threshold and thresholds are both set, they must match.")
" If threshold and thresholds are both set, they must match.",
typeConverter=TypeConverters.toFloat)

@keyword_only
def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
Expand Down Expand Up @@ -243,7 +245,7 @@ class TreeClassifierParams(object):
impurity = Param(Params._dummy(), "impurity",
"Criterion used for information gain calculation (case-insensitive). " +
"Supported options: " +
", ".join(supportedImpurities))
", ".join(supportedImpurities), typeConverter=TypeConverters.toString)

def __init__(self):
super(TreeClassifierParams, self).__init__()
Expand Down Expand Up @@ -534,7 +536,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol

lossType = Param(Params._dummy(), "lossType",
"Loss function which GBT tries to minimize (case-insensitive). " +
"Supported options: " + ", ".join(GBTParams.supportedLossTypes))
"Supported options: " + ", ".join(GBTParams.supportedLossTypes),
typeConverter=TypeConverters.toString)

@keyword_only
def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
Expand Down Expand Up @@ -652,9 +655,10 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
"""

smoothing = Param(Params._dummy(), "smoothing", "The smoothing parameter, should be >= 0, " +
"default is 1.0")
"default is 1.0", typeConverter=TypeConverters.toFloat)
modelType = Param(Params._dummy(), "modelType", "The model type which is a string " +
"(case-sensitive). Supported options: multinomial (default) and bernoulli.")
"(case-sensitive). Supported options: multinomial (default) and bernoulli.",
typeConverter=TypeConverters.toString)

@keyword_only
def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
Expand Down Expand Up @@ -782,11 +786,13 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol,

layers = Param(Params._dummy(), "layers", "Sizes of layers from input layer to output layer " +
"E.g., Array(780, 100, 10) means 780 inputs, one hidden layer with 100 " +
"neurons and output layer of 10 neurons, default is [1, 1].")
"neurons and output layer of 10 neurons, default is [1, 1].",
typeConverter=TypeConverters.toListInt)
blockSize = Param(Params._dummy(), "blockSize", "Block size for stacking input data in " +
"matrices. Data is stacked within partitions. If block size is more than " +
"remaining data in a partition then it is adjusted to the size of this " +
"data. Recommended size is between 10 and 1000, default is 128.")
"data. Recommended size is between 10 and 1000, default is 128.",
typeConverter=TypeConverters.toInt)

@keyword_only
def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
Expand Down
14 changes: 9 additions & 5 deletions python/pyspark/ml/clustering.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -87,12 +87,14 @@ class KMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol
.. versionadded:: 1.5.0
"""

k = Param(Params._dummy(), "k", "number of clusters to create")
k = Param(Params._dummy(), "k", "number of clusters to create",
typeConverter=TypeConverters.toInt)
initMode = Param(Params._dummy(), "initMode",
"the initialization algorithm. This can be either \"random\" to " +
"choose random points as initial cluster centers, or \"k-means||\" " +
"to use a parallel variant of k-means++")
initSteps = Param(Params._dummy(), "initSteps", "steps for k-means initialization mode")
"to use a parallel variant of k-means++", TypeConverters.toString)
initSteps = Param(Params._dummy(), "initSteps", "steps for k-means initialization mode",
typeConverter=TypeConverters.toInt)

@keyword_only
def __init__(self, featuresCol="features", predictionCol="prediction", k=2,
Expand Down Expand Up @@ -227,10 +229,12 @@ class BisectingKMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIte
.. versionadded:: 2.0.0
"""

k = Param(Params._dummy(), "k", "number of clusters to create")
k = Param(Params._dummy(), "k", "number of clusters to create",
typeConverter=TypeConverters.toInt)
minDivisibleClusterSize = Param(Params._dummy(), "minDivisibleClusterSize",
"the minimum number of points (if >= 1.0) " +
"or the minimum proportion")
"or the minimum proportion",
typeConverter=TypeConverters.toFloat)

@keyword_only
def __init__(self, featuresCol="features", predictionCol="prediction", maxIter=20,
Expand Down
Loading