From 8f49cf5cb21301fa30fc38cd2c96bd477227ee9c Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Thu, 3 Sep 2015 11:51:14 -0700
Subject: [PATCH 1/2] Add defaults to the scaladoc for params in ml/

---
 .../ml/classification/MultilayerPerceptronClassifier.scala | 2 ++
 .../spark/ml/evaluation/BinaryClassificationEvaluator.scala | 1 +
 .../ml/evaluation/MulticlassClassificationEvaluator.scala | 1 +
 .../org/apache/spark/ml/evaluation/RegressionEvaluator.scala | 1 +
 .../src/main/scala/org/apache/spark/ml/feature/Binarizer.scala | 1 +
 mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala | 1 +
 .../scala/org/apache/spark/ml/feature/StopWordsRemover.scala | 2 ++
 .../main/scala/org/apache/spark/ml/feature/StringIndexer.scala | 1 +
 .../main/scala/org/apache/spark/ml/feature/VectorIndexer.scala | 1 +
 .../main/scala/org/apache/spark/ml/feature/VectorSlicer.scala | 2 ++
 .../src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala | 3 +++
 .../org/apache/spark/ml/regression/IsotonicRegression.scala | 1 +
 12 files changed, 17 insertions(+)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
index 1e5b0bc4453e4..82fc80c58054f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
@@ -32,6 +32,7 @@ private[ml] trait MultilayerPerceptronParams extends PredictorParams
   with HasSeed with HasMaxIter with HasTol {
   /**
    * Layer sizes including input size and output size.
+   * Default: Array(1, 1)
    * @group param
    */
   final val layers: IntArrayParam = new IntArrayParam(this, "layers",
@@ -50,6 +51,7 @@ private[ml] trait MultilayerPerceptronParams extends PredictorParams
    * Data is stacked within partitions. If block size is more than remaining data in
    * a partition then it is adjusted to the size of this data.
    * Recommended size is between 10 and 1000.
+   * Default: 128
    * @group expertParam
    */
   final val blockSize: IntParam = new IntParam(this, "blockSize",
diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala
index 56419a0a15952..08df2919a8a87 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala
@@ -38,6 +38,7 @@ class BinaryClassificationEvaluator(override val uid: String)
 
   /**
    * param for metric name in evaluation
+   * Default: areaUnderROC
    * @group param
    */
   val metricName: Param[String] = {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala
index f73d2345078e6..458ca0de32d6a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala
@@ -38,6 +38,7 @@ class MulticlassClassificationEvaluator (override val uid: String)
   /**
    * param for metric name in evaluation (supports `"f1"` (default), `"precision"`, `"recall"`,
    * `"weightedPrecision"`, `"weightedRecall"`)
+   * Default: f1
    * @group param
    */
   val metricName: Param[String] = {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
index d21c88ab9b109..f77b7661cfb88 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
@@ -41,6 +41,7 @@ final class RegressionEvaluator(override val uid: String)
    * Because we will maximize evaluation value (ref: `CrossValidator`),
    * when we evaluate a metric that is needed to minimize (e.g., `"rmse"`, `"mse"`, `"mae"`),
    * we take and output the negative of this metric.
+   * Default: rmse
    * @group param
    */
   val metricName: Param[String] = {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
index 46314854d5e3a..edad754436455 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
@@ -41,6 +41,7 @@ final class Binarizer(override val uid: String)
    * Param for threshold used to binarize continuous features.
    * The features greater than the threshold, will be binarized to 1.0.
    * The features equal to or less than the threshold, will be binarized to 0.0.
+   * Default: 0.0
    * @group param
    */
   val threshold: DoubleParam =
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
index 938447447a0a2..4c36df75d8aa0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
@@ -35,6 +35,7 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol
 
   /**
    * The minimum of documents in which a term should appear.
+   * Default: 0
    * @group param
    */
   final val minDocFreq = new IntParam(
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
index 7da430c7d16df..622c7c02da55f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
@@ -98,6 +98,7 @@ class StopWordsRemover(override val uid: String)
 
   /**
    * the stop words set to be filtered out
+   * Default: StopWords.English
    * @group param
    */
   val stopWords: StringArrayParam = new StringArrayParam(this, "stopWords", "stop words")
@@ -110,6 +111,7 @@ class StopWordsRemover(override val uid: String)
 
   /**
    * whether to do a case sensitive comparison over the stop words
+   * Default: false
    * @group param
    */
   val caseSensitive: BooleanParam = new BooleanParam(this, "caseSensitive",
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
index 24250e4c4cf92..a073faa5dcf30 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
@@ -214,6 +214,7 @@ class IndexToString private[ml] (
    * Param for array of labels.
    * Optional labels to be provided by the user, if not supplied column
    * metadata is read for labels.
+   * Default: Empty array, resulting in metadata used for labels.
    * @group param
    */
   final val labels: StringArrayParam = new StringArrayParam(this, "labels",
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
index 61b925c0fdc07..52e0599e38d83 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
@@ -43,6 +43,7 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu
    * Must be >= 2.
    *
    * (default = 20)
+   * @group param
    */
   val maxCategories = new IntParam(this, "maxCategories",
     "Threshold for the number of values a categorical feature can take (>= 2)." +
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala
index c5c2272270792..fb3387d4aa9be 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala
@@ -49,6 +49,7 @@ final class VectorSlicer(override val uid: String)
   /**
    * An array of indices to select features from a vector column.
    * There can be no overlap with [[names]].
+   * Default: Empty array
    * @group param
    */
   val indices = new IntArrayParam(this, "indices",
@@ -67,6 +68,7 @@ final class VectorSlicer(override val uid: String)
    * An array of feature names to select features from a vector column.
    * These names must be specified by ML [[org.apache.spark.ml.attribute.Attribute]]s.
    * There can be no overlap with [[indices]].
+   * Default: Empty Array
    * @group param
    */
   val names = new StringArrayParam(this, "names",
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
index 5af775a4159ad..9edab3af913ca 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
@@ -39,6 +39,7 @@ private[feature] trait Word2VecBase extends Params
 
   /**
    * The dimension of the code that you want to transform from words.
+   * Default: 100
    * @group param
    */
   final val vectorSize = new IntParam(
@@ -50,6 +51,7 @@ private[feature] trait Word2VecBase extends Params
 
   /**
    * Number of partitions for sentences of words.
+   * Default: 1
    * @group param
    */
   final val numPartitions = new IntParam(
@@ -62,6 +64,7 @@ private[feature] trait Word2VecBase extends Params
   /**
    * The minimum number of times a token must appear to be included in the word2vec model's
    * vocabulary.
+   * Default: 5
    * @group param
    */
   final val minCount = new IntParam(this, "minCount", "the minimum number of times a token must " +
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
index 0f33bae30e622..d43a3447d3975 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -40,6 +40,7 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
   /**
    * Param for whether the output sequence should be isotonic/increasing (true) or
    * antitonic/decreasing (false).
+   * Default: true
    * @group param
    */
   final val isotonic: BooleanParam =

From 16c972e2f6582def635cb4157fefbd0e98c58406 Mon Sep 17 00:00:00 2001
From: Holden Karau
Date: Fri, 4 Sep 2015 14:50:18 -0700
Subject: [PATCH 2/2] CR feedback

---
 .../ml/evaluation/MulticlassClassificationEvaluator.scala | 1 -
 .../org/apache/spark/ml/evaluation/RegressionEvaluator.scala | 1 -
 .../scala/org/apache/spark/ml/feature/StopWordsRemover.scala | 2 +-
 .../scala/org/apache/spark/ml/feature/StringIndexer.scala | 5 ++---
 4 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala
index 458ca0de32d6a..f73d2345078e6 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala
@@ -38,7 +38,6 @@ class MulticlassClassificationEvaluator (override val uid: String)
   /**
    * param for metric name in evaluation (supports `"f1"` (default), `"precision"`, `"recall"`,
    * `"weightedPrecision"`, `"weightedRecall"`)
-   * Default: f1
    * @group param
    */
   val metricName: Param[String] = {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
index f77b7661cfb88..d21c88ab9b109 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala
@@ -41,7 +41,6 @@ final class RegressionEvaluator(override val uid: String)
    * Because we will maximize evaluation value (ref: `CrossValidator`),
    * when we evaluate a metric that is needed to minimize (e.g., `"rmse"`, `"mse"`, `"mae"`),
    * we take and output the negative of this metric.
-   * Default: rmse
    * @group param
    */
   val metricName: Param[String] = {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
index 622c7c02da55f..2a79582625e9a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
@@ -98,7 +98,7 @@ class StopWordsRemover(override val uid: String)
 
   /**
    * the stop words set to be filtered out
-   * Default: StopWords.English
+   * Default: [[StopWords.English]]
    * @group param
    */
   val stopWords: StringArrayParam = new StringArrayParam(this, "stopWords", "stop words")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
index a073faa5dcf30..77aeed0ab0370 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
@@ -212,9 +212,8 @@ class IndexToString private[ml] (
 
   /**
    * Param for array of labels.
-   * Optional labels to be provided by the user, if not supplied column
-   * metadata is read for labels.
-   * Default: Empty array, resulting in metadata used for labels.
+   * Optional labels to be provided by the user.
+   * Default: Empty array, in which case column metadata is used for labels.
    * @group param
    */
   final val labels: StringArrayParam = new StringArrayParam(this, "labels",