From e73918d5b7aa03b3e86d99a78aaae2b425d791be Mon Sep 17 00:00:00 2001 From: Yuhao Yang Date: Tue, 24 Nov 2015 11:36:26 -0500 Subject: [PATCH 1/4] for discussion about API audit --- mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala | 2 +- .../main/scala/org/apache/spark/ml/feature/SQLTransformer.scala | 2 +- .../org/apache/spark/mllib/clustering/BisectingKMeans.scala | 2 +- .../apache/spark/mllib/clustering/BisectingKMeansModel.scala | 2 +- .../scala/org/apache/spark/mllib/stat/test/StreamingTest.scala | 1 - 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index aa88cb03d23c5..deb2413677216 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -105,7 +105,7 @@ object PCA extends DefaultParamsReadable[PCA] { @Experimental class PCAModel private[ml] ( override val uid: String, - val pc: DenseMatrix) + private[feature] val pc: DenseMatrix) extends Model[PCAModel] with PCAParams with MLWritable { import PCAModel._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala index 3a735017ba836..3015b97916723 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala @@ -44,7 +44,7 @@ class SQLTransformer @Since("1.6.0") (override val uid: String) extends Transfor * @group param */ @Since("1.6.0") - final val statement: Param[String] = new Param[String](this, "statement", "SQL statement") + private final val statement: Param[String] = new Param[String](this, "statement", "SQL statement") /** @group setParam */ @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala index 29a7aa0bb63f2..9777c38067436 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala @@ -407,7 +407,7 @@ private object BisectingKMeans extends Serializable { */ @Since("1.6.0") @Experimental -class ClusteringTreeNode private[clustering] ( +private[clustering] class ClusteringTreeNode private[clustering] ( val index: Int, val size: Long, private val centerWithNorm: VectorWithNorm, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala index 5015f1540d920..712a27b87759d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala @@ -33,7 +33,7 @@ import org.apache.spark.rdd.RDD @Since("1.6.0") @Experimental class BisectingKMeansModel @Since("1.6.0") ( - @Since("1.6.0") val root: ClusteringTreeNode + @Since("1.6.0")private[clustering] val root: ClusteringTreeNode ) extends Serializable with Logging { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala index 75c6a51d09571..d9e0b60e7604e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala @@ -19,7 +19,6 @@ package org.apache.spark.mllib.stat.test import org.apache.spark.Logging import org.apache.spark.annotation.{Experimental, Since} -import org.apache.spark.rdd.RDD import org.apache.spark.streaming.dstream.DStream import org.apache.spark.util.StatCounter From 3fb1cda11873ea7d4c63686aa470e42981426af5 Mon Sep 17 00:00:00 2001 From: Yuhao Yang Date: Thu, 26 Nov 2015 11:44:54 -0500 Subject: [PATCH 2/4] second pass --- .../main/scala/org/apache/spark/ml/feature/PCA.scala | 2 +- .../org/apache/spark/ml/feature/SQLTransformer.scala | 2 +- .../org/apache/spark/ml/feature/StringIndexer.scala | 12 ++++-------- .../org/apache/spark/ml/feature/package-info.java | 2 +- .../spark/ml/regression/LinearRegression.scala | 2 +- 5 files changed, 8 insertions(+), 12 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index deb2413677216..aa88cb03d23c5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -105,7 +105,7 @@ object PCA extends DefaultParamsReadable[PCA] { @Experimental class PCAModel private[ml] ( override val uid: String, - private[feature] val pc: DenseMatrix) + val pc: DenseMatrix) extends Model[PCAModel] with PCAParams with MLWritable { import PCAModel._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala index 3015b97916723..3a735017ba836 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala @@ -44,7 +44,7 @@ class SQLTransformer @Since("1.6.0") (override val uid: String) extends Transfor * @group param */ @Since("1.6.0") - private final val statement: Param[String] = new Param[String](this, "statement", "SQL statement") + final val statement: Param[String] = new Param[String](this, "statement", "SQL statement") /** @group setParam */ @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index 5c40c35eeaa48..3a05f398b8a7e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -52,6 +52,10 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha val outputFields = inputFields :+ attr.toStructField() StructType(outputFields) } + + /** @group setParam */ + def setHandleInvalid(value: String): this.type = set(handleInvalid, value) + setDefault(handleInvalid, "error") } /** @@ -69,10 +73,6 @@ class StringIndexer(override val uid: String) extends Estimator[StringIndexerMod def this() = this(Identifiable.randomUID("strIdx")) - /** @group setParam */ - def setHandleInvalid(value: String): this.type = set(handleInvalid, value) - setDefault(handleInvalid, "error") - /** @group setParam */ def setInputCol(value: String): this.type = set(inputCol, value) @@ -133,10 +133,6 @@ class StringIndexerModel ( map } - /** @group setParam */ - def setHandleInvalid(value: String): this.type = set(handleInvalid, value) - setDefault(handleInvalid, "error") - /** @group setParam */ def setInputCol(value: String): this.type = set(inputCol, value) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java b/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java index c22d2e0cd2d90..e0e43fe9831f6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java @@ -23,7 +23,7 @@ * features into more suitable forms for model fitting. * Most feature transformers are implemented as {@link org.apache.spark.ml.Transformer}s, which * transforms one {@link org.apache.spark.sql.DataFrame} into another, e.g., - * {@link org.apache.spark.feature.HashingTF}. + * {@link org.apache.spark.mllib.feature.HashingTF}. * Some feature transformers are implemented as {@link org.apache.spark.ml.Estimator}}s, because the * transformation requires some aggregated information of the dataset, e.g., document * frequencies in {@link org.apache.spark.ml.feature.IDF}. diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 1db91666f21ab..5e5850963edc9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -529,7 +529,7 @@ class LinearRegressionSummary private[regression] ( val predictionCol: String, val labelCol: String, val model: LinearRegressionModel, - val diagInvAtWA: Array[Double]) extends Serializable { + private val diagInvAtWA: Array[Double]) extends Serializable { @transient private val metrics = new RegressionMetrics( predictions From 319e61d37a4987e44b9b3225c68ca1c1de19210e Mon Sep 17 00:00:00 2001 From: Yuhao Yang Date: Wed, 9 Dec 2015 10:43:56 +0800 Subject: [PATCH 3/4] revert stringindexer --- .../org/apache/spark/ml/feature/StringIndexer.scala | 12 ++++++++---- .../org/apache/spark/ml/feature/package-info.java | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index 3a05f398b8a7e..5c40c35eeaa48 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -52,10 +52,6 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha val outputFields = inputFields :+ attr.toStructField() StructType(outputFields) } - - /** @group setParam */ - def setHandleInvalid(value: String): this.type = set(handleInvalid, value) - setDefault(handleInvalid, "error") } /** @@ -73,6 +69,10 @@ class StringIndexer(override val uid: String) extends Estimator[StringIndexerMod def this() = this(Identifiable.randomUID("strIdx")) + /** @group setParam */ + def setHandleInvalid(value: String): this.type = set(handleInvalid, value) + setDefault(handleInvalid, "error") + /** @group setParam */ def setInputCol(value: String): this.type = set(inputCol, value) @@ -133,6 +133,10 @@ class StringIndexerModel ( map } + /** @group setParam */ + def setHandleInvalid(value: String): this.type = set(handleInvalid, value) + setDefault(handleInvalid, "error") + /** @group setParam */ def setInputCol(value: String): this.type = set(inputCol, value) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java b/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java index e0e43fe9831f6..7a35f2d448f9d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/package-info.java @@ -23,7 +23,7 @@ * features into more suitable forms for model fitting. * Most feature transformers are implemented as {@link org.apache.spark.ml.Transformer}s, which * transforms one {@link org.apache.spark.sql.DataFrame} into another, e.g., - * {@link org.apache.spark.mllib.feature.HashingTF}. + * {@link org.apache.spark.ml.feature.HashingTF}. * Some feature transformers are implemented as {@link org.apache.spark.ml.Estimator}}s, because the * transformation requires some aggregated information of the dataset, e.g., document * frequencies in {@link org.apache.spark.ml.feature.IDF}. From 57c78a5f59ea84e903e7db8a9e2976e74dcb3d51 Mon Sep 17 00:00:00 2001 From: Yuhao Yang Date: Thu, 10 Dec 2015 10:14:41 +0800 Subject: [PATCH 4/4] make bisectingKMeansModel constructor private --- .../apache/spark/mllib/clustering/BisectingKMeansModel.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala index 51ae12ecaece4..9ccf96b9395b7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala @@ -32,8 +32,8 @@ import org.apache.spark.rdd.RDD */ @Since("1.6.0") @Experimental -class BisectingKMeansModel @Since("1.6.0") ( - @Since("1.6.0")private[clustering] val root: ClusteringTreeNode +class BisectingKMeansModel private[clustering] ( + private[clustering] val root: ClusteringTreeNode ) extends Serializable with Logging { /**