
Commit db34690

[SPARK-5599] Check MLlib public APIs for 1.3
There are no breaking changes (against 1.2) in this PR. I hid `PythonMLLibAPI`, which is only called by Py4J, and renamed `SparseMatrix.diag` to `SparseMatrix.spdiag`. All other changes are documentation and annotations. The `Experimental` tag is removed from `ALS.setAlpha` and `Rating`. One issue not addressed in this PR is `setCheckpointDir` in `LDA` (https://issues.apache.org/jira/browse/SPARK-5604).

CC: srowen jkbradley

Author: Xiangrui Meng <[email protected]>

Closes apache#4377 from mengxr/SPARK-5599 and squashes the following commits:

17975dc [Xiangrui Meng] fix tests
4487f20 [Xiangrui Meng] remove experimental tag from each stat method because Statistics is experimental already
3cd969a [Xiangrui Meng] remove freeman (sorry~) from StreamLA public doc
55900f5 [Xiangrui Meng] make IR experimental and update its doc
9b8eed3 [Xiangrui Meng] graduate Rating and setAlpha in ALS
b854d28 [Xiangrui Meng] correct iid doc in RandomRDDs
27f5bdd [Xiangrui Meng] update linalg docs and some new method signatures
371721b [Xiangrui Meng] mark fpg as experimental and update its doc
8aca7ee [Xiangrui Meng] change SLR to experimental and update the doc
ebbb2e9 [Xiangrui Meng] mark PIC experimental and update the doc
7830d3b [Xiangrui Meng] mark GMM experimental
a378496 [Xiangrui Meng] use the correct subscript syntax in PIC
c65c424 [Xiangrui Meng] update LDAModel doc
a213b0c [Xiangrui Meng] update GMM constructor
3993054 [Xiangrui Meng] hide algorithm in SLR
ad6b9ce [Xiangrui Meng] Revert "make ClassificatinModel.predict(JavaRDD) return JavaDoubleRDD"
0054684 [Xiangrui Meng] add doc to LRModel's constructor
a89763b [Xiangrui Meng] make ClassificatinModel.predict(JavaRDD) return JavaDoubleRDD
7c0946c [Xiangrui Meng] hide PythonMLLibAPI
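For readers tracking the one rename mentioned above, here is a minimal sketch (not part of this commit's diff) of how the renamed factory would be called; it assumes `spdiag` keeps the old `diag(vector: Vector)` signature on the `SparseMatrix` companion object:

import org.apache.spark.mllib.linalg.{SparseMatrix, Vectors}

// Before this PR: SparseMatrix.diag(v); after this PR: SparseMatrix.spdiag(v).
// Builds a square sparse matrix with v on its diagonal.
val v = Vectors.dense(1.0, 0.0, 3.0)
val m = SparseMatrix.spdiag(v)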
1 parent 975bcef commit db34690

File tree: 19 files changed, +160 additions, −119 deletions

mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala

Lines changed: 2 additions & 4 deletions
@@ -54,11 +54,9 @@ import org.apache.spark.storage.StorageLevel
 import org.apache.spark.util.Utils
 
 /**
- * :: DeveloperApi ::
- * The Java stubs necessary for the Python mllib bindings.
+ * The Java stubs necessary for the Python mllib bindings. It is called by Py4J on the Python side.
  */
-@DeveloperApi
-class PythonMLLibAPI extends Serializable {
+private[python] class PythonMLLibAPI extends Serializable {
 
 
   /**
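A small illustrative sketch (not from this commit) of what the `private[python]` qualifier above does: the class stays visible inside the `org.apache.spark.mllib.api.python` package, where Py4J instantiates it, but drops out of the public Scala/Java API.

package org.apache.spark.mllib.api.python

// Accessible anywhere under the `python` package, invisible to user code outside it.
private[python] class PythonMLLibAPI extends Serializable {
  // Py4J calls into this class from the Python side, so hiding it from
  // Scala/Java callers does not affect the PySpark MLlib bindings.
}

// From user code outside org.apache.spark.mllib.api.python:
// new PythonMLLibAPI()   // no longer compiles after this change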

mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala

Lines changed: 3 additions & 0 deletions
@@ -62,6 +62,9 @@ class LogisticRegressionModel (
       s" but was given weights of length ${weights.size}")
   }
 
+  /**
+   * Constructs a [[LogisticRegressionModel]] with weights and intercept for binary classification.
+   */
   def this(weights: Vector, intercept: Double) = this(weights, intercept, weights.size, 2)
 
   private var threshold: Option[Double] = Some(0.5)
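A hedged sketch of the auxiliary constructor documented above (the weights and inputs are made up; `predict` on a single `Vector` is assumed to be available on the model):

import org.apache.spark.mllib.classification.LogisticRegressionModel
import org.apache.spark.mllib.linalg.Vectors

// Binary classification: numFeatures comes from weights.size, numClasses defaults to 2.
val model = new LogisticRegressionModel(Vectors.dense(0.5, -0.3), 0.1)
val label = model.predict(Vectors.dense(1.0, 2.0)) // 0.0 or 1.0 under the default 0.5 threshold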

mllib/src/main/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionWithSGD.scala

Lines changed: 3 additions & 2 deletions
@@ -35,12 +35,13 @@ import org.apache.spark.mllib.regression.StreamingLinearAlgorithm
  * Use a builder pattern to construct a streaming logistic regression
  * analysis in an application, like:
  *
+ * {{{
  * val model = new StreamingLogisticRegressionWithSGD()
  *   .setStepSize(0.5)
  *   .setNumIterations(10)
  *   .setInitialWeights(Vectors.dense(...))
  *   .trainOn(DStream)
- *
+ * }}}
  */
 @Experimental
 class StreamingLogisticRegressionWithSGD private[mllib] (
@@ -59,7 +60,7 @@ class StreamingLogisticRegressionWithSGD private[mllib] (
    */
   def this() = this(0.1, 50, 1.0, 0.0)
 
-  val algorithm = new LogisticRegressionWithSGD(
+  protected val algorithm = new LogisticRegressionWithSGD(
     stepSize, numIterations, regParam, miniBatchFraction)
 
   /** Set the step size for gradient descent. Default: 0.1. */
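The Scaladoc example added above stops at `trainOn`; a hedged sketch of the full train/predict loop, assuming `trainingStream: DStream[LabeledPoint]` and `testStream: DStream[Vector]` already exist:

import org.apache.spark.mllib.classification.StreamingLogisticRegressionWithSGD
import org.apache.spark.mllib.linalg.Vectors

val model = new StreamingLogisticRegressionWithSGD()
  .setStepSize(0.5)
  .setNumIterations(10)
  .setInitialWeights(Vectors.dense(0.0, 0.0)) // dimension must match the feature vectors
model.trainOn(trainingStream)                 // updates the model on every mini-batch
model.predictOn(testStream).print()           // emits a Double label per test vector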

mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala

Lines changed: 10 additions & 3 deletions
@@ -19,15 +19,18 @@ package org.apache.spark.mllib.clustering
 
 import scala.collection.mutable.IndexedSeq
 
-import breeze.linalg.{DenseVector => BreezeVector, DenseMatrix => BreezeMatrix, diag, Transpose}
+import breeze.linalg.{DenseMatrix => BreezeMatrix, DenseVector => BreezeVector, Transpose, diag}
 
-import org.apache.spark.mllib.linalg.{Matrices, Vector, Vectors, DenseVector, DenseMatrix, BLAS}
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix, DenseVector, Matrices, Vector, Vectors}
 import org.apache.spark.mllib.stat.distribution.MultivariateGaussian
 import org.apache.spark.mllib.util.MLUtils
 import org.apache.spark.rdd.RDD
 import org.apache.spark.util.Utils
 
 /**
+ * :: Experimental ::
+ *
  * This class performs expectation maximization for multivariate Gaussian
  * Mixture Models (GMMs). A GMM represents a composite distribution of
  * independent Gaussian distributions with associated "mixing" weights
@@ -44,13 +47,17 @@ import org.apache.spark.util.Utils
  * is considered to have occurred.
  * @param maxIterations The maximum number of iterations to perform
  */
+@Experimental
 class GaussianMixture private (
     private var k: Int,
     private var convergenceTol: Double,
     private var maxIterations: Int,
     private var seed: Long) extends Serializable {
 
-  /** A default instance, 2 Gaussians, 100 iterations, 0.01 log-likelihood threshold */
+  /**
+   * Constructs a default instance. The default parameters are {k: 2, convergenceTol: 0.01,
+   * maxIterations: 100, seed: random}.
+   */
   def this() = this(2, 0.01, 100, Utils.random.nextLong())
 
   // number of samples per cluster to use when initializing Gaussians
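A hedged training sketch for the now-`@Experimental` estimator, using the default constructor documented above (the data points are made up and `sc` is assumed to be an existing SparkContext):

import org.apache.spark.mllib.clustering.GaussianMixture
import org.apache.spark.mllib.linalg.Vectors

// Defaults: k = 2, convergenceTol = 0.01, maxIterations = 100, random seed.
val points = sc.parallelize(Seq(
  Vectors.dense(0.1, 0.2), Vectors.dense(0.2, 0.1),
  Vectors.dense(9.0, 8.5), Vectors.dense(8.7, 9.1)))
val gmm = new GaussianMixture().setK(2).run(points)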

mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala

Lines changed: 5 additions & 1 deletion
@@ -19,12 +19,15 @@ package org.apache.spark.mllib.clustering
 
 import breeze.linalg.{DenseVector => BreezeVector}
 
-import org.apache.spark.rdd.RDD
+import org.apache.spark.annotation.Experimental
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.mllib.stat.distribution.MultivariateGaussian
 import org.apache.spark.mllib.util.MLUtils
+import org.apache.spark.rdd.RDD
 
 /**
+ * :: Experimental ::
+ *
  * Multivariate Gaussian Mixture Model (GMM) consisting of k Gaussians, where points
  * are drawn from each Gaussian i=1..k with probability w(i); mu(i) and sigma(i) are
  * the respective mean and covariance for each Gaussian distribution i=1..k.
@@ -35,6 +38,7 @@ import org.apache.spark.mllib.util.MLUtils
  * @param sigma Covariance maxtrix for each Gaussian in the mixture, where sigma(i) is the
  *              covariance matrix for Gaussian i
  */
+@Experimental
 class GaussianMixtureModel(
   val weights: Array[Double],
   val gaussians: Array[MultivariateGaussian]) extends Serializable {
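Continuing the sketch above, the resulting model exposes the `weights` and `gaussians` shown in this constructor, plus prediction methods; `predict` and `predictSoft` over an RDD are assumed here:

// `gmm` and `points` come from the GaussianMixture sketch above.
val hard = gmm.predict(points)      // RDD[Int]: index of the most likely Gaussian per point
val soft = gmm.predictSoft(points)  // RDD[Array[Double]]: per-Gaussian membership probabilities
gmm.gaussians.zip(gmm.weights).foreach { case (g, w) =>
  println(s"weight=$w, mu=${g.mu}, sigma=${g.sigma}")
}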

mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala

Lines changed: 1 addition & 1 deletion
@@ -335,7 +335,7 @@ class DistributedLDAModel private (
 
   /**
    * For each document in the training set, return the distribution over topics for that document
-   * (i.e., "theta_doc").
+   * ("theta_doc").
    *
    * @return RDD of (document ID, topic distribution) pairs
    */
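For context on the doc line tweaked above, a minimal consumption sketch, assuming a trained `DistributedLDAModel` named `ldaModel`:

// Each element pairs a document ID with its topic mixture ("theta_doc").
val topicDist = ldaModel.topicDistributions // RDD[(Long, Vector)]
topicDist.take(3).foreach { case (docId, theta) =>
  println(s"doc $docId -> $theta")
}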

mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala

Lines changed: 18 additions & 8 deletions
@@ -18,6 +18,7 @@
 package org.apache.spark.mllib.clustering
 
 import org.apache.spark.{Logging, SparkException}
+import org.apache.spark.annotation.Experimental
 import org.apache.spark.graphx._
 import org.apache.spark.graphx.impl.GraphImpl
 import org.apache.spark.mllib.linalg.Vectors
@@ -26,25 +27,33 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.util.random.XORShiftRandom
 
 /**
+ * :: Experimental ::
+ *
  * Model produced by [[PowerIterationClustering]].
  *
  * @param k number of clusters
  * @param assignments an RDD of (vertexID, clusterID) pairs
  */
+@Experimental
 class PowerIterationClusteringModel(
     val k: Int,
     val assignments: RDD[(Long, Int)]) extends Serializable
 
 /**
- * Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by Lin and
- * Cohen (see http://www.icml2010.org/papers/387.pdf). From the abstract: PIC finds a very
+ * :: Experimental ::
+ *
+ * Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by
+ * [[http://www.icml2010.org/papers/387.pdf Lin and Cohen]]. From the abstract: PIC finds a very
  * low-dimensional embedding of a dataset using truncated power iteration on a normalized pair-wise
  * similarity matrix of the data.
  *
  * @param k Number of clusters.
 * @param maxIterations Maximum number of iterations of the PIC algorithm.
 * @param initMode Initialization mode.
+ *
+ * @see [[http://en.wikipedia.org/wiki/Spectral_clustering Spectral clustering (Wikipedia)]]
  */
+@Experimental
 class PowerIterationClustering private[clustering] (
     private var k: Int,
     private var maxIterations: Int,
@@ -88,11 +97,12 @@ class PowerIterationClustering private[clustering] (
   /**
    * Run the PIC algorithm.
    *
-   * @param similarities an RDD of (i, j, s_ij_) tuples representing the affinity matrix, which is
-   *                     the matrix A in the PIC paper. The similarity s_ij_ must be nonnegative.
-   *                     This is a symmetric matrix and hence s_ij_ = s_ji_. For any (i, j) with
-   *                     nonzero similarity, there should be either (i, j, s_ij_) or (j, i, s_ji_)
-   *                     in the input. Tuples with i = j are ignored, because we assume s_ij_ = 0.0.
+   * @param similarities an RDD of (i, j, s,,ij,,) tuples representing the affinity matrix, which is
+   *                     the matrix A in the PIC paper. The similarity s,,ij,, must be nonnegative.
+   *                     This is a symmetric matrix and hence s,,ij,, = s,,ji,,. For any (i, j) with
+   *                     nonzero similarity, there should be either (i, j, s,,ij,,) or
+   *                     (j, i, s,,ji,,) in the input. Tuples with i = j are ignored, because we
+   *                     assume s,,ij,, = 0.0.
    *
    * @return a [[PowerIterationClusteringModel]] that contains the clustering result
    */
@@ -109,7 +119,7 @@ class PowerIterationClustering private[clustering] (
    * Runs the PIC algorithm.
    *
    * @param w The normalized affinity matrix, which is the matrix W in the PIC paper with
-   *          w_ij_ = a_ij_ / d_ii_ as its edge properties and the initial vector of the power
+   *          w,,ij,, = a,,ij,, / d,,ii,, as its edge properties and the initial vector of the power
   *          iteration as its vertex properties.
    */
   private def pic(w: Graph[Double, Double]): PowerIterationClusteringModel = {
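A hedged end-to-end sketch of the public `run` documented above (the vertex IDs and similarities are made up; `sc` is an existing SparkContext):

import org.apache.spark.mllib.clustering.PowerIterationClustering

// Symmetric affinities: provide either (i, j, s_ij) or (j, i, s_ji); i = j tuples are ignored.
val similarities = sc.parallelize(Seq(
  (0L, 1L, 0.9), (1L, 2L, 0.9), (2L, 3L, 0.1), (3L, 4L, 0.9)))
val model = new PowerIterationClustering()
  .setK(2)
  .setMaxIterations(20)
  .run(similarities)
model.assignments.collect().foreach { case (vertexId, clusterId) =>
  println(s"$vertexId -> $clusterId")
}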

mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala

Lines changed: 11 additions & 5 deletions
@@ -21,15 +21,16 @@ import scala.reflect.ClassTag
 
 import org.apache.spark.Logging
 import org.apache.spark.SparkContext._
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{Experimental, DeveloperApi}
 import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.streaming.dstream.DStream
 import org.apache.spark.util.Utils
 import org.apache.spark.util.random.XORShiftRandom
 
 /**
- * :: DeveloperApi ::
+ * :: Experimental ::
+ *
  * StreamingKMeansModel extends MLlib's KMeansModel for streaming
  * algorithms, so it can keep track of a continuously updated weight
  * associated with each cluster, and also update the model by
@@ -39,8 +40,10 @@ import org.apache.spark.util.random.XORShiftRandom
  * generalized to incorporate forgetfullness (i.e. decay).
  * The update rule (for each cluster) is:
  *
+ * {{{
  * c_t+1 = [(c_t * n_t * a) + (x_t * m_t)] / [n_t + m_t]
  * n_t+t = n_t * a + m_t
+ * }}}
  *
  * Where c_t is the previously estimated centroid for that cluster,
 * n_t is the number of points assigned to it thus far, x_t is the centroid
@@ -61,7 +64,7 @@ import org.apache.spark.util.random.XORShiftRandom
  * as batches or points.
  *
  */
-@DeveloperApi
+@Experimental
 class StreamingKMeansModel(
     override val clusterCenters: Array[Vector],
     val clusterWeights: Array[Double]) extends KMeansModel(clusterCenters) with Logging {
@@ -140,7 +143,8 @@ class StreamingKMeansModel(
 }
 
 /**
- * :: DeveloperApi ::
+ * :: Experimental ::
+ *
  * StreamingKMeans provides methods for configuring a
  * streaming k-means analysis, training the model on streaming,
  * and using the model to make predictions on streaming data.
@@ -149,13 +153,15 @@ class StreamingKMeansModel(
  * Use a builder pattern to construct a streaming k-means analysis
  * in an application, like:
  *
+ * {{{
  * val model = new StreamingKMeans()
  *   .setDecayFactor(0.5)
  *   .setK(3)
  *   .setRandomCenters(5, 100.0)
  *   .trainOn(DStream)
+ * }}}
  */
-@DeveloperApi
+@Experimental
 class StreamingKMeans(
     var k: Int,
     var decayFactor: Double,
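The update rule wrapped in `{{{...}}}` above, transcribed literally into a tiny standalone helper (plain Scala, no Spark) to make the arithmetic concrete; this mirrors the doc's formula rather than the internal implementation:

// c: previous centroid, n: its weight, x: centroid of the new batch of points,
// m: number of points in the batch, a: decay factor.
def updateCluster(c: Array[Double], n: Double, x: Array[Double], m: Double, a: Double)
  : (Array[Double], Double) = {
  // c_t+1 = [(c_t * n_t * a) + (x_t * m_t)] / [n_t + m_t]
  val center = c.zip(x).map { case (ci, xi) => (ci * n * a + xi * m) / (n + m) }
  // n_t+1 = n_t * a + m_t
  val weight = n * a + m
  (center, weight)
}

// With a = 1.0 (no decay), updateCluster(Array(1.0, 1.0), 10.0, Array(2.0, 2.0), 5.0, 1.0)
// moves the centroid a third of the way toward the batch centroid: (1.333..., 1.333...).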

mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala

Lines changed: 19 additions & 8 deletions
@@ -25,16 +25,20 @@ import scala.collection.JavaConverters._
 import scala.reflect.ClassTag
 
 import org.apache.spark.{HashPartitioner, Logging, Partitioner, SparkException}
+import org.apache.spark.annotation.Experimental
 import org.apache.spark.api.java.{JavaPairRDD, JavaRDD}
 import org.apache.spark.api.java.JavaSparkContext.fakeClassTag
 import org.apache.spark.rdd.RDD
 import org.apache.spark.storage.StorageLevel
 
 /**
+ * :: Experimental ::
+ *
  * Model trained by [[FPGrowth]], which holds frequent itemsets.
  * @param freqItemsets frequent itemset, which is an RDD of (itemset, frequency) pairs
  * @tparam Item item type
  */
+@Experimental
 class FPGrowthModel[Item: ClassTag](
     val freqItemsets: RDD[(Array[Item], Long)]) extends Serializable {
 
@@ -45,28 +49,35 @@ class FPGrowthModel[Item: ClassTag](
 }
 
 /**
- * This class implements Parallel FP-growth algorithm to do frequent pattern matching on input data.
- * Parallel FPGrowth (PFP) partitions computation in such a way that each machine executes an
- * independent group of mining tasks. More detail of this algorithm can be found at
- * [[http://dx.doi.org/10.1145/1454008.1454027, PFP]], and the original FP-growth paper can be
- * found at [[http://dx.doi.org/10.1145/335191.335372, FP-growth]]
+ * :: Experimental ::
+ *
+ * A parallel FP-growth algorithm to mine frequent itemsets. The algorithm is described in
+ * [[http://dx.doi.org/10.1145/1454008.1454027 Li et al., PFP: Parallel FP-Growth for Query
+ * Recommendation]]. PFP distributes computation in such a way that each worker executes an
+ * independent group of mining tasks. The FP-Growth algorithm is described in
+ * [[http://dx.doi.org/10.1145/335191.335372 Han et al., Mining frequent patterns without candidate
+ * generation]].
 *
 * @param minSupport the minimal support level of the frequent pattern, any pattern appears
 *                   more than (minSupport * size-of-the-dataset) times will be output
 * @param numPartitions number of partitions used by parallel FP-growth
+ *
+ * @see [[http://en.wikipedia.org/wiki/Association_rule_learning Association rule learning
+ *      (Wikipedia)]]
 */
+@Experimental
 class FPGrowth private (
     private var minSupport: Double,
     private var numPartitions: Int) extends Logging with Serializable {
 
   /**
-   * Constructs a FPGrowth instance with default parameters:
-   * {minSupport: 0.3, numPartitions: auto}
+   * Constructs a default instance with default parameters {minSupport: `0.3`, numPartitions: same
+   * as the input data}.
    */
   def this() = this(0.3, -1)
 
   /**
-   * Sets the minimal support level (default: 0.3).
+   * Sets the minimal support level (default: `0.3`).
    */
   def setMinSupport(minSupport: Double): this.type = {
     this.minSupport = minSupport
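A hedged usage sketch for the newly documented API above (the transactions are made up; `sc` is an existing SparkContext):

import org.apache.spark.mllib.fpm.FPGrowth

val transactions = sc.parallelize(Seq(
  Array("a", "b", "c"),
  Array("a", "b"),
  Array("b", "c"),
  Array("a", "c")))

// minSupport 0.5 keeps itemsets that appear in at least half of the transactions;
// numPartitions is left at its default (same as the input data).
val model = new FPGrowth().setMinSupport(0.5).run(transactions)
model.freqItemsets.collect().foreach { case (itemset, freq) =>
  println(itemset.mkString("{", ",", "}") + s": $freq")
}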
