mengxr · mengxr · Aug 19, 2015 · Aug 19, 2015
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
@@ -62,6 +62,7 @@ class GaussianMixture private (
   /**
    * Constructs a default instance. The default parameters are {k: 2, convergenceTol: 0.01,
    * maxIterations: 100, seed: random}.
+   * @since 1.3.0
    */
   def this() = this(2, 0.01, 100, Utils.random.nextLong())
 
@@ -72,9 +73,11 @@ class GaussianMixture private (
   // default random starting point
   private var initialModel: Option[GaussianMixtureModel] = None
 
-  /** Set the initial GMM starting point, bypassing the random initialization.
-   *  You must call setK() prior to calling this method, and the condition
-   *  (model.k == this.k) must be met; failure will result in an IllegalArgumentException
+  /**
+   * Set the initial GMM starting point, bypassing the random initialization.
+   * You must call setK() prior to calling this method, and the condition
+   * (model.k == this.k) must be met; failure will result in an IllegalArgumentException
+   * @since 1.3.0
    */
   def setInitialModel(model: GaussianMixtureModel): this.type = {
     if (model.k == k) {
@@ -85,30 +88,46 @@ class GaussianMixture private (
     this
   }
 
-  /** Return the user supplied initial GMM, if supplied */
+  /**
+   * Return the user-supplied initial GMM, if one was supplied.
+   * @since 1.3.0
+   */
   def getInitialModel: Option[GaussianMixtureModel] = initialModel
 
-  /** Set the number of Gaussians in the mixture model.  Default: 2 */
+  /**
+   * Set the number of Gaussians in the mixture model.  Default: 2
+   * @since 1.3.0
+   */
   def setK(k: Int): this.type = {
     this.k = k
     this
   }
 
-  /** Return the number of Gaussians in the mixture model */
+  /**
+   * Return the number of Gaussians in the mixture model
+   * @since 1.3.0
+   */
   def getK: Int = k
 
-  /** Set the maximum number of iterations to run. Default: 100 */
+  /**
+   * Set the maximum number of iterations to run. Default: 100
+   * @since 1.3.0
+   */
   def setMaxIterations(maxIterations: Int): this.type = {
     this.maxIterations = maxIterations
     this
   }
 
-  /** Return the maximum number of iterations to run */
+  /**
+   * Return the maximum number of iterations to run
+   * @since 1.3.0
+   */
   def getMaxIterations: Int = maxIterations
 
   /**
    * Set the largest change in log-likelihood at which convergence is
    * considered to have occurred.
+   * @since 1.3.0
    */
   def setConvergenceTol(convergenceTol: Double): this.type = {
     this.convergenceTol = convergenceTol
@@ -118,19 +137,29 @@ class GaussianMixture private (
   /**
    * Return the largest change in log-likelihood at which convergence is
    * considered to have occurred.
+   * @since 1.3.0
    */
   def getConvergenceTol: Double = convergenceTol
 
-  /** Set the random seed */
+  /**
+   * Set the random seed
+   * @since 1.3.0
+   */
   def setSeed(seed: Long): this.type = {
     this.seed = seed
     this
   }
 
-  /** Return the random seed */
+  /**
+   * Return the random seed
+   * @since 1.3.0
+   */
   def getSeed: Long = seed
 
-  /** Perform expectation maximization */
+  /**
+   * Perform expectation maximization
+   * @since 1.3.0
+   */
   def run(data: RDD[Vector]): GaussianMixtureModel = {
     val sc = data.sparkContext
 
@@ -204,7 +233,10 @@ class GaussianMixture private (
     new GaussianMixtureModel(weights, gaussians)
   }
 
-  /** Java-friendly version of [[run()]] */
+  /**
+   * Java-friendly version of `run()`
+   * @since 1.3.0
+   */
   def run(data: JavaRDD[Vector]): GaussianMixtureModel = run(data.rdd)
 
   private def updateWeightsAndGaussians(

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
@@ -168,9 +168,6 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] {
       sc.parallelize(dataArray, 1).toDF().write.parquet(Loader.dataPath(path))
     }
 
-    /**
-     * @since 1.4.0
-     */
     def load(sc: SparkContext, path: String): GaussianMixtureModel = {
       val dataPath = Loader.dataPath(path)
       val sqlContext = new SQLContext(sc)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -49,40 +49,51 @@ class KMeans private (
   /**
    * Constructs a KMeans instance with default parameters: {k: 2, maxIterations: 20, runs: 1,
    * initializationMode: "k-means||", initializationSteps: 5, epsilon: 1e-4, seed: random}.
+   * @since 0.8.0
    */
   def this() = this(2, 20, 1, KMeans.K_MEANS_PARALLEL, 5, 1e-4, Utils.random.nextLong())
 
   /**
    * Number of clusters to create (k).
+   * @since 1.4.0
    */
   def getK: Int = k
 
-  /** Set the number of clusters to create (k). Default: 2. */
+  /**
+   * Set the number of clusters to create (k). Default: 2.
+   * @since 0.8.0
+   */
   def setK(k: Int): this.type = {
     this.k = k
     this
   }
 
   /**
    * Maximum number of iterations to run.
+   * @since 1.4.0
    */
   def getMaxIterations: Int = maxIterations
 
-  /** Set maximum number of iterations to run. Default: 20. */
+  /**
+   * Set maximum number of iterations to run. Default: 20.
+   * @since 0.8.0
+   */
   def setMaxIterations(maxIterations: Int): this.type = {
     this.maxIterations = maxIterations
     this
   }
 
   /**
    * The initialization algorithm. This can be either "random" or "k-means||".
+   * @since 1.4.0
    */
   def getInitializationMode: String = initializationMode
 
   /**
    * Set the initialization algorithm. This can be either "random" to choose random points as
    * initial cluster centers, or "k-means||" to use a parallel variant of k-means++
    * (Bahmani et al., Scalable K-Means++, VLDB 2012). Default: k-means||.
+   * @since 0.8.0
    */
   def setInitializationMode(initializationMode: String): this.type = {
     KMeans.validateInitMode(initializationMode)
@@ -93,6 +104,7 @@ class KMeans private (
   /**
    * :: Experimental ::
    * Number of runs of the algorithm to execute in parallel.
+   * @since 1.4.0
    */
   @Experimental
   def getRuns: Int = runs
@@ -102,6 +114,7 @@ class KMeans private (
    * Set the number of runs of the algorithm to execute in parallel. We initialize the algorithm
    * this many times with random starting conditions (configured by the initialization mode), then
    * return the best clustering found over any run. Default: 1.
+   * @since 0.8.0
    */
   @Experimental
   def setRuns(runs: Int): this.type = {
@@ -114,12 +127,14 @@ class KMeans private (
 
   /**
    * Number of steps for the k-means|| initialization mode
+   * @since 1.4.0
    */
   def getInitializationSteps: Int = initializationSteps
 
   /**
    * Set the number of steps for the k-means|| initialization mode. This is an advanced
    * setting -- the default of 5 is almost always enough. Default: 5.
+   * @since 0.8.0
    */
   def setInitializationSteps(initializationSteps: Int): this.type = {
     if (initializationSteps <= 0) {
@@ -131,12 +146,14 @@ class KMeans private (
 
   /**
    * The distance threshold within which we've consider centers to have converged.
+   * @since 1.4.0
    */
   def getEpsilon: Double = epsilon
 
   /**
    * Set the distance threshold within which we've consider centers to have converged.
    * If all centers move less than this Euclidean distance, we stop iterating one run.
+   * @since 0.8.0
    */
   def setEpsilon(epsilon: Double): this.type = {
     this.epsilon = epsilon
@@ -145,10 +162,14 @@ class KMeans private (
 
   /**
    * The random seed for cluster initialization.
+   * @since 1.4.0
    */
   def getSeed: Long = seed
 
-  /** Set the random seed for cluster initialization. */
+  /**
+   * Set the random seed for cluster initialization.
+   * @since 1.4.0
+   */
   def setSeed(seed: Long): this.type = {
     this.seed = seed
     this
@@ -162,6 +183,7 @@ class KMeans private (
    * Set the initial starting point, bypassing the random initialization or k-means||
    * The condition model.k == this.k must be met, failure results
    * in an IllegalArgumentException.
+   * @since 1.4.0
    */
   def setInitialModel(model: KMeansModel): this.type = {
     require(model.k == k, "mismatched cluster count")
@@ -172,6 +194,7 @@ class KMeans private (
   /**
    * Train a K-means model on the given set of points; `data` should be cached for high
    * performance, because this is an iterative algorithm.
+   * @since 0.8.0
    */
   def run(data: RDD[Vector]): KMeansModel = {
 

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
@@ -104,6 +104,10 @@ class KMeansModel (
  * @since 1.4.0
  */
 object KMeansModel extends Loader[KMeansModel] {
+
+  /**
+   * @since 1.4.0
+   */
   override def load(sc: SparkContext, path: String): KMeansModel = {
     KMeansModel.SaveLoadV1_0.load(sc, path)
   }

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
@@ -55,6 +55,10 @@ class LDA private (
     private var checkpointInterval: Int,
     private var ldaOptimizer: LDAOptimizer) extends Logging {
 
+  /**
+   * Constructs an LDA instance with default parameters.
+   * @since 1.3.0
+   */
   def this() = this(k = 10, maxIterations = 20, docConcentration = Vectors.dense(-1),
     topicConcentration = -1, seed = Utils.random.nextLong(), checkpointInterval = 10,
     ldaOptimizer = new EMLDAOptimizer)