From 2b133d63ae0be8746050929627683cb238bc200b Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Tue, 1 Apr 2014 22:42:49 -0700 Subject: [PATCH 1/9] initial annotation of developer and experimental APIs --- .../scala/org/apache/spark/mllib/tree/impurity/Impurity.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala index a4069063af2ad..43f296ac56bc8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala @@ -23,6 +23,8 @@ package org.apache.spark.mllib.tree.impurity trait Impurity extends Serializable { /** + * DEVELOPER API - UNSTABLE + * * information calculation for binary classification * @param c0 count of instances with label 0 * @param c1 count of instances with label 1 @@ -31,6 +33,8 @@ def calculate(c0 : Double, c1 : Double): Double /** + * DEVELOPER API - UNSTABLE + * * information calculation for regression * @param count number of instances * @param sum sum of labels From 86b9e342e55c8fc8cf24ef8a1137c1e683c218b1 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Mon, 7 Apr 2014 00:17:05 -0700 Subject: [PATCH 2/9] one pass over APIs of GLMs, NaiveBayes, and ALS --- .../mllib/api/python/PythonMLLibAPI.scala | 3 ++ .../classification/LogisticRegression.scala | 20 ++++++------- .../mllib/classification/NaiveBayes.scala | 19 +++++++----- .../spark/mllib/classification/SVM.scala | 23 ++++++++------- .../spark/mllib/clustering/KMeans.scala | 16 +++++----- .../apache/spark/mllib/linalg/Vectors.scala | 19 +++++++----- .../spark/mllib/optimization/Gradient.scala | 8 +++++ .../mllib/optimization/GradientDescent.scala | 12 ++++++-- .../spark/mllib/optimization/Optimizer.scala | 5 ++++ .../spark/mllib/optimization/Updater.scala | 8 +++++ .../spark/mllib/recommendation/ALS.scala | 29 +++++++++++-------- .../MatrixFactorizationModel.scala | 2 ++ .../GeneralizedLinearAlgorithm.scala | 5 +++- .../apache/spark/mllib/regression/Lasso.scala | 17 ++++++----- .../mllib/regression/LinearRegression.scala | 15 +++++----- .../mllib/regression/RidgeRegression.scala | 23 ++++++++------- .../spark/mllib/util/DataValidators.scala | 6 ++-- .../mllib/util/KMeansDataGenerator.scala | 2 ++ .../mllib/util/LinearDataGenerator.scala | 2 ++ .../LogisticRegressionDataGenerator.scala | 3 +- .../spark/mllib/util/MFDataGenerator.scala | 6 ++-- .../org/apache/spark/mllib/util/MLUtils.scala | 6 +++- .../spark/mllib/util/SVMDataGenerator.scala | 2 ++ 23 files changed, 157 insertions(+), 94 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index 2df5b0d02b699..e866687093b21 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -28,7 +28,10 @@ import org.apache.spark.mllib.regression._ import org.apache.spark.rdd.RDD /** + * DEVELOPER API + * * The Java stubs necessary for the Python mllib bindings. + * Users should not call the methods defined in this class directly.
*/ class PythonMLLibAPI extends Serializable { private def deserializeDoubleVector(bytes: Array[Byte]): Array[Double] = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index 798f3a5c94740..44f5b525b8814 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -55,7 +55,7 @@ class LogisticRegressionModel( this } - override def predictPoint(dataMatrix: Vector, weightMatrix: Vector, + override protected def predictPoint(dataMatrix: Vector, weightMatrix: Vector, intercept: Double) = { val margin = weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept val score = 1.0/ (1.0 + math.exp(-margin)) @@ -70,28 +70,28 @@ class LogisticRegressionModel( * Train a classification model for Logistic Regression using Stochastic Gradient Descent. * NOTE: Labels used in Logistic Regression should be {0, 1} */ -class LogisticRegressionWithSGD private ( - var stepSize: Double, - var numIterations: Int, - var regParam: Double, - var miniBatchFraction: Double) +class LogisticRegressionWithSGD ( + private var stepSize: Double, + private var numIterations: Int, + private var regParam: Double, + private var miniBatchFraction: Double) extends GeneralizedLinearAlgorithm[LogisticRegressionModel] with Serializable { - val gradient = new LogisticGradient() - val updater = new SimpleUpdater() + private val gradient = new LogisticGradient() + private val updater = new SimpleUpdater() override val optimizer = new GradientDescent(gradient, updater) .setStepSize(stepSize) .setNumIterations(numIterations) .setRegParam(regParam) .setMiniBatchFraction(miniBatchFraction) - override val validators = List(DataValidators.classificationLabels) + override protected val validators = List(DataValidators.binaryLabelValidator) /** * Construct a LogisticRegression object with default parameters */ def this() = this(1.0, 100, 0.0, 1.0) - def createModel(weights: Vector, intercept: Double) = { + override protected def createModel(weights: Vector, intercept: Double) = { new LogisticRegressionModel(weights, intercept) } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index e956185319a69..37eaaa28a9619 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -40,14 +40,17 @@ class NaiveBayesModel( private val brzPi = new BDV[Double](pi) private val brzTheta = new BDM[Double](theta.length, theta(0).length) - var i = 0 - while (i < theta.length) { - var j = 0 - while (j < theta(i).length) { - brzTheta(i, j) = theta(i)(j) - j += 1 + { + // Need to put an extra pair of braces to prevent Scala treat `i` as a member. + var i = 0 + while (i < theta.length) { + var j = 0 + while (j < theta(i).length) { + brzTheta(i, j) = theta(i)(j) + j += 1 + } + i += 1 } - i += 1 } override def predict(testData: RDD[Vector]): RDD[Double] = testData.map(predict) @@ -65,7 +68,7 @@ class NaiveBayesModel( * document classification. By making every vector a 0-1 vector, it can also be used as * Bernoulli NB ([[http://tinyurl.com/p7c96j6]]). 
*/ -class NaiveBayes private (var lambda: Double) extends Serializable with Logging { +class NaiveBayes (private var lambda: Double) extends Serializable with Logging { def this() = this(1.0) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala index e31a08899f8bc..2bf4f9c2d7cfc 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala @@ -55,7 +55,9 @@ class SVMModel( this } - override def predictPoint(dataMatrix: Vector, weightMatrix: Vector, + override protected def predictPoint( + dataMatrix: Vector, + weightMatrix: Vector, intercept: Double) = { val margin = weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept threshold match { @@ -69,29 +71,28 @@ class SVMModel( * Train a Support Vector Machine (SVM) using Stochastic Gradient Descent. * NOTE: Labels used in SVM should be {0, 1}. */ -class SVMWithSGD private ( - var stepSize: Double, - var numIterations: Int, - var regParam: Double, - var miniBatchFraction: Double) +class SVMWithSGD( + private var stepSize: Double, + private var numIterations: Int, + private var regParam: Double, + private var miniBatchFraction: Double) extends GeneralizedLinearAlgorithm[SVMModel] with Serializable { - val gradient = new HingeGradient() - val updater = new SquaredL2Updater() + private val gradient = new HingeGradient() + private val updater = new SquaredL2Updater() override val optimizer = new GradientDescent(gradient, updater) .setStepSize(stepSize) .setNumIterations(numIterations) .setRegParam(regParam) .setMiniBatchFraction(miniBatchFraction) - - override val validators = List(DataValidators.classificationLabels) + override protected val validators = List(DataValidators.binaryLabelValidator) /** * Construct a SVM object with default parameters */ def this() = this(1.0, 100, 1.0, 1.0) - def createModel(weights: Vector, intercept: Double) = { + override protected def createModel(weights: Vector, intercept: Double) = { new SVMModel(weights, intercept) } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index a78503df3134d..b1df33ebb2cc9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -36,13 +36,13 @@ import org.apache.spark.util.random.XORShiftRandom * This is an iterative algorithm that will make multiple passes over the data, so any RDDs given * to it should be cached by the user. */ -class KMeans private ( - var k: Int, - var maxIterations: Int, - var runs: Int, - var initializationMode: String, - var initializationSteps: Int, - var epsilon: Double) extends Serializable with Logging { +class KMeans( + private var k: Int, + private var maxIterations: Int, + private var runs: Int, + private var initializationMode: String, + private var initializationSteps: Int, + private var epsilon: Double) extends Serializable with Logging { def this() = this(2, 20, 1, KMeans.K_MEANS_PARALLEL, 5, 1e-4) /** Set the number of clusters to create (k). Default: 2. */ @@ -71,6 +71,8 @@ class KMeans private ( } /** + * EXPERIMENTAL + * * Set the number of runs of the algorithm to execute in parallel. 
We initialize the algorithm * this many times with random starting conditions (configured by the initialization mode), then * return the best clustering found over any run. Default: 1. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 2cea58cd3fd22..99a849f1c66b1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -64,11 +64,13 @@ trait Vector extends Serializable { /** * Factory methods for [[org.apache.spark.mllib.linalg.Vector]]. + * We don't use the name `Vector` because Scala imports + * [[scala.collection.immutable.Vector]] by default. */ object Vectors { /** - * Creates a dense vector. + * Creates a dense vector from its values. */ @varargs def dense(firstValue: Double, otherValues: Double*): Vector = @@ -158,20 +160,21 @@ class DenseVector(val values: Array[Double]) extends Vector { /** * A sparse vector represented by an index array and an value array. * - * @param n size of the vector. + * @param size size of the vector. * @param indices index array, assume to be strictly increasing. * @param values value array, must have the same length as the index array. */ -class SparseVector(val n: Int, val indices: Array[Int], val values: Array[Double]) extends Vector { - - override def size: Int = n +class SparseVector( + override val size: Int, + val indices: Array[Int], + val values: Array[Double]) extends Vector { override def toString: String = { - "(" + n + "," + indices.zip(values).mkString("[", "," ,"]") + ")" + "(" + size + "," + indices.zip(values).mkString("[", "," ,"]") + ")" } override def toArray: Array[Double] = { - val data = new Array[Double](n) + val data = new Array[Double](size) var i = 0 val nnz = indices.length while (i < nnz) { @@ -181,5 +184,5 @@ class SparseVector(val n: Int, val indices: Array[Int], val values: Array[Double data } - private[mllib] override def toBreeze: BV[Double] = new BSV[Double](indices, values, n) + private[mllib] override def toBreeze: BV[Double] = new BSV[Double](indices, values, size) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala index 20654284965ed..8a6d20f6f6ae8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala @@ -22,6 +22,8 @@ import breeze.linalg.{axpy => brzAxpy} import org.apache.spark.mllib.linalg.{Vectors, Vector} /** + * DEVELOPER API + * * Class used to compute the gradient for a loss function, given a single data point. */ abstract class Gradient extends Serializable { @@ -51,6 +53,8 @@ abstract class Gradient extends Serializable { } /** + * DEVELOPER API + * * Compute gradient and loss for a logistic loss function, as used in binary classification. * See also the documentation for the precise formulation. */ @@ -92,6 +96,8 @@ class LogisticGradient extends Gradient { } /** + * DEVELOPER API + * * Compute gradient and loss for a Least-squared loss function, as used in linear regression. * This is correct for the averaged least squares loss function (mean squared error) * L = 1/n ||A weights-y||^2 @@ -124,6 +130,8 @@ class LeastSquaresGradient extends Gradient { } /** + * DEVELOPER API + * * Compute gradient and loss for a Hinge loss function, as used in SVM binary classification. 
* See also the documentation for the precise formulation. * NOTE: This assumes that the labels are {0,1} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala index d0777ffd63ff8..60cad435be5c9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala @@ -19,18 +19,20 @@ package org.apache.spark.mllib.optimization import scala.collection.mutable.ArrayBuffer -import breeze.linalg.{Vector => BV, DenseVector => BDV} +import breeze.linalg.{DenseVector => BDV} import org.apache.spark.Logging import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg.{Vectors, Vector} /** + * DEVELOPER API + * * Class used to solve an optimization problem using Gradient Descent. * @param gradient Gradient function to be used. * @param updater Updater to be used to update weights after every iteration. */ -class GradientDescent(var gradient: Gradient, var updater: Updater) +class GradientDescent(private var gradient: Gradient, private var updater: Updater) extends Optimizer with Logging { private var stepSize: Double = 1.0 @@ -107,7 +109,11 @@ class GradientDescent(var gradient: Gradient, var updater: Updater) } -// Top-level method to run gradient descent. +/** + * DEVELOPER API + * + * Top-level method to run gradient descent. + */ object GradientDescent extends Logging { /** * Run stochastic gradient descent (SGD) in parallel using mini batches. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala index f9ce908a5f3b0..a655a8bb7a4ed 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala @@ -21,6 +21,11 @@ import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg.Vector +/** + * DEVELOPER API + * + * Trait for optimization problem solvers. + */ trait Optimizer extends Serializable { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala index 3b7754cd7ac28..a241bfd4e5858 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala @@ -24,6 +24,8 @@ import breeze.linalg.{norm => brzNorm, axpy => brzAxpy, Vector => BV} import org.apache.spark.mllib.linalg.{Vectors, Vector} /** + * DEVELOPER API + * * Class used to perform steps (weight update) using Gradient Descent methods. * * For general minimization problems, or for regularized problems of the form @@ -59,6 +61,8 @@ abstract class Updater extends Serializable { } /** + * DEVELOPER API + * * A simple updater for gradient descent *without* any regularization. * Uses a step-size decreasing with the square root of the number of iterations. */ @@ -78,6 +82,8 @@ class SimpleUpdater extends Updater { } /** + * DEVELOPER API + * * Updater for L1 regularized problems. * R(w) = ||w||_1 * Uses a step-size decreasing with the square root of the number of iterations. @@ -120,6 +126,8 @@ class L1Updater extends Updater { } /** + * DEVELOPER API + * * Updater for L2 regularized problems. * R(w) = 1/2 ||w||^2 * Uses a step-size decreasing with the square root of the number of iterations. 
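
The DEVELOPER API tags on `Gradient`, `Updater`, and `Optimizer` mark them as extension points rather than end-user entry points. As a rough illustration of that contract, here is a minimal sketch of a custom updater in the style of `SimpleUpdater`: an unregularized step whose size decays with the square root of the iteration count, as the doc comments above describe. The class name is hypothetical, the `compute` signature is assumed to match the `Updater` abstract class as of this patch series, and the body is written against the public `Vector` API because the Breeze conversions used internally are `private[mllib]`.

```scala
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.mllib.optimization.Updater

// Hypothetical example: an unregularized updater with step size
// stepSize / sqrt(iter), mirroring what the SimpleUpdater docs describe.
class DecayingStepUpdater extends Updater {
  override def compute(
      weightsOld: Vector,
      gradient: Vector,
      stepSize: Double,
      iter: Int,
      regParam: Double): (Vector, Double) = {
    val thisIterStepSize = stepSize / math.sqrt(iter)
    // toArray may expose the vector's backing array, so copy before mutating.
    val w = weightsOld.toArray.clone()
    val g = gradient.toArray
    var i = 0
    while (i < w.length) {
      w(i) -= thisIterStepSize * g(i)
      i += 1
    }
    // No regularization term, so the second tuple element (the
    // regularization value) is 0.
    (Vectors.dense(w), 0.0)
  }
}
```

Such an updater would be wired in the same way the patch wires `SimpleUpdater` into `LogisticRegressionWithSGD`: passed to `new GradientDescent(gradient, updater)` and configured through the fluent setters.
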
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala index 0cc9f48769f83..411d8d7b72d8b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala @@ -22,6 +22,9 @@ import scala.math.{abs, sqrt} import scala.util.Random import scala.util.Sorting +import com.esotericsoftware.kryo.Kryo +import org.jblas.{DoubleMatrix, SimpleBlas, Solve} + import org.apache.spark.broadcast.Broadcast import org.apache.spark.{Logging, HashPartitioner, Partitioner, SparkContext, SparkConf} import org.apache.spark.storage.StorageLevel @@ -29,10 +32,6 @@ import org.apache.spark.rdd.RDD import org.apache.spark.serializer.KryoRegistrator import org.apache.spark.SparkContext._ -import com.esotericsoftware.kryo.Kryo -import org.jblas.{DoubleMatrix, SimpleBlas, Solve} - - /** * Out-link information for a user or product block. This includes the original user/product IDs * of the elements within this block, and the list of destination blocks that each user or @@ -89,14 +88,14 @@ case class Rating(val user: Int, val product: Int, val rating: Double) * indicated user * preferences rather than explicit ratings given to items. */ -class ALS private ( - var numBlocks: Int, - var rank: Int, - var iterations: Int, - var lambda: Double, - var implicitPrefs: Boolean, - var alpha: Double, - var seed: Long = System.nanoTime() +class ALS( + private var numBlocks: Int, + private var rank: Int, + private var iterations: Int, + private var lambda: Double, + private var implicitPrefs: Boolean, + private var alpha: Double, + private var seed: Long = System.nanoTime() ) extends Serializable with Logging { def this() = this(-1, 10, 10, 0.01, false, 1.0) @@ -127,11 +126,17 @@ class ALS private ( this } + /** Sets whether to use implicit preference. Default: false. */ def setImplicitPrefs(implicitPrefs: Boolean): ALS = { this.implicitPrefs = implicitPrefs this } + /** + * EXPERIMENTAL + * + * Sets the constant used in computing confidence in implicit ALS. Default: 1.0. + */ def setAlpha(alpha: Double): ALS = { this.alpha = alpha this diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala index 443fc5de5bf04..80590c7974949 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala @@ -68,6 +68,8 @@ class MatrixFactorizationModel( } /** + * DEVELOPER API + * * Predict the rating of many users for many products. * This is a Java stub for python predictAll() * diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala index 80dc0f12ff84f..1d5b2f036155d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala @@ -79,7 +79,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] protected val validators: Seq[RDD[LabeledPoint] => Boolean] = List() - val optimizer: Optimizer + /** The optimizer to solve the problem. */ + def optimizer: Optimizer /** Whether to add intercept (default: true). 
*/ protected var addIntercept: Boolean = true @@ -100,6 +101,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] } /** + * EXPERIMENTAL + * * Set if the algorithm should validate data before training. Default true. */ def setValidateData(validateData: Boolean): this.type = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala index 25920d0dc976e..7595580a4b6eb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala @@ -51,16 +51,17 @@ class LassoModel( * its corresponding right hand side label y. * See also the documentation for the precise formulation. */ -class LassoWithSGD private ( - var stepSize: Double, - var numIterations: Int, - var regParam: Double, - var miniBatchFraction: Double) +class LassoWithSGD( + private var stepSize: Double, + private var numIterations: Int, + private var regParam: Double, + private var miniBatchFraction: Double) extends GeneralizedLinearAlgorithm[LassoModel] with Serializable { - val gradient = new LeastSquaresGradient() - val updater = new L1Updater() - @transient val optimizer = new GradientDescent(gradient, updater).setStepSize(stepSize) + private val gradient = new LeastSquaresGradient() + private val updater = new L1Updater() + override val optimizer = new GradientDescent(gradient, updater) + .setStepSize(stepSize) .setNumIterations(numIterations) .setRegParam(regParam) .setMiniBatchFraction(miniBatchFraction) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala index 9ed927994e795..ef4379d4b83df 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala @@ -51,15 +51,16 @@ class LinearRegressionModel( * its corresponding right hand side label y. * See also the documentation for the precise formulation. */ -class LinearRegressionWithSGD private ( - var stepSize: Double, - var numIterations: Int, - var miniBatchFraction: Double) +class LinearRegressionWithSGD( + private var stepSize: Double, + private var numIterations: Int, + private var miniBatchFraction: Double) extends GeneralizedLinearAlgorithm[LinearRegressionModel] with Serializable { - val gradient = new LeastSquaresGradient() - val updater = new SimpleUpdater() - val optimizer = new GradientDescent(gradient, updater).setStepSize(stepSize) + private val gradient = new LeastSquaresGradient() + private val updater = new SimpleUpdater() + override val optimizer = new GradientDescent(gradient, updater) + .setStepSize(stepSize) .setNumIterations(numIterations) .setMiniBatchFraction(miniBatchFraction) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala index 1f17d2107f940..39c6c4642b3ba 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala @@ -51,17 +51,18 @@ class RidgeRegressionModel( * its corresponding right hand side label y. * See also the documentation for the precise formulation. 
*/ -class RidgeRegressionWithSGD private ( - var stepSize: Double, - var numIterations: Int, - var regParam: Double, - var miniBatchFraction: Double) - extends GeneralizedLinearAlgorithm[RidgeRegressionModel] with Serializable { - - val gradient = new LeastSquaresGradient() - val updater = new SquaredL2Updater() - - @transient val optimizer = new GradientDescent(gradient, updater).setStepSize(stepSize) +class RidgeRegressionWithSGD( + private var stepSize: Double, + private var numIterations: Int, + private var regParam: Double, + private var miniBatchFraction: Double) + extends GeneralizedLinearAlgorithm[RidgeRegressionModel] with Serializable { + + private val gradient = new LeastSquaresGradient() + private val updater = new SquaredL2Updater() + + override val optimizer = new GradientDescent(gradient, updater) + .setStepSize(stepSize) .setNumIterations(numIterations) .setRegParam(regParam) .setMiniBatchFraction(miniBatchFraction) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala index 8b55bce7c4bec..75909884aed98 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala @@ -22,6 +22,8 @@ import org.apache.spark.rdd.RDD import org.apache.spark.mllib.regression.LabeledPoint /** + * DEVELOPER API + * * A collection of methods used to validate data before applying ML algorithms. */ object DataValidators extends Logging { @@ -29,11 +31,9 @@ object DataValidators extends Logging { /** * Function to check if labels used for classification are either zero or one. * - * @param data - input data set that needs to be checked - * * @return True if labels are all zero or one, false otherwise. */ - val classificationLabels: RDD[LabeledPoint] => Boolean = { data => + val binaryLabelValidator: RDD[LabeledPoint] => Boolean = { data => val numInvalid = data.filter(x => x.label != 1.0 && x.label != 0.0).count() if (numInvalid != 0) { logError("Classification labels should be 0 or 1. Found " + numInvalid + " invalid labels") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala index 9109189dff52f..f5db8a2a493c4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala @@ -23,6 +23,8 @@ import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD /** + * DEVELOPER API + * * Generate test data for KMeans. This class first chooses k cluster centers * from a d-dimensional Gaussian distribution scaled by factor r and then creates a Gaussian * cluster with scale 1 around each center. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala index 81e4eda2a68c4..c6561f3bdc13d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala @@ -28,6 +28,8 @@ import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint /** + * DEVELOPER API + * * Generate sample data used for Linear Data. This class generates * uniformly random values for every feature and adds Gaussian noise with mean `eps` to the * response variable `Y`. 
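
The rename from `classificationLabels` to `binaryLabelValidator` also pins down the validator shape: a plain function `RDD[LabeledPoint] => Boolean`, which is what `GeneralizedLinearAlgorithm` consumes through its `validators` list. Below is a hedged sketch of a user-defined validator in the same shape; the object and function names are illustrative, not part of MLlib.

```scala
import org.apache.spark.Logging
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD

// Illustrative example: a validator in the same RDD[LabeledPoint] => Boolean
// shape as DataValidators.binaryLabelValidator, here rejecting negative labels.
object CustomValidators extends Logging {
  val nonNegativeLabelValidator: RDD[LabeledPoint] => Boolean = { data =>
    val numInvalid = data.filter(_.label < 0.0).count()
    if (numInvalid != 0) {
      logError("Labels should be nonnegative. Found " + numInvalid + " invalid labels")
    }
    numInvalid == 0
  }
}
```
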
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala index 61498dcc2be00..41fe234491e89 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala @@ -25,10 +25,11 @@ import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.linalg.Vectors /** + * DEVELOPER API + * * Generate test data for LogisticRegression. This class chooses positive labels * with probability `probOne` and scales features for positive examples by `eps`. */ - object LogisticRegressionDataGenerator { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala index 348aba1dea5b6..e2430f8052640 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala @@ -25,6 +25,8 @@ import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD /** + * DEVELOPER API + * * Generate RDD(s) containing data for Matrix Factorization. * * This method samples training entries according to the oversampling factor @@ -47,9 +49,7 @@ import org.apache.spark.rdd.RDD * test (Boolean) Whether to create testing RDD. * testSampFact (Double) Percentage of training data to use as test data. */ - -object MFDataGenerator{ - +object MFDataGenerator { def main(args: Array[String]) { if (args.length < 2) { println("Usage: MFDataGenerator " + diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala index cb85e433bfc73..d4180e03f30fa 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala @@ -140,6 +140,8 @@ object MLUtils { loadLibSVMData(sc, path, labelParser, numFeatures, sc.defaultMinSplits) /** + * EXPERIMENTAL + * * Load labeled data from a file. The data format used here is * , ... * where , are feature values in Double and is the corresponding label as Double. @@ -159,6 +161,8 @@ object MLUtils { } /** + * EXPERIMENTAL + * * Save labeled data to a file. The data format used here is * , ... * where , are feature values in Double and is the corresponding label as Double. @@ -183,7 +187,7 @@ object MLUtils { * xColMean - Row vector with mean for every column (or feature) of the input data * xColSd - Row vector standard deviation for every column (or feature) of the input data. */ - def computeStats( + private[mllib] def computeStats( data: RDD[LabeledPoint], numFeatures: Int, numExamples: Long): (Double, Vector, Vector) = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala index e300c3dbe1fe0..5e591fc4199fc 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala @@ -27,6 +27,8 @@ import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint /** + * DEVELOPER API + * * Generate sample data used for SVM. This class generates uniform random values * for the features and adds Gaussian noise with weight 0.1 to generate labels. 
*/ From 0b674fa8e645be54114d38fd5b4fde5720c7e3c3 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Mon, 7 Apr 2014 09:15:19 -0700 Subject: [PATCH 3/9] mark decision tree APIs --- .../apache/spark/mllib/classification/NaiveBayes.scala | 2 +- .../org/apache/spark/mllib/tree/DecisionTree.scala | 4 +++- .../spark/mllib/tree/configuration/Strategy.scala | 2 +- .../org/apache/spark/mllib/tree/impurity/Entropy.scala | 10 +++++++--- .../org/apache/spark/mllib/tree/impurity/Gini.scala | 6 +++++- .../apache/spark/mllib/tree/impurity/Impurity.scala | 6 ++++-- .../apache/spark/mllib/tree/impurity/Variance.scala | 4 ++++ .../scala/org/apache/spark/mllib/tree/model/Bin.scala | 1 + .../spark/mllib/tree/model/DecisionTreeModel.scala | 2 ++ .../org/apache/spark/mllib/tree/model/Filter.scala | 2 +- .../spark/mllib/tree/model/InformationGainStats.scala | 2 ++ .../scala/org/apache/spark/mllib/tree/model/Node.scala | 2 ++ .../org/apache/spark/mllib/tree/model/Split.scala | 8 ++++---- 13 files changed, 37 insertions(+), 14 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index 37eaaa28a9619..2026d5ba5270c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -41,7 +41,7 @@ class NaiveBayesModel( private val brzTheta = new BDM[Double](theta.length, theta(0).length) { - // Need to put an extra pair of braces to prevent Scala treat `i` as a member. + // Need to put an extra pair of braces to prevent Scala treating `i` as a member. var i = 0 while (i < theta.length) { var j = 0 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala index dee9594a9dd79..04e7e4241910e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala @@ -33,13 +33,15 @@ import org.apache.spark.util.random.XORShiftRandom import org.apache.spark.mllib.linalg.{Vector, Vectors} /** + * EXPERIMENTAL + * * A class that implements a decision tree algorithm for classification and regression. It * supports both continuous and categorical features. * @param strategy The configuration parameters for the tree algorithm which specify the type * of algorithm (classification, regression, etc.), feature type (continuous, * categorical), depth of the tree, quantile calculation strategy, etc. 
*/ -class DecisionTree private(val strategy: Strategy) extends Serializable with Logging { +class DecisionTree (private val strategy: Strategy) extends Serializable with Logging { /** * Method to train a decision tree model over an RDD diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala index df565f3eb8859..0cbe7d73cddad 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala @@ -40,4 +40,4 @@ class Strategy ( val maxDepth: Int, val maxBins: Int = 100, val quantileCalculationStrategy: QuantileStrategy = Sort, - val categoricalFeaturesInfo: Map[Int,Int] = Map[Int,Int]()) extends Serializable + val categoricalFeaturesInfo: Map[Int, Int] = Map[Int, Int]()) extends Serializable diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala index b93995fcf9441..beec48bb3a108 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala @@ -18,20 +18,24 @@ package org.apache.spark.mllib.tree.impurity /** + * EXPERIMENTAL + * * Class for calculating [[http://en.wikipedia.org/wiki/Binary_entropy_function entropy]] during * binary classification. */ object Entropy extends Impurity { - def log2(x: Double) = scala.math.log(x) / scala.math.log(2) + private[tree] def log2(x: Double) = scala.math.log(x) / scala.math.log(2) /** + * DEVELOPER API + * * entropy calculation * @param c0 count of instances with label 0 * @param c1 count of instances with label 1 * @return entropy value */ - def calculate(c0: Double, c1: Double): Double = { + override def calculate(c0: Double, c1: Double): Double = { if (c0 == 0 || c1 == 0) { 0 } else { @@ -42,6 +46,6 @@ object Entropy extends Impurity { } } - def calculate(count: Double, sum: Double, sumSquares: Double): Double = + override def calculate(count: Double, sum: Double, sumSquares: Double): Double = throw new UnsupportedOperationException("Entropy.calculate") } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala index c0407554a91b3..5babe7d10d111 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala @@ -18,6 +18,8 @@ package org.apache.spark.mllib.tree.impurity /** + * EXPERIMENTAL + * * Class for calculating the * [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]] * during binary classification. 
@@ -25,6 +27,8 @@ package org.apache.spark.mllib.tree.impurity object Gini extends Impurity { /** + * DEVELOPER API + * * Gini coefficient calculation * @param c0 count of instances with label 0 * @param c1 count of instances with label 1 @@ -41,6 +45,6 @@ object Gini extends Impurity { } } - def calculate(count: Double, sum: Double, sumSquares: Double): Double = + override def calculate(count: Double, sum: Double, sumSquares: Double): Double = throw new UnsupportedOperationException("Gini.calculate") } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala index 43f296ac56bc8..e6fa115030e7a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala @@ -18,12 +18,14 @@ package org.apache.spark.mllib.tree.impurity /** + * EXPERIMENTAL + * * Trait for calculating information gain. */ trait Impurity extends Serializable { /** - * DEVELOPER API - UNSTABLE + * DEVELOPER API * * information calculation for binary classification * @param c0 count of instances with label 0 @@ -33,7 +35,7 @@ trait Impurity extends Serializable { def calculate(c0 : Double, c1 : Double): Double /** - * DEVELOPER API - UNSTABLE + * DEVELOPER API * * information calculation for regression * @param count number of instances diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala index b74577dcec167..7be3b9236ecd9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala @@ -18,6 +18,8 @@ package org.apache.spark.mllib.tree.impurity /** + * EXPERIMENTAL + * * Class for calculating variance during regression */ object Variance extends Impurity { @@ -25,6 +27,8 @@ object Variance extends Impurity { throw new UnsupportedOperationException("Variance.calculate") /** + * DEVELOPER API + * * variance calculation * @param count number of instances * @param sum sum of labels diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Bin.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Bin.scala index a57faa13745f7..2d71e1e366069 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Bin.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Bin.scala @@ -30,4 +30,5 @@ import org.apache.spark.mllib.tree.configuration.FeatureType._ * @param featureType type of feature -- categorical or continuous * @param category categorical label value accepted in the bin */ +private[tree] case class Bin(lowSplit: Split, highSplit: Split, featureType: FeatureType, category: Double) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index a6dca84a2ce09..e336ea74e3b76 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -22,6 +22,8 @@ import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg.Vector /** + * EXPERIMENTAL + * * Model to store the decision tree parameters * @param topNode root node * @param algo algorithm type -- classification or regression diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Filter.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Filter.scala index ebc9595eafef3..2deaf4ae8dcab 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Filter.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Filter.scala @@ -22,7 +22,7 @@ package org.apache.spark.mllib.tree.model * @param split split specifying the feature index, type and threshold * @param comparison integer specifying <,=,> */ -case class Filter(split: Split, comparison: Int) { +private[tree] case class Filter(split: Split, comparison: Int) { // Comparison -1,0,1 signifies <.=,> override def toString = " split = " + split + "comparison = " + comparison } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala index 99bf79cf12e45..aa1a478ea41b5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala @@ -18,6 +18,8 @@ package org.apache.spark.mllib.tree.model /** + * DEVELOPER API + * * Information gain statistics for each split * @param gain information gain value * @param impurity current node impurity diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala index aac3f9ce308f7..361361f937c76 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala @@ -22,6 +22,8 @@ import org.apache.spark.mllib.tree.configuration.FeatureType._ import org.apache.spark.mllib.linalg.Vector /** + * DEVELOPER API + * * Node in a decision tree * @param id integer node id * @param predict predicted value at the node diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala index 4e64a81dda74e..1ceb64ca44290 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala @@ -30,7 +30,7 @@ case class Split( feature: Int, threshold: Double, featureType: FeatureType, - categories: List[Double]){ + categories: List[Double]) { override def toString = "Feature = " + feature + ", threshold = " + threshold + ", featureType = " + featureType + @@ -42,7 +42,7 @@ case class Split( * @param feature feature index * @param featureType type of feature -- categorical or continuous */ -class DummyLowSplit(feature: Int, featureType: FeatureType) +private[tree] class DummyLowSplit(feature: Int, featureType: FeatureType) extends Split(feature, Double.MinValue, featureType, List()) /** @@ -50,7 +50,7 @@ class DummyLowSplit(feature: Int, featureType: FeatureType) * @param feature feature index * @param featureType type of feature -- categorical or continuous */ -class DummyHighSplit(feature: Int, featureType: FeatureType) +private[tree] class DummyHighSplit(feature: Int, featureType: FeatureType) extends Split(feature, Double.MaxValue, featureType, List()) /** @@ -59,6 +59,6 @@ class DummyHighSplit(feature: Int, featureType: FeatureType) * @param feature feature index * @param featureType type of feature -- categorical or continuous */ -class DummyCategoricalSplit(feature: Int, featureType: FeatureType) +private[tree] class DummyCategoricalSplit(feature: 
Int, featureType: FeatureType) extends Split(feature, Double.MaxValue, featureType, List()) From 00ffbcc14cf162591705f0920f4d26fdb741d0da Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Tue, 8 Apr 2014 00:46:07 -0700 Subject: [PATCH 4/9] update tree API annotation --- .../main/scala/org/apache/spark/mllib/tree/DecisionTree.scala | 4 ++-- .../org/apache/spark/mllib/tree/configuration/Algo.scala | 2 ++ .../apache/spark/mllib/tree/configuration/FeatureType.scala | 2 ++ .../spark/mllib/tree/configuration/QuantileStrategy.scala | 2 ++ .../org/apache/spark/mllib/tree/configuration/Strategy.scala | 2 ++ .../main/scala/org/apache/spark/mllib/tree/model/Node.scala | 2 +- .../main/scala/org/apache/spark/mllib/tree/model/Split.scala | 2 ++ 7 files changed, 13 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala index 04e7e4241910e..4fc50dfa2fd69 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala @@ -1026,7 +1026,7 @@ object DecisionTree extends Serializable with Logging { } } - val usage = """ + private val usage = """ Usage: DecisionTreeRunner [slices] --algo --trainDataDir path --testDataDir path --maxDepth num [--impurity ] [--maxBins num] @@ -1115,7 +1115,7 @@ object DecisionTree extends Serializable with Logging { * @return An RDD of LabeledPoint. Each labeled point has two elements: the first element is * the label, and the second element represents the feature values (an array of Double). */ - def loadLabeledData(sc: SparkContext, dir: String): RDD[LabeledPoint] = { + private def loadLabeledData(sc: SparkContext, dir: String): RDD[LabeledPoint] = { sc.textFile(dir).map { line => val parts = line.trim().split(",") val label = parts(0).toDouble diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala index 2dd1f0f27b8f5..332062de7463d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala @@ -18,6 +18,8 @@ package org.apache.spark.mllib.tree.configuration /** + * EXPERIMENTAL + * * Enum to select the algorithm for the decision tree */ object Algo extends Enumeration { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala index 09ee0586c58fa..e2a57837d5cef 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala @@ -18,6 +18,8 @@ package org.apache.spark.mllib.tree.configuration /** + * EXPERIMENTAL + * * Enum to describe whether a feature is "continuous" or "categorical" */ object FeatureType extends Enumeration { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala index 2457a480c2a14..95319b739ab32 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala @@ -18,6 +18,8 @@ package org.apache.spark.mllib.tree.configuration /** + * EXPERIMENTAL + 
* * Enum for selecting the quantile calculation strategy */ object QuantileStrategy extends Enumeration { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala index 0cbe7d73cddad..13cd656128768 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala @@ -22,6 +22,8 @@ import org.apache.spark.mllib.tree.configuration.Algo._ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._ /** + * EXPERIMENTAL + * * Stores all the configuration options for tree construction * @param algo classification or regression * @param impurity criterion used for information gain calculation diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala index 361361f937c76..6b644e7657f40 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala @@ -22,7 +22,7 @@ import org.apache.spark.mllib.tree.configuration.FeatureType._ import org.apache.spark.mllib.linalg.Vector /** - * DEVELOPER API + * DEVELOPER API * * Node in a decision tree * @param id integer node id diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala index 1ceb64ca44290..f8f4e5abfa6a1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala @@ -20,6 +20,8 @@ package org.apache.spark.mllib.tree.model import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType /** + * DEVELOPER API + * * Split applied to a feature * @param feature feature index * @param threshold threshold for continuous feature From ef1a71770b5e415290e6936980bf6c1e7fdee799 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Tue, 8 Apr 2014 22:59:33 -0700 Subject: [PATCH 5/9] mark some constructors private add default parameters to JavaDoc --- .../mllib/api/python/PythonMLLibAPI.scala | 1 - .../classification/LogisticRegression.scala | 2 +- .../mllib/classification/NaiveBayes.scala | 10 ++++-- .../spark/mllib/classification/SVM.scala | 2 +- .../spark/mllib/clustering/KMeans.scala | 35 +++++++++++++++++-- .../spark/mllib/recommendation/ALS.scala | 7 +++- .../apache/spark/mllib/regression/Lasso.scala | 5 +-- .../mllib/regression/LinearRegression.scala | 5 +-- .../mllib/regression/RidgeRegression.scala | 5 +-- 9 files changed, 56 insertions(+), 16 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index e866687093b21..ac0631abe5f8f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -31,7 +31,6 @@ import org.apache.spark.rdd.RDD * DEVELOPER API * * The Java stubs necessary for the Python mllib bindings. - * Users should not call the methods defined in this class directly. 
*/ class PythonMLLibAPI extends Serializable { private def deserializeDoubleVector(bytes: Array[Byte]): Array[Double] = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index 44f5b525b8814..4f9eaacf67fe4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -70,7 +70,7 @@ class LogisticRegressionModel( * Train a classification model for Logistic Regression using Stochastic Gradient Descent. * NOTE: Labels used in Logistic Regression should be {0, 1} */ -class LogisticRegressionWithSGD ( +class LogisticRegressionWithSGD private ( private var stepSize: Double, private var numIterations: Int, private var regParam: Double, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index 2026d5ba5270c..14c6517f96936 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -27,10 +27,14 @@ import org.apache.spark.mllib.util.MLUtils import org.apache.spark.rdd.RDD /** + * EXPERIMENTAL + * * Model for Naive Bayes Classifiers. * - * @param pi Log of class priors, whose dimension is C. - * @param theta Log of class conditional probabilities, whose dimension is CxD. + * @param labels list of labels + * @param pi log of class priors, whose dimension is C, number of labels + * @param theta log of class conditional probabilities, whose dimension is C-by-D, + * where D is number of features */ class NaiveBayesModel( val labels: Array[Double], @@ -68,7 +72,7 @@ class NaiveBayesModel( * document classification. By making every vector a 0-1 vector, it can also be used as * Bernoulli NB ([[http://tinyurl.com/p7c96j6]]). */ -class NaiveBayes (private var lambda: Double) extends Serializable with Logging { +class NaiveBayes private (private var lambda: Double) extends Serializable with Logging { def this() = this(1.0) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala index 2bf4f9c2d7cfc..956654b1fe90a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala @@ -71,7 +71,7 @@ class SVMModel( * Train a Support Vector Machine (SVM) using Stochastic Gradient Descent. * NOTE: Labels used in SVM should be {0, 1}. */ -class SVMWithSGD( +class SVMWithSGD private ( private var stepSize: Double, private var numIterations: Int, private var regParam: Double, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index b1df33ebb2cc9..c4d346f614dd8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -36,13 +36,18 @@ import org.apache.spark.util.random.XORShiftRandom * This is an iterative algorithm that will make multiple passes over the data, so any RDDs given * to it should be cached by the user. 
*/ -class KMeans( +class KMeans private ( private var k: Int, private var maxIterations: Int, private var runs: Int, private var initializationMode: String, private var initializationSteps: Int, private var epsilon: Double) extends Serializable with Logging { + + /** + * Constructs a KMeans instance with default parameters: {k: 2, maxIterations: 20, runs: 1, + * initializationMode: "k-means||", initializationSteps: 5, epsilon: 1e-4}. + */ def this() = this(2, 20, 1, KMeans.K_MEANS_PARALLEL, 5, 1e-4) /** Set the number of clusters to create (k). Default: 2. */ @@ -318,8 +323,8 @@ object KMeans { data: RDD[Vector], k: Int, maxIterations: Int, - runs: Int = 1, - initializationMode: String = K_MEANS_PARALLEL): KMeansModel = { + runs: Int, + initializationMode: String): KMeansModel = { new KMeans().setK(k) .setMaxIterations(maxIterations) .setRuns(runs) @@ -327,6 +332,27 @@ object KMeans { .run(data) } + /** + * Trains a k-means model using specified parameters and the default values for unspecified. + */ + def train( + data: RDD[Vector], + k: Int, + maxIterations: Int): KMeansModel = { + train(data, k, maxIterations, 1, K_MEANS_PARALLEL) + } + + /** + * Trains a k-means model using specified parameters and the default values for unspecified. + */ + def train( + data: RDD[Vector], + k: Int, + maxIterations: Int, + runs: Int): KMeansModel = { + train(data, k, maxIterations, runs, K_MEANS_PARALLEL) + } + /** * Returns the index of the closest center to the given point, as well as the squared distance. */ @@ -371,6 +397,9 @@ object KMeans { MLUtils.fastSquaredDistance(v1.vector, v1.norm, v2.vector, v2.norm) } + /** + * EXPERIMENTAL + */ def main(args: Array[String]) { if (args.length < 4) { println("Usage: KMeans []") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala index 411d8d7b72d8b..6f87b2eeb011e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala @@ -88,7 +88,7 @@ case class Rating(val user: Int, val product: Int, val rating: Double) * indicated user * preferences rather than explicit ratings given to items. */ -class ALS( +class ALS private ( private var numBlocks: Int, private var rank: Int, private var iterations: Int, @@ -97,6 +97,11 @@ class ALS( private var alpha: Double, private var seed: Long = System.nanoTime() ) extends Serializable with Logging { + + /** + * Constructs an ALS instance with default parameters: {numBlocks: -1, rank: 10, iterations: 10, + * lambda: 0.01, implicitPrefs: false, alpha: 1.0}. + */ def this() = this(-1, 10, 10, 0.01, false, 1.0) /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala index 7595580a4b6eb..5f0812fd2e0eb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala @@ -51,7 +51,7 @@ class LassoModel( * its corresponding right hand side label y. * See also the documentation for the precise formulation. 
*/ -class LassoWithSGD( +class LassoWithSGD private ( private var stepSize: Double, private var numIterations: Int, private var regParam: Double, @@ -70,7 +70,8 @@ class LassoWithSGD( super.setIntercept(false) /** - * Construct a Lasso object with default parameters + * Construct a Lasso object with default parameters: {stepSize: 1.0, numIterations: 100, + * regParam: 1.0, miniBatchFraction: 1.0}. */ def this() = this(1.0, 100, 1.0, 1.0) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala index ef4379d4b83df..228fa8db3e721 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala @@ -51,7 +51,7 @@ class LinearRegressionModel( * its corresponding right hand side label y. * See also the documentation for the precise formulation. */ -class LinearRegressionWithSGD( +class LinearRegressionWithSGD private ( private var stepSize: Double, private var numIterations: Int, private var miniBatchFraction: Double) @@ -65,7 +65,8 @@ class LinearRegressionWithSGD( .setMiniBatchFraction(miniBatchFraction) /** - * Construct a LinearRegression object with default parameters + * Construct a LinearRegression object with default parameters: {stepSize: 1.0, + * numIterations: 100, miniBatchFraction: 1.0}. */ def this() = this(1.0, 100, 1.0) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala index 39c6c4642b3ba..e702027c7c170 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala @@ -51,7 +51,7 @@ class RidgeRegressionModel( * its corresponding right hand side label y. * See also the documentation for the precise formulation. */ -class RidgeRegressionWithSGD( +class RidgeRegressionWithSGD private ( private var stepSize: Double, private var numIterations: Int, private var regParam: Double, @@ -71,7 +71,8 @@ class RidgeRegressionWithSGD( super.setIntercept(false) /** - * Construct a RidgeRegression object with default parameters + * Construct a RidgeRegression object with default parameters: {stepSize: 1.0, numIterations: 100, + * regParam: 1.0, miniBatchFraction: 1.0}. 
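With the SGD constructors made private above, callers obtain models through the companion objects instead; a sketch, assuming a hypothetical RDD[LabeledPoint] named `training`:

    import org.apache.spark.mllib.regression.{LinearRegressionWithSGD, RidgeRegressionWithSGD}

    val linearModel = LinearRegressionWithSGD.train(training, 100)         // numIterations
    val ridgeModel = RidgeRegressionWithSGD.train(training, 100, 1.0, 1.0) // plus stepSize and regParam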
*/ def this() = this(1.0, 100, 1.0, 1.0) From da31733a54f314ebc2d788c7d4261651fd4e6b73 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Wed, 9 Apr 2014 01:03:12 -0700 Subject: [PATCH 6/9] update developer and experimental tags --- .../apache/spark/mllib/api/python/PythonMLLibAPI.scala | 2 +- .../apache/spark/mllib/classification/NaiveBayes.scala | 2 +- .../scala/org/apache/spark/mllib/clustering/KMeans.scala | 4 ++-- .../org/apache/spark/mllib/optimization/Gradient.scala | 8 ++++---- .../apache/spark/mllib/optimization/GradientDescent.scala | 4 ++-- .../org/apache/spark/mllib/optimization/Optimizer.scala | 2 +- .../org/apache/spark/mllib/optimization/Updater.scala | 8 ++++---- .../scala/org/apache/spark/mllib/recommendation/ALS.scala | 2 +- .../mllib/recommendation/MatrixFactorizationModel.scala | 2 +- .../mllib/regression/GeneralizedLinearAlgorithm.scala | 2 +- .../scala/org/apache/spark/mllib/tree/DecisionTree.scala | 2 +- .../org/apache/spark/mllib/tree/configuration/Algo.scala | 2 +- .../spark/mllib/tree/configuration/FeatureType.scala | 2 +- .../spark/mllib/tree/configuration/QuantileStrategy.scala | 2 +- .../apache/spark/mllib/tree/configuration/Strategy.scala | 2 +- .../org/apache/spark/mllib/tree/impurity/Entropy.scala | 4 ++-- .../scala/org/apache/spark/mllib/tree/impurity/Gini.scala | 4 ++-- .../org/apache/spark/mllib/tree/impurity/Impurity.scala | 6 +++--- .../org/apache/spark/mllib/tree/impurity/Variance.scala | 4 ++-- .../apache/spark/mllib/tree/model/DecisionTreeModel.scala | 2 +- .../spark/mllib/tree/model/InformationGainStats.scala | 2 +- .../scala/org/apache/spark/mllib/tree/model/Node.scala | 2 +- .../scala/org/apache/spark/mllib/tree/model/Split.scala | 2 +- .../org/apache/spark/mllib/util/DataValidators.scala | 2 +- .../org/apache/spark/mllib/util/KMeansDataGenerator.scala | 2 +- .../org/apache/spark/mllib/util/LinearDataGenerator.scala | 2 +- .../mllib/util/LogisticRegressionDataGenerator.scala | 2 +- .../org/apache/spark/mllib/util/MFDataGenerator.scala | 2 +- .../main/scala/org/apache/spark/mllib/util/MLUtils.scala | 4 ++-- .../org/apache/spark/mllib/util/SVMDataGenerator.scala | 2 +- 30 files changed, 44 insertions(+), 44 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index ac0631abe5f8f..3e3732db0be6f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -28,7 +28,7 @@ import org.apache.spark.mllib.regression._ import org.apache.spark.rdd.RDD /** - * DEVELOPER API + * :: DeveloperApi :: * * The Java stubs necessary for the Python mllib bindings. */ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index 14c6517f96936..eaf2cfb8b7c12 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -27,7 +27,7 @@ import org.apache.spark.mllib.util.MLUtils import org.apache.spark.rdd.RDD /** - * EXPERIMENTAL + * :: Experimental :: * * Model for Naive Bayes Classifiers. 
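A usage sketch for the model (`training` and `test` are assumed RDD[LabeledPoint]s):

    import org.apache.spark.mllib.classification.NaiveBayes

    val nbModel = NaiveBayes.train(training, lambda = 1.0)
    val accuracy =
      test.filter(p => nbModel.predict(p.features) == p.label).count().toDouble / test.count()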
* diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index c4d346f614dd8..b0820819a945f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -76,7 +76,7 @@ class KMeans private ( } /** - * EXPERIMENTAL + * :: Experimental :: * * Set the number of runs of the algorithm to execute in parallel. We initialize the algorithm * this many times with random starting conditions (configured by the initialization mode), then @@ -398,7 +398,7 @@ object KMeans { } /** - * EXPERIMENTAL + * :: Experimental :: */ def main(args: Array[String]) { if (args.length < 4) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala index 8a6d20f6f6ae8..e2e7190fb3b0a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala @@ -22,7 +22,7 @@ import breeze.linalg.{axpy => brzAxpy} import org.apache.spark.mllib.linalg.{Vectors, Vector} /** - * DEVELOPER API + * :: DeveloperApi :: * * Class used to compute the gradient for a loss function, given a single data point. */ @@ -53,7 +53,7 @@ abstract class Gradient extends Serializable { } /** - * DEVELOPER API + * :: DeveloperApi :: * * Compute gradient and loss for a logistic loss function, as used in binary classification. * See also the documentation for the precise formulation. @@ -96,7 +96,7 @@ class LogisticGradient extends Gradient { } /** - * DEVELOPER API + * :: DeveloperApi :: * * Compute gradient and loss for a Least-squared loss function, as used in linear regression. * This is correct for the averaged least squares loss function (mean squared error) @@ -130,7 +130,7 @@ class LeastSquaresGradient extends Gradient { } /** - * DEVELOPER API + * :: DeveloperApi :: * * Compute gradient and loss for a Hinge loss function, as used in SVM binary classification. * See also the documentation for the precise formulation. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala index 60cad435be5c9..16e3ebc0df491 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala @@ -26,7 +26,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg.{Vectors, Vector} /** - * DEVELOPER API + * :: DeveloperApi :: * * Class used to solve an optimization problem using Gradient Descent. * @param gradient Gradient function to be used. @@ -110,7 +110,7 @@ class GradientDescent(private var gradient: Gradient, private var updater: Updat } /** - * DEVELOPER API + * :: DeveloperApi :: * * Top-level method to run gradient descent. 
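For developers targeting these classes, wiring a gradient and updater into the optimizer looks roughly like this (a sketch assuming an RDD[(Double, Vector)] of (label, features) pairs named `labeledData` and an initial weight vector `initialWeights`):

    import org.apache.spark.mllib.optimization._

    val sgd = new GradientDescent(new LogisticGradient(), new SquaredL2Updater())
      .setStepSize(1.0)
      .setNumIterations(100)
      .setRegParam(0.1)
      .setMiniBatchFraction(1.0)
    val weights = sgd.optimize(labeledData, initialWeights)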
*/ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala index a655a8bb7a4ed..57eb2afe8c6d6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala @@ -22,7 +22,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg.Vector /** - * DEVELOPER API + * :: DeveloperApi :: * * Trait for optimization problem solvers. */ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala index a241bfd4e5858..3963088553536 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala @@ -24,7 +24,7 @@ import breeze.linalg.{norm => brzNorm, axpy => brzAxpy, Vector => BV} import org.apache.spark.mllib.linalg.{Vectors, Vector} /** - * DEVELOPER API + * :: DeveloperApi :: * * Class used to perform steps (weight update) using Gradient Descent methods. * @@ -61,7 +61,7 @@ abstract class Updater extends Serializable { } /** - * DEVELOPER API + * :: DeveloperApi :: * * A simple updater for gradient descent *without* any regularization. * Uses a step-size decreasing with the square root of the number of iterations. @@ -82,7 +82,7 @@ class SimpleUpdater extends Updater { } /** - * DEVELOPER API + * :: DeveloperApi :: * * Updater for L1 regularized problems. * R(w) = ||w||_1 @@ -126,7 +126,7 @@ class L1Updater extends Updater { } /** - * DEVELOPER API + * :: DeveloperApi :: * * Updater for L2 regularized problems. * R(w) = 1/2 ||w||^2 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala index 9046960110af2..39d87e2061d79 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala @@ -138,7 +138,7 @@ class ALS private ( } /** - * EXPERIMENTAL + * :: Experimental :: * * Sets the constant used in computing confidence in implicit ALS. Default: 1.0. */ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala index 80590c7974949..da2e70d2182d5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala @@ -68,7 +68,7 @@ class MatrixFactorizationModel( } /** - * DEVELOPER API + * :: DeveloperApi :: * * Predict the rating of many users for many products. * This is a Java stub for python predictAll() diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala index 1d5b2f036155d..83c4b08006cfe 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala @@ -101,7 +101,7 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] } /** - * EXPERIMENTAL + * :: Experimental :: * * Set if the algorithm should validate data before training. Default true. 
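Turning validation off when the input is already known to be clean (a sketch; the zero-argument constructor remains public):

    val lr = new LogisticRegressionWithSGD()
    lr.setValidateData(false) // skip the binary-label check
    val lrModel = lr.run(training)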
*/ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala index 4fc50dfa2fd69..118f93b08cf07 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala @@ -33,7 +33,7 @@ import org.apache.spark.util.random.XORShiftRandom import org.apache.spark.mllib.linalg.{Vector, Vectors} /** - * EXPERIMENTAL + * :: Experimental :: * * A class that implements a decision tree algorithm for classification and regression. It * supports both continuous and categorical features. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala index 332062de7463d..8b907f3a1e97e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala @@ -18,7 +18,7 @@ package org.apache.spark.mllib.tree.configuration /** - * EXPERIMENTAL + * :: Experimental :: * * Enum to select the algorithm for the decision tree */ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala index e2a57837d5cef..db1d4fbbf4c34 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala @@ -18,7 +18,7 @@ package org.apache.spark.mllib.tree.configuration /** - * EXPERIMENTAL + * :: Experimental :: * * Enum to describe whether a feature is "continuous" or "categorical" */ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala index 95319b739ab32..7c777207a747d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala @@ -18,7 +18,7 @@ package org.apache.spark.mllib.tree.configuration /** - * EXPERIMENTAL + * :: Experimental :: * * Enum for selecting the quantile calculation strategy */ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala index 13cd656128768..6a4c54f0d6d12 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala @@ -22,7 +22,7 @@ import org.apache.spark.mllib.tree.configuration.Algo._ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._ /** - * EXPERIMENTAL + * :: Experimental :: * * Stores all the configuration options for tree construction * @param algo classification or regression diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala index beec48bb3a108..c77939809ca60 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala @@ -18,7 +18,7 @@ package org.apache.spark.mllib.tree.impurity /** - * EXPERIMENTAL + * :: Experimental :: * * Class for calculating 
[[http://en.wikipedia.org/wiki/Binary_entropy_function entropy]] during * binary classification. @@ -28,7 +28,7 @@ object Entropy extends Impurity { private[tree] def log2(x: Double) = scala.math.log(x) / scala.math.log(2) /** - * DEVELOPER API + * :: DeveloperApi :: * * entropy calculation * @param c0 count of instances with label 0 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala index 5babe7d10d111..c2422f9d4f82c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala @@ -18,7 +18,7 @@ package org.apache.spark.mllib.tree.impurity /** - * EXPERIMENTAL + * :: Experimental :: * * Class for calculating the * [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]] @@ -27,7 +27,7 @@ package org.apache.spark.mllib.tree.impurity object Gini extends Impurity { /** - * DEVELOPER API + * :: DeveloperApi :: * * Gini coefficient calculation * @param c0 count of instances with label 0 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala index e6fa115030e7a..496c8b1e910a8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala @@ -18,14 +18,14 @@ package org.apache.spark.mllib.tree.impurity /** - * EXPERIMENTAL + * :: Experimental :: * * Trait for calculating information gain. */ trait Impurity extends Serializable { /** - * DEVELOPER API + * :: DeveloperApi :: * * information calculation for binary classification * @param c0 count of instances with label 0 @@ -35,7 +35,7 @@ trait Impurity extends Serializable { def calculate(c0 : Double, c1 : Double): Double /** - * DEVELOPER API + * :: DeveloperApi :: * * information calculation for regression * @param count number of instances diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala index 7be3b9236ecd9..9a0a943a10e33 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala @@ -18,7 +18,7 @@ package org.apache.spark.mllib.tree.impurity /** - * EXPERIMENTAL + * :: Experimental :: * * Class for calculating variance during regression */ @@ -27,7 +27,7 @@ object Variance extends Impurity { throw new UnsupportedOperationException("Variance.calculate") /** - * DEVELOPER API + * :: DeveloperApi :: * * variance calculation * @param count number of instances diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index e336ea74e3b76..ace83dd7fdc33 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -22,7 +22,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg.Vector /** - * EXPERIMENTAL + * :: Experimental :: * * Model to store the decision tree parameters * @param topNode root node diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala index aa1a478ea41b5..13d2ab6203ba7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala @@ -18,7 +18,7 @@ package org.apache.spark.mllib.tree.model /** - * DEVELOPER API + * :: DeveloperApi :: * * Information gain statistics for each split * @param gain information gain value diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala index 6b644e7657f40..0e645fef1a321 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala @@ -22,7 +22,7 @@ import org.apache.spark.mllib.tree.configuration.FeatureType._ import org.apache.spark.mllib.linalg.Vector /** - * DEVELOPER API + * :: DeveloperApi :: * * Node in a decision tree * @param id integer node id diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala index f8f4e5abfa6a1..5cedbda86a4b5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala @@ -20,7 +20,7 @@ package org.apache.spark.mllib.tree.model import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType /** - * DEVELOPER API + * :: DeveloperApi :: * * Split applied to a feature * @param feature feature index diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala index 75909884aed98..3b4652290bc9c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala @@ -22,7 +22,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.mllib.regression.LabeledPoint /** - * DEVELOPER API + * :: DeveloperApi :: * * A collection of methods used to validate data before applying ML algorithms. */ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala index f5db8a2a493c4..267fb529ba47f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala @@ -23,7 +23,7 @@ import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD /** - * DEVELOPER API + * :: DeveloperApi :: * * Generate test data for KMeans. This class first chooses k cluster centers * from a d-dimensional Gaussian distribution scaled by factor r and then creates a Gaussian diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala index c6561f3bdc13d..1e237d02fa985 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala @@ -28,7 +28,7 @@ import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint /** - * DEVELOPER API + * :: DeveloperApi :: * * Generate sample data used for Linear Data. 
This class generates * uniformly random values for every feature and adds Gaussian noise with mean `eps` to the diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala index 41fe234491e89..11bcea3565acd 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala @@ -25,7 +25,7 @@ import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.linalg.Vectors /** - * DEVELOPER API + * :: DeveloperApi :: * * Generate test data for LogisticRegression. This class chooses positive labels * with probability `probOne` and scales features for positive examples by `eps`. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala index e2430f8052640..67dc1c6a33e28 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala @@ -25,7 +25,7 @@ import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD /** - * DEVELOPER API + * :: DeveloperApi :: * * Generate RDD(s) containing data for Matrix Factorization. * diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala index c77dd5e6dc71b..65042b6feee56 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala @@ -122,7 +122,7 @@ object MLUtils { loadLibSVMData(sc, path, labelParser, numFeatures, sc.defaultMinSplits) /** - * EXPERIMENTAL + * :: Experimental :: * * Load labeled data from a file. The data format used here is * <label>, <f1> <f2> ... @@ -143,7 +143,7 @@ } /** - * EXPERIMENTAL + * :: Experimental :: * * Save labeled data to a file. The data format used here is * <label>, <f1> <f2> ... diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala index 5e591fc4199fc..1f748031fdbcb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala @@ -27,7 +27,7 @@ import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint /** - * DEVELOPER API + * :: DeveloperApi :: * * Generate sample data used for SVM. This class generates uniform random values * for the features and adds Gaussian noise with weight 0.1 to generate labels.
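For reference, the plain-text labeled format documented in the MLUtils hunks above is a label, a comma, then space-separated feature values; a round-trip sketch with hypothetical paths:

    import org.apache.spark.mllib.util.MLUtils

    // each input line looks like: "1.0, 3.2 4.1 0.0"
    val labeled = MLUtils.loadLabeledData(sc, "data/labeled")
    MLUtils.saveLabeledData(labeled, "out/labeled")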
From 8773d0d56ca7a1bf34bb2778c357882de5ff70da Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Wed, 9 Apr 2014 01:15:38 -0700 Subject: [PATCH 7/9] add DeveloperApi annotation --- .../mllib/api/python/PythonMLLibAPI.scala | 2 ++ .../spark/mllib/optimization/Gradient.scala | 5 ++++ .../mllib/optimization/GradientDescent.scala | 3 +++ .../spark/mllib/optimization/Optimizer.scala | 2 ++ .../spark/mllib/optimization/Updater.scala | 5 ++++ .../MatrixFactorizationModel.scala | 7 +++--- .../spark/mllib/tree/impurity/Entropy.scala | 25 +++++++++++-------- .../spark/mllib/tree/impurity/Gini.scala | 3 +++ .../spark/mllib/tree/impurity/Impurity.scala | 5 +++- .../spark/mllib/tree/impurity/Variance.scala | 3 +++ .../tree/model/InformationGainStats.scala | 3 +++ .../apache/spark/mllib/tree/model/Node.scala | 2 ++ .../apache/spark/mllib/tree/model/Split.scala | 2 ++ .../spark/mllib/util/DataValidators.scala | 2 ++ .../mllib/util/KMeansDataGenerator.scala | 3 ++- .../mllib/util/LinearDataGenerator.scala | 2 ++ .../LogisticRegressionDataGenerator.scala | 2 ++ .../spark/mllib/util/MFDataGenerator.scala | 2 ++ .../spark/mllib/util/SVMDataGenerator.scala | 2 ++ 19 files changed, 64 insertions(+), 16 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index 3e3732db0be6f..ae27c57799873 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -19,6 +19,7 @@ package org.apache.spark.mllib.api.python import java.nio.{ByteBuffer, ByteOrder} +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.classification._ import org.apache.spark.mllib.clustering._ @@ -32,6 +33,7 @@ import org.apache.spark.rdd.RDD * * The Java stubs necessary for the Python mllib bindings. */ +@DeveloperApi class PythonMLLibAPI extends Serializable { private def deserializeDoubleVector(bytes: Array[Byte]): Array[Double] = { val packetLength = bytes.length diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala index e2e7190fb3b0a..1176dc9dbc08d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala @@ -19,6 +19,7 @@ package org.apache.spark.mllib.optimization import breeze.linalg.{axpy => brzAxpy} +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.mllib.linalg.{Vectors, Vector} /** @@ -26,6 +27,7 @@ import org.apache.spark.mllib.linalg.{Vectors, Vector} * * Class used to compute the gradient for a loss function, given a single data point. */ +@DeveloperApi abstract class Gradient extends Serializable { /** * Compute the gradient and loss given the features of a single data point. @@ -58,6 +60,7 @@ abstract class Gradient extends Serializable { * Compute gradient and loss for a logistic loss function, as used in binary classification. * See also the documentation for the precise formulation. 
*/ +@DeveloperApi class LogisticGradient extends Gradient { override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { val brzData = data.toBreeze @@ -103,6 +106,7 @@ class LogisticGradient extends Gradient { * L = 1/n ||A weights-y||^2 * See also the documentation for the precise formulation. */ +@DeveloperApi class LeastSquaresGradient extends Gradient { override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { val brzData = data.toBreeze @@ -136,6 +140,7 @@ class LeastSquaresGradient extends Gradient { * See also the documentation for the precise formulation. * NOTE: This assumes that the labels are {0,1} */ +@DeveloperApi class HingeGradient extends Gradient { override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { val brzData = data.toBreeze diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala index 16e3ebc0df491..04267d967dcad 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala @@ -21,6 +21,7 @@ import scala.collection.mutable.ArrayBuffer import breeze.linalg.{DenseVector => BDV} +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.Logging import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg.{Vectors, Vector} @@ -32,6 +33,7 @@ import org.apache.spark.mllib.linalg.{Vectors, Vector} * @param gradient Gradient function to be used. * @param updater Updater to be used to update weights after every iteration. */ +@DeveloperApi class GradientDescent(private var gradient: Gradient, private var updater: Updater) extends Optimizer with Logging { @@ -114,6 +116,7 @@ class GradientDescent(private var gradient: Gradient, private var updater: Updat * * Top-level method to run gradient descent. */ +@DeveloperApi object GradientDescent extends Logging { /** * Run stochastic gradient descent (SGD) in parallel using mini batches. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala index 57eb2afe8c6d6..0a313f3104b14 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala @@ -19,6 +19,7 @@ package org.apache.spark.mllib.optimization import org.apache.spark.rdd.RDD +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.mllib.linalg.Vector /** @@ -26,6 +27,7 @@ import org.apache.spark.mllib.linalg.Vector * * Trait for optimization problem solvers. 
*/ +@DeveloperApi trait Optimizer extends Serializable { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala index 3963088553536..e67816796c6b1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala @@ -21,6 +21,7 @@ import scala.math._ import breeze.linalg.{norm => brzNorm, axpy => brzAxpy, Vector => BV} +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.mllib.linalg.{Vectors, Vector} /** @@ -37,6 +38,7 @@ import org.apache.spark.mllib.linalg.{Vectors, Vector} * The updater is responsible to also perform the update coming from the * regularization term R(w) (if any regularization is used). */ +@DeveloperApi abstract class Updater extends Serializable { /** * Compute an updated value for weights given the gradient, stepSize, iteration number and @@ -66,6 +68,7 @@ abstract class Updater extends Serializable { * A simple updater for gradient descent *without* any regularization. * Uses a step-size decreasing with the square root of the number of iterations. */ +@DeveloperApi class SimpleUpdater extends Updater { override def compute( weightsOld: Vector, @@ -101,6 +104,7 @@ class SimpleUpdater extends Updater { * * Equivalently, set weight component to signum(w) * max(0.0, abs(w) - shrinkageVal) */ +@DeveloperApi class L1Updater extends Updater { override def compute( weightsOld: Vector, @@ -132,6 +136,7 @@ class L1Updater extends Updater { * R(w) = 1/2 ||w||^2 * Uses a step-size decreasing with the square root of the number of iterations. */ +@DeveloperApi class SquaredL2Updater extends Updater { override def compute( weightsOld: Vector, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala index da2e70d2182d5..e05224fc7caf2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala @@ -17,13 +17,14 @@ package org.apache.spark.mllib.recommendation +import org.jblas._ + +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.api.java.JavaRDD import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext._ import org.apache.spark.mllib.api.python.PythonMLLibAPI -import org.jblas._ -import org.apache.spark.api.java.JavaRDD - /** * Model representing the result of matrix factorization. 
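A sketch of producing and querying such a model through ALS (`ratings` is an assumed RDD[Rating]):

    import org.apache.spark.mllib.recommendation.{ALS, Rating}

    val mfModel = ALS.train(ratings, rank = 10, iterations = 10, lambda = 0.01)
    val predictedRating = mfModel.predict(user = 1, product = 42)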
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala index c77939809ca60..8e6e90eafafbb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala @@ -17,6 +17,8 @@ package org.apache.spark.mllib.tree.impurity +import org.apache.spark.annotation.DeveloperApi + /** * :: Experimental :: * @@ -25,7 +27,7 @@ package org.apache.spark.mllib.tree.impurity */ object Entropy extends Impurity { - private[tree] def log2(x: Double) = scala.math.log(x) / scala.math.log(2) + private[tree] def log2(x: Double) = scala.math.log(x) / scala.math.log(2) /** * :: DeveloperApi :: @@ -35,16 +37,17 @@ object Entropy extends Impurity { * @param c1 count of instances with label 1 * @return entropy value */ - override def calculate(c0: Double, c1: Double): Double = { - if (c0 == 0 || c1 == 0) { - 0 - } else { - val total = c0 + c1 - val f0 = c0 / total - val f1 = c1 / total - -(f0 * log2(f0)) - (f1 * log2(f1)) - } - } + @DeveloperApi + override def calculate(c0: Double, c1: Double): Double = { + if (c0 == 0 || c1 == 0) { + 0 + } else { + val total = c0 + c1 + val f0 = c0 / total + val f1 = c1 / total + -(f0 * log2(f0)) - (f1 * log2(f1)) + } + } override def calculate(count: Double, sum: Double, sumSquares: Double): Double = throw new UnsupportedOperationException("Entropy.calculate") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala index c2422f9d4f82c..7c6fb0575b855 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala @@ -17,6 +17,8 @@ package org.apache.spark.mllib.tree.impurity +import org.apache.spark.annotation.DeveloperApi + /** * :: Experimental :: * @@ -34,6 +36,7 @@ object Gini extends Impurity { * @param c1 count of instances with label 1 * @return Gini coefficient value */ + @DeveloperApi override def calculate(c0: Double, c1: Double): Double = { if (c0 == 0 || c1 == 0) { 0 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala index 496c8b1e910a8..360123e025c2c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala @@ -17,6 +17,8 @@ package org.apache.spark.mllib.tree.impurity +import org.apache.spark.annotation.DeveloperApi + /** * :: Experimental :: * @@ -32,6 +34,7 @@ trait Impurity extends Serializable { * @param c1 count of instances with label 1 * @return information value */ + @DeveloperApi def calculate(c0 : Double, c1 : Double): Double /** @@ -43,6 +46,6 @@ trait Impurity extends Serializable { * @param sumSquares summation of squares of the labels * @return information value */ + @DeveloperApi def calculate(count: Double, sum: Double, sumSquares: Double): Double - } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala index 9a0a943a10e33..93b49ec7f44ef 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala @@ -17,6 +17,8 @@ package 
org.apache.spark.mllib.tree.impurity +import org.apache.spark.annotation.DeveloperApi + /** * :: Experimental :: * @@ -34,6 +36,7 @@ object Variance extends Impurity { * @param sum sum of labels * @param sumSquares summation of squares of the labels */ + @DeveloperApi override def calculate(count: Double, sum: Double, sumSquares: Double): Double = { val squaredLoss = sumSquares - (sum * sum) / count squaredLoss / count diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala index 13d2ab6203ba7..d36b58e92ced6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala @@ -17,6 +17,8 @@ package org.apache.spark.mllib.tree.model +import org.apache.spark.annotation.DeveloperApi + /** * :: DeveloperApi :: * @@ -27,6 +29,7 @@ package org.apache.spark.mllib.tree.model * @param rightImpurity right node impurity * @param predict predicted value */ +@DeveloperApi class InformationGainStats( val gain: Double, val impurity: Double, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala index 0e645fef1a321..339972141498c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala @@ -17,6 +17,7 @@ package org.apache.spark.mllib.tree.model +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.Logging import org.apache.spark.mllib.tree.configuration.FeatureType._ import org.apache.spark.mllib.linalg.Vector @@ -33,6 +34,7 @@ import org.apache.spark.mllib.linalg.Vector * @param rightNode right child * @param stats information gain stats */ +@DeveloperApi class Node ( val id: Int, val predict: Double, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala index 5cedbda86a4b5..8bbb343079b49 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala @@ -17,6 +17,7 @@ package org.apache.spark.mllib.tree.model +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType /** @@ -28,6 +29,7 @@ import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType * @param featureType type of feature -- categorical or continuous * @param categories accepted values for categorical variables */ +@DeveloperApi case class Split( feature: Int, threshold: Double, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala index 3b4652290bc9c..230c409e1be33 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala @@ -17,6 +17,7 @@ package org.apache.spark.mllib.util +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.Logging import org.apache.spark.rdd.RDD import org.apache.spark.mllib.regression.LabeledPoint @@ -26,6 +27,7 @@ import org.apache.spark.mllib.regression.LabeledPoint * * A collection of methods used to validate data before applying ML algorithms. 
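A sketch of applying a validator before training; binaryLabelValidator is the RDD[LabeledPoint] => Boolean check the classifiers use:

    import org.apache.spark.mllib.util.DataValidators

    require(DataValidators.binaryLabelValidator(training), "labels must be 0 or 1")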
*/ +@DeveloperApi object DataValidators extends Logging { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala index 267fb529ba47f..e693d13703987 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala @@ -19,6 +19,7 @@ package org.apache.spark.mllib.util import scala.util.Random +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD @@ -29,7 +30,7 @@ import org.apache.spark.rdd.RDD * from a d-dimensional Gaussian distribution scaled by factor r and then creates a Gaussian * cluster with scale 1 around each center. */ - +@DeveloperApi object KMeansDataGenerator { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala index 1e237d02fa985..140ff92869176 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala @@ -22,6 +22,7 @@ import scala.util.Random import org.jblas.DoubleMatrix +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg.Vectors @@ -34,6 +35,7 @@ import org.apache.spark.mllib.regression.LabeledPoint * uniformly random values for every feature and adds Gaussian noise with mean `eps` to the * response variable `Y`. */ +@DeveloperApi object LinearDataGenerator { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala index 11bcea3565acd..ca06b9ad58538 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala @@ -19,6 +19,7 @@ package org.apache.spark.mllib.util import scala.util.Random +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.mllib.regression.LabeledPoint @@ -30,6 +31,7 @@ import org.apache.spark.mllib.linalg.Vectors * Generate test data for LogisticRegression. This class chooses positive labels * with probability `probOne` and scales features for positive examples by `eps`. */ +@DeveloperApi object LogisticRegressionDataGenerator { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala index 67dc1c6a33e28..3bd86d6813375 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala @@ -21,6 +21,7 @@ import scala.util.Random import org.jblas.DoubleMatrix +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD @@ -49,6 +50,7 @@ import org.apache.spark.rdd.RDD * test (Boolean) Whether to create testing RDD. * testSampFact (Double) Percentage of training data to use as test data. 
*/ +@DeveloperApi object MFDataGenerator { def main(args: Array[String]) { if (args.length < 2) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala index 1f748031fdbcb..87a6f2a0c3976 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala @@ -21,6 +21,7 @@ import scala.util.Random import org.jblas.DoubleMatrix +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg.Vectors @@ -32,6 +33,7 @@ import org.apache.spark.mllib.regression.LabeledPoint * Generate sample data used for SVM. This class generates uniform random values * for the features and adds Gaussian noise with weight 0.1 to generate labels. */ +@DeveloperApi object SVMDataGenerator { def main(args: Array[String]) { From 6b9f8e2a6b0c931928f3cf6b5e1b209dc49cf1f2 Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Wed, 9 Apr 2014 01:23:36 -0700 Subject: [PATCH 8/9] add Experimental annotation --- .../org/apache/spark/mllib/classification/NaiveBayes.scala | 2 ++ .../main/scala/org/apache/spark/mllib/clustering/KMeans.scala | 2 ++ .../main/scala/org/apache/spark/mllib/recommendation/ALS.scala | 2 ++ .../spark/mllib/regression/GeneralizedLinearAlgorithm.scala | 2 ++ .../main/scala/org/apache/spark/mllib/tree/DecisionTree.scala | 2 ++ .../scala/org/apache/spark/mllib/tree/configuration/Algo.scala | 3 +++ .../apache/spark/mllib/tree/configuration/FeatureType.scala | 3 +++ .../spark/mllib/tree/configuration/QuantileStrategy.scala | 3 +++ .../org/apache/spark/mllib/tree/configuration/Strategy.scala | 2 ++ .../scala/org/apache/spark/mllib/tree/impurity/Entropy.scala | 3 ++- .../main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala | 3 ++- .../scala/org/apache/spark/mllib/tree/impurity/Impurity.scala | 3 ++- .../scala/org/apache/spark/mllib/tree/impurity/Variance.scala | 3 ++- .../org/apache/spark/mllib/tree/model/DecisionTreeModel.scala | 2 ++ mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala | 3 +++ 15 files changed, 34 insertions(+), 4 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index eaf2cfb8b7c12..5a45f12f1aa12 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -19,6 +19,7 @@ package org.apache.spark.mllib.classification import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, argmax => brzArgmax, sum => brzSum} +import org.apache.spark.annotation.Experimental import org.apache.spark.{Logging, SparkContext} import org.apache.spark.SparkContext._ import org.apache.spark.mllib.linalg.Vector @@ -36,6 +37,7 @@ import org.apache.spark.rdd.RDD * @param theta log of class conditional probabilities, whose dimension is C-by-D, * where D is number of features */ +@Experimental class NaiveBayesModel( val labels: Array[Double], val pi: Array[Double], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index b0820819a945f..8f565eb60a60f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -21,6 +21,7 @@ import scala.collection.mutable.ArrayBuffer import breeze.linalg.{DenseVector => BDV, Vector => BV, norm => breezeNorm} +import org.apache.spark.annotation.Experimental import org.apache.spark.{Logging, SparkContext} import org.apache.spark.SparkContext._ import org.apache.spark.mllib.linalg.{Vector, Vectors} @@ -400,6 +401,7 @@ object KMeans { /** * :: Experimental :: */ + @Experimental def main(args: Array[String]) { if (args.length < 4) { println("Usage: KMeans <master> <input_file> <k> <max_iterations> [<runs>]") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala index 39d87e2061d79..60cbb1c1e1d86 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala @@ -25,6 +25,7 @@ import scala.util.Sorting import com.esotericsoftware.kryo.Kryo import org.jblas.{DoubleMatrix, SimpleBlas, Solve} +import org.apache.spark.annotation.Experimental import org.apache.spark.broadcast.Broadcast import org.apache.spark.{Logging, HashPartitioner, Partitioner, SparkContext, SparkConf} import org.apache.spark.storage.StorageLevel @@ -142,6 +143,7 @@ class ALS private ( * * Sets the constant used in computing confidence in implicit ALS. Default: 1.0. */ + @Experimental def setAlpha(alpha: Double): ALS = { this.alpha = alpha this diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala index 83c4b08006cfe..c24f5afb99686 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala @@ -19,6 +19,7 @@ package org.apache.spark.mllib.regression import breeze.linalg.{DenseVector => BDV, SparseVector => BSV} +import org.apache.spark.annotation.Experimental import org.apache.spark.{Logging, SparkException} import org.apache.spark.rdd.RDD import org.apache.spark.mllib.optimization._ @@ -105,6 +106,7 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] * * Set if the algorithm should validate data before training. Default true. */ + @Experimental def setValidateData(validateData: Boolean): this.type = { this.validateData = validateData this diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala index 118f93b08cf07..c8a966cd5f5a8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala @@ -19,6 +19,7 @@ package org.apache.spark.mllib.tree import scala.util.control.Breaks._ +import org.apache.spark.annotation.Experimental import org.apache.spark.{Logging, SparkContext} import org.apache.spark.SparkContext._ import org.apache.spark.mllib.regression.LabeledPoint @@ -41,6 +42,7 @@ import org.apache.spark.mllib.linalg.{Vector, Vectors} * of algorithm (classification, regression, etc.), feature type (continuous, * categorical), depth of the tree, quantile calculation strategy, etc.
*/ +@Experimental class DecisionTree (private val strategy: Strategy) extends Serializable with Logging { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala index 8b907f3a1e97e..017f84f3b9e8b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala @@ -17,11 +17,14 @@ package org.apache.spark.mllib.tree.configuration +import org.apache.spark.annotation.Experimental + /** * :: Experimental :: * * Enum to select the algorithm for the decision tree */ +@Experimental object Algo extends Enumeration { type Algo = Value val Classification, Regression = Value diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala index db1d4fbbf4c34..c0254c32c2dce 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala @@ -17,11 +17,14 @@ package org.apache.spark.mllib.tree.configuration +import org.apache.spark.annotation.Experimental + /** * :: Experimental :: * * Enum to describe whether a feature is "continuous" or "categorical" */ +@Experimental object FeatureType extends Enumeration { type FeatureType = Value val Continuous, Categorical = Value diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala index 7c777207a747d..b3e8b224beeaa 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala @@ -17,11 +17,14 @@ package org.apache.spark.mllib.tree.configuration +import org.apache.spark.annotation.Experimental + /** * :: Experimental :: * * Enum for selecting the quantile calculation strategy */ +@Experimental object QuantileStrategy extends Enumeration { type QuantileStrategy = Value val Sort, MinMax, ApproxHist = Value diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala index 6a4c54f0d6d12..482faaa9e7256 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala @@ -17,6 +17,7 @@ package org.apache.spark.mllib.tree.configuration +import org.apache.spark.annotation.Experimental import org.apache.spark.mllib.tree.impurity.Impurity import org.apache.spark.mllib.tree.configuration.Algo._ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._ @@ -36,6 +37,7 @@ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._ * 1, 2, ... , k-1. It's important to note that features are * zero-indexed. 
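Putting Strategy together with the tree (a sketch; all parameter values illustrative, with `training` and a LabeledPoint `testPoint` assumed as before):

    import org.apache.spark.mllib.tree.DecisionTree
    import org.apache.spark.mllib.tree.configuration.Strategy
    import org.apache.spark.mllib.tree.configuration.Algo.Classification
    import org.apache.spark.mllib.tree.impurity.Gini

    val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 5)
    val treeModel = DecisionTree.train(training, strategy)
    val predictedLabel = treeModel.predict(testPoint.features)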
*/ +@Experimental class Strategy ( val algo: Algo, val impurity: Impurity, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala index 8e6e90eafafbb..55c43f2fcf9c5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Experimental} /** * :: Experimental :: @@ -25,6 +25,7 @@ import org.apache.spark.annotation.DeveloperApi * Class for calculating [[http://en.wikipedia.org/wiki/Binary_entropy_function entropy]] during * binary classification. */ +@Experimental object Entropy extends Impurity { private[tree] def log2(x: Double) = scala.math.log(x) / scala.math.log(2) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala index 7c6fb0575b855..c923b8e8f4cf1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala @@ -17,7 +17,7 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Experimental} /** * :: Experimental :: @@ -26,6 +26,7 @@ import org.apache.spark.annotation.DeveloperApi * [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]] * during binary classification. */ +@Experimental object Gini extends Impurity { /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala index 360123e025c2c..f407796596c6c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala @@ -17,13 +17,14 @@ package org.apache.spark.mllib.tree.impurity -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Experimental} /** * :: Experimental :: * * Trait for calculating information gain. 
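Since both calculate overloads are part of the developer-facing surface, a custom impurity is a small object; a classification-only sketch using misclassification error:

    import org.apache.spark.mllib.tree.impurity.Impurity

    object MisclassificationError extends Impurity {
      // fraction of instances the majority label gets wrong
      override def calculate(c0: Double, c1: Double): Double = {
        val total = c0 + c1
        if (total == 0) 0.0 else 1.0 - math.max(c0, c1) / total
      }
      override def calculate(count: Double, sum: Double, sumSquares: Double): Double =
        throw new UnsupportedOperationException("classification-only impurity")
    }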
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
index 360123e025c2c..f407796596c6c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
@@ -17,13 +17,14 @@
 
 package org.apache.spark.mllib.tree.impurity
 
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Experimental}
 
 /**
  * :: Experimental ::
  *
  * Trait for calculating information gain.
  */
+@Experimental
 trait Impurity extends Serializable {
 
   /**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
index 93b49ec7f44ef..2c64644f4ed0f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala
@@ -17,13 +17,14 @@
 
 package org.apache.spark.mllib.tree.impurity
 
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Experimental}
 
 /**
  * :: Experimental ::
  *
  * Class for calculating variance during regression
  */
+@Experimental
 object Variance extends Impurity {
   override def calculate(c0: Double, c1: Double): Double =
     throw new UnsupportedOperationException("Variance.calculate")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
index ace83dd7fdc33..0f76f4a049057 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.mllib.tree.model
 
+import org.apache.spark.annotation.Experimental
 import org.apache.spark.mllib.tree.configuration.Algo._
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.linalg.Vector
@@ -28,6 +29,7 @@ import org.apache.spark.mllib.linalg.Vector
  * @param topNode root node
  * @param algo algorithm type -- classification or regression
  */
+@Experimental
 class DecisionTreeModel(val topNode: Node, val algo: Algo) extends Serializable {
 
   /**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
index 65042b6feee56..7f9804deaf33f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
@@ -20,6 +20,7 @@ package org.apache.spark.mllib.util
 import breeze.linalg.{Vector => BV, DenseVector => BDV, SparseVector => BSV,
   squaredDistance => breezeSquaredDistance}
 
+import org.apache.spark.annotation.Experimental
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.regression.LabeledPoint
@@ -133,6 +134,7 @@
    * @return An RDD of LabeledPoint. Each labeled point has two elements: the first element is
    *         the label, and the second element represents the feature values (an array of Double).
    */
+  @Experimental
   def loadLabeledData(sc: SparkContext, dir: String): RDD[LabeledPoint] = {
     sc.textFile(dir).map { line =>
       val parts = line.split(',')
@@ -152,6 +154,7 @@
    * @param data An RDD of LabeledPoints containing data to be saved.
    * @param dir Directory to save the data.
    */
+  @Experimental
   def saveLabeledData(data: RDD[LabeledPoint], dir: String) {
     val dataStr = data.map(x => x.label + "," + x.features.toArray.mkString(" "))
     dataStr.saveAsTextFile(dir)
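[Editor's note: the two MLUtils helpers annotated above share a simple text format, visible in the hunks: one labeled point per line, the label separated from the space-delimited features by a comma. A minimal sketch, assuming an existing SparkContext `sc` and a hypothetical HDFS path.]

    import org.apache.spark.mllib.util.MLUtils

    // Reads a file in this format, e.g.:
    //   1.0,0.5 2.1 3.0
    //   0.0,1.5 0.2 7.1
    val points = MLUtils.loadLabeledData(sc, "hdfs://namenode/user/me/train")

    // Writes the same "label,f1 f2 f3" lines back out.
    MLUtils.saveLabeledData(points, "hdfs://namenode/user/me/train-copy")
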
From dc4cbb3d37a89d1e6499a86e0f7177f90c26b565 Mon Sep 17 00:00:00 2001
From: Xiangrui Meng
Date: Wed, 9 Apr 2014 01:28:26 -0700
Subject: [PATCH 9/9] mark distributed matrices experimental

---
 .../mllib/linalg/distributed/CoordinateMatrix.scala  |  4 ++++
 .../mllib/linalg/distributed/DistributedMatrix.scala |  2 --
 .../mllib/linalg/distributed/IndexedRowMatrix.scala  | 11 ++++++++++-
 .../spark/mllib/linalg/distributed/RowMatrix.scala   |  4 ++++
 4 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
index 9194f657494b2..89d5c03d76c42 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
@@ -19,6 +19,7 @@ package org.apache.spark.mllib.linalg.distributed
 
 import breeze.linalg.{DenseMatrix => BDM}
 
+import org.apache.spark.annotation.Experimental
 import org.apache.spark.rdd.RDD
 import org.apache.spark.SparkContext._
 import org.apache.spark.mllib.linalg.Vectors
@@ -32,6 +33,8 @@ import org.apache.spark.mllib.linalg.Vectors
 case class MatrixEntry(i: Long, j: Long, value: Double)
 
 /**
+ * :: Experimental ::
+ *
  * Represents a matrix in coordinate format.
  *
  * @param entries matrix entries
  * @param nRows number of rows. A non-positive value means unknown, and then the number of
  *              rows will be determined by the max row index plus one.
  * @param nCols number of columns. A non-positive value means unknown, and then the number of
  *              columns will be determined by the max column index plus one.
  */
+@Experimental
 class CoordinateMatrix(
     val entries: RDD[MatrixEntry],
     private var nRows: Long,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala
index 13f72a3c724ef..a0e26ce3bc465 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/DistributedMatrix.scala
@@ -19,8 +19,6 @@ package org.apache.spark.mllib.linalg.distributed
 
 import breeze.linalg.{DenseMatrix => BDM}
 
-import org.apache.spark.mllib.linalg.Matrix
-
 /**
  * Represents a distributively stored matrix backed by one or more RDDs.
  */
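[Editor's note: since CoordinateMatrix is the first of the newly annotated distributed matrices, a small usage sketch may help reviewers. It assumes an existing SparkContext `sc` and uses only the constructor and scaladoc semantics shown above.]

    import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry}

    // Three nonzero entries, given as (row index, column index, value).
    val entries = sc.parallelize(Seq(
      MatrixEntry(0, 0, 1.0),
      MatrixEntry(1, 1, 2.0),
      MatrixEntry(2, 1, 3.0)))

    // Non-positive dimensions mean "unknown"; per the scaladoc above, the sizes
    // are then derived from the max row/column index plus one (3 x 2 here).
    val mat = new CoordinateMatrix(entries, 0L, 0L)
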
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
index e110f070bd7c1..24c123ab7eb51 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
@@ -19,14 +19,22 @@ package org.apache.spark.mllib.linalg.distributed
 
 import breeze.linalg.{DenseMatrix => BDM}
 
+import org.apache.spark.annotation.Experimental
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.linalg._
 import org.apache.spark.mllib.linalg.SingularValueDecomposition
 
-/** Represents a row of [[org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix]]. */
+/**
+ * :: Experimental ::
+ *
+ * Represents a row of [[org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix]].
+ */
+@Experimental
 case class IndexedRow(index: Long, vector: Vector)
 
 /**
+ * :: Experimental ::
+ *
  * Represents a row-oriented [[org.apache.spark.mllib.linalg.distributed.DistributedMatrix]] with
  * indexed rows.
  *
  * @param rows indexed rows of this matrix
  * @param nRows number of rows. A non-positive value means unknown, and then the number of
  *              rows will be determined by the max row index plus one.
  * @param nCols number of columns. A non-positive value means unknown, and then the number of
  *              columns will be determined by the size of the first row.
  */
+@Experimental
 class IndexedRowMatrix(
     val rows: RDD[IndexedRow],
     private var nRows: Long,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index f59811f18a68f..8d32c1a6dbba1 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -23,11 +23,14 @@ import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, svd => brzSvd}
 import breeze.numerics.{sqrt => brzSqrt}
 import com.github.fommil.netlib.BLAS.{getInstance => blas}
 
+import org.apache.spark.annotation.Experimental
 import org.apache.spark.mllib.linalg._
 import org.apache.spark.rdd.RDD
 import org.apache.spark.Logging
 
 /**
+ * :: Experimental ::
+ *
  * Represents a row-oriented distributed Matrix with no meaningful row indices.
  *
  * @param rows rows stored as an RDD[Vector]
 * @param nRows number of rows. A non-positive value means unknown, and then the number of
 *              rows will be determined by the number of records in the RDD `rows`.
 * @param nCols number of columns. A non-positive value means unknown, and then the number of
 *              columns will be determined by the size of the first row.
 */
+@Experimental
 class RowMatrix(
     val rows: RDD[Vector],
     private var nRows: Long,