
Commit 2824d85

Add MimaExcludes and docs.
1 parent ddd8d86 commit 2824d85

File tree: 4 files changed, +18 / -5 lines


mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala

Lines changed: 2 additions & 4 deletions
@@ -400,6 +400,7 @@ object KMeans extends MLReadable[KMeans] {
 
   /** [[MLWriter]] instance for [[KMeans]] */
   private[KMeans] class KMeansWriter(instance: KMeans) extends MLWriter {
+
     override protected def saveImpl(path: String): Unit = {
       DefaultParamsWriter.saveInitialModel(instance, path)
       DefaultParamsWriter.saveMetadata(instance, path, sc)
@@ -408,11 +409,8 @@ object KMeans extends MLReadable[KMeans] {
 
   private class KMeansReader extends MLReader[KMeans] {
 
-    /** Checked against metadata when loading estimator */
-    private val className = classOf[KMeans].getName
-
     override def load(path: String): KMeans = {
-      val metadata = DefaultParamsReader.loadMetadata(path, sc, className)
+      val metadata = DefaultParamsReader.loadMetadata(path, sc, classOf[KMeans].getName)
       val instance = new KMeans(metadata.uid)
 
       DefaultParamsReader.getAndSetParams(instance, metadata)
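
Note: the hunk above ends at `getAndSetParams`, so the part of `KMeansReader.load` that restores `initialModel` is not shown. Below is a minimal sketch of how that tail could look, assuming it uses the `loadInitialModel` helper added in ReadWrite.scala further down and a `setInitialModel` setter on `KMeans` (the setter name is an assumption, not part of this diff):

    // Sketch only, not taken from this commit: a plausible continuation of KMeansReader.load.
    override def load(path: String): KMeans = {
      val metadata = DefaultParamsReader.loadMetadata(path, sc, classOf[KMeans].getName)
      val instance = new KMeans(metadata.uid)
      DefaultParamsReader.getAndSetParams(instance, metadata)
      // If an initial model was saved next to the metadata, set it back on the estimator.
      DefaultParamsReader.loadInitialModel[KMeansModel](path, sc)
        .foreach(instance.setInitialModel)  // hypothetical setter on KMeans
      instance
    }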

mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala

Lines changed: 10 additions & 1 deletion
@@ -280,7 +280,7 @@ private[ml] object DefaultParamsWriter {
    * Helper for [[saveMetadata()]] which extracts the JSON to save.
    * This is useful for ensemble models which need to save metadata for many sub-models.
    *
-   * Note: This function does not handle param `initialModel`.
+   * Note: This function does not handle the `initialModel` param; see [[saveInitialModel()]].
    *
    * @see [[saveMetadata()]] for details on what this includes.
    */
@@ -311,6 +311,9 @@ private[ml] object DefaultParamsWriter {
     metadataJson
   }
 
+  /**
+   * Save the estimator's `initialModel` to the corresponding path.
+   */
   def saveInitialModel[T <: HasInitialModel[_ <: MLWritable with Params]](
       instance: T, path: String): Unit = {
     if (instance.isDefined(instance.initialModel)) {
@@ -453,6 +456,12 @@ private[ml] object DefaultParamsReader {
     cls.getMethod("read").invoke(null).asInstanceOf[MLReader[T]].load(path)
   }
 
+  /**
+   * Load the estimator's `initialModel` instance from the given path and return it.
+   * If the `initialModel` path does not exist, the estimator does not have the
+   * `initialModel` param set, so return None.
+   * This assumes the model implements [[MLReadable]].
+   */
   def loadInitialModel[M <: Model[M]](path: String, sc: SparkContext): Option[M] = {
     val hadoopConf = sc.hadoopConfiguration
     val initialModelPath = new Path(path, "initialModel")
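
The hunk above cuts off right after `initialModelPath` is constructed. For orientation, here is a minimal sketch of how the remainder of `loadInitialModel` might look, assuming it checks for the `initialModel` directory through the Hadoop FileSystem API and reuses `loadParamsInstance` from the same object; the actual body is not part of this commit, and the imports are those already present in ReadWrite.scala:

  def loadInitialModel[M <: Model[M]](path: String, sc: SparkContext): Option[M] = {
    val hadoopConf = sc.hadoopConfiguration
    val initialModelPath = new Path(path, "initialModel")
    val fs = initialModelPath.getFileSystem(hadoopConf)
    if (fs.exists(initialModelPath)) {
      // Delegate to loadParamsInstance, which invokes MLReadable.read on the saved model class.
      Some(loadParamsInstance[M](initialModelPath.toString, sc))
    } else {
      // No "initialModel" directory: the estimator was saved without an initial model.
      None
    }
  }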

mllib/src/test/scala/org/apache/spark/ml/util/DefaultReadWriteTest.scala

Lines changed: 3 additions & 0 deletions
@@ -114,6 +114,8 @@ trait DefaultReadWriteTest extends TempDirectory { self: Suite =>
     testParams.foreach { case (p, v) =>
       val param = estimator.getParam(p)
       if (param.name == "initialModel") {
+        // The estimator's `initialModel` has the same type as the model produced by this
+        // estimator, so we can use `checkModelData` to check equality of `initialModel` too.
         checkModelData(estimator.get(param).get.asInstanceOf[M],
           estimator2.get(param).get.asInstanceOf[M])
       } else {
@@ -123,6 +125,7 @@ trait DefaultReadWriteTest extends TempDirectory { self: Suite =>
 
     // Test Model save/load
     val model2 = testDefaultReadWrite(model)
+    // The model does not extend HasInitialModel, so we don't check `initialModel` here.
     testParams.filter(_._1 != "initialModel").foreach { case (p, v) =>
       val param = model.getParam(p)
       assert(model.get(param).get === model2.get(param).get)
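
The new comment relies on `checkModelData` being applicable to the estimator's `initialModel` as well as to the trained model, since both are instances of `M`. As a concrete illustration (not part of this commit), a suite exercising this path for KMeans could pass a `checkModelData` along these lines:

  // Two KMeansModels are treated as equal here if their cluster centers match; this is the
  // kind of function a suite would hand to the read/write test helper in this trait.
  def checkModelData(model: KMeansModel, model2: KMeansModel): Unit = {
    assert(model.clusterCenters === model2.clusterCenters)
  }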

project/MimaExcludes.scala

Lines changed: 3 additions & 0 deletions
@@ -948,6 +948,9 @@ object MimaExcludes {
       ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.RandomForestClassificationModel.setFeatureSubsetStrategy"),
       ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.numTrees"),
       ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setFeatureSubsetStrategy")
+    ) ++ Seq(
+      ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.ml.clustering.KMeans$"),
+      ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.ml.clustering.KMeans")
     )
   }
 