LDA should support disabling checkpointing

yanboliang · yanboliang · commit a5c03435a206 · 2016-03-31T22:55:09.000+08:00
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
@@ -142,9 +142,11 @@ final class EMLDAOptimizer extends LDAOptimizer {
     this.k = k
     this.vocabSize = docs.take(1).head._2.size
     this.checkpointInterval = lda.getCheckpointInterval
-    this.graphCheckpointer = new PeriodicGraphCheckpointer[TopicCounts, TokenCount](
-      checkpointInterval, graph.vertices.sparkContext)
-    this.graphCheckpointer.update(this.graph)
+    if (this.checkpointInterval != -1) {
+      this.graphCheckpointer = new PeriodicGraphCheckpointer[TopicCounts, TokenCount](
+        checkpointInterval, graph.vertices.sparkContext)
+      this.graphCheckpointer.update(this.graph)
+    }
     this.globalTopicTotals = computeGlobalTopicTotals()
     this
   }
@@ -189,7 +191,9 @@ final class EMLDAOptimizer extends LDAOptimizer {
     // Update the vertex descriptors with the new counts.
     val newGraph = Graph(docTopicDistributions, graph.edges)
     graph = newGraph
-    graphCheckpointer.update(newGraph)
+    if (this.checkpointInterval != -1) {
+      graphCheckpointer.update(newGraph)
+    }
     globalTopicTotals = computeGlobalTopicTotals()
     this
   }
@@ -208,7 +212,9 @@ final class EMLDAOptimizer extends LDAOptimizer {
 
   override private[clustering] def getLDAModel(iterationTimes: Array[Double]): LDAModel = {
     require(graph != null, "graph is null, EMLDAOptimizer not initialized.")
-    this.graphCheckpointer.deleteAllCheckpoints()
+    if (this.checkpointInterval != -1) {
+      this.graphCheckpointer.deleteAllCheckpoints()
+    }
     // The constructor's default arguments assume gammaShape = 100 to ensure equivalence in
     // LDAModel.toLocal conversion
     new DistributedLDAModel(this.graph, this.globalTopicTotals, this.k, this.vocabSize,