Rename LocalLDAModel.topicDistribution to topicDistributions

Feynman Liang · Feynman Liang · commit 3be29479140d · 2015-07-29T20:37:45.000-07:00
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -221,9 +221,6 @@ class LocalLDAModel private[clustering] (
   // TODO
   // override def logLikelihood(documents: RDD[(Long, Vector)]): Double = ???
 
-  // TODO:
-  // override def topicDistributions(documents: RDD[(Long, Vector)]): RDD[(Long, Vector)] = ???
-
   /**
    * Calculate the log variational bound on perplexity. See Equation (16) in original Online
    * LDA paper.
@@ -300,7 +297,8 @@ class LocalLDAModel private[clustering] (
   /**
    * Predicts the topic mixture distribution ("gamma") for a document.
    */
-  def topicDistribution(documents: RDD[(Long, Vector)]): RDD[(Long, Vector)] = {
+  // TODO: declare in LDAModel and override once implemented in DistributedLDAModel
+  def topicDistributions(documents: RDD[(Long, Vector)]): RDD[(Long, Vector)] = {
     // Double transpose because dirichletExpectation normalizes by row and we need to normalize
     // by topic (columns of lambda)
     val expElogbeta = exp(LDAUtils.dirichletExpectation(topicsMatrix.toBreeze.toDenseMatrix.t).t)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
@@ -333,7 +333,7 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {
       (1, 0.99504), (1, 0.99504))
 
     expectedPredictions.zip(
-      ldaModel.topicDistribution(docs).map { case (_, topics) =>
+      ldaModel.topicDistributions(docs).map { case (_, topics) =>
         // convert results to expectedPredictions format, which only has highest probability topic
         val topicsBz = topics.toBreeze.toDenseVector
         (argmax(topicsBz), max(topicsBz))