File tree Expand file tree Collapse file tree 2 files changed +3
-5
lines changed
main/scala/org/apache/spark/mllib/clustering
test/scala/org/apache/spark/mllib/clustering Expand file tree Collapse file tree 2 files changed +3
-5
lines changed Original file line number Diff line number Diff line change @@ -221,9 +221,6 @@ class LocalLDAModel private[clustering] (
221221 // TODO
222222 // override def logLikelihood(documents: RDD[(Long, Vector)]): Double = ???
223223
224- // TODO:
225- // override def topicDistributions(documents: RDD[(Long, Vector)]): RDD[(Long, Vector)] = ???
226-
227224 /**
228225 * Calculate the log variational bound on perplexity. See Equation (16) in original Online
229226 * LDA paper.
@@ -300,7 +297,8 @@ class LocalLDAModel private[clustering] (
300297 /**
301298 * Predicts the topic mixture distribution ("gamma") for a document.
302299 */
303- def topicDistribution (documents : RDD [(Long , Vector )]): RDD [(Long , Vector )] = {
300+ // TODO: declare in LDAModel and override once implemented in DistributedLDAModel
301+ def topicDistributions (documents : RDD [(Long , Vector )]): RDD [(Long , Vector )] = {
304302 // Double transpose because dirichletExpectation normalizes by row and we need to normalize
305303 // by topic (columns of lambda)
306304 val expElogbeta = exp(LDAUtils .dirichletExpectation(topicsMatrix.toBreeze.toDenseMatrix.t).t)
Original file line number Diff line number Diff line change @@ -333,7 +333,7 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {
333333 (1 , 0.99504 ), (1 , 0.99504 ))
334334
335335 expectedPredictions.zip(
336- ldaModel.topicDistribution (docs).map { case (_, topics) =>
336+ ldaModel.topicDistributions (docs).map { case (_, topics) =>
337337 // convert results to expectedPredictions format, which only has highest probability topic
338338 val topicsBz = topics.toBreeze.toDenseVector
339339 (argmax(topicsBz), max(topicsBz))
You can’t perform that action at this time.
0 commit comments