Skip to content

Commit 061780c

Browse files
author
Feynman Liang
committed
Code review cleanup
1 parent 3be2947 commit 061780c

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,10 @@ class LocalLDAModel private[clustering] (
295295
}
296296

297297
/**
298-
* Predicts the topic mixture distribution ("gamma") for a document.
298+
* Predicts the topic mixture distribution ("gamma") for each document. Returns a vector of zeros for
299+
* an empty document.
300+
* @param documents documents to predict topic mixture distributions for
301+
* @return topic mixture distributions for each document
299302
*/
300303
// TODO: declare in LDAModel and override once implemented in DistributedLDAModel
301304
def topicDistributions(documents: RDD[(Long, Vector)]): RDD[(Long, Vector)] = {
@@ -307,13 +310,14 @@ class LocalLDAModel private[clustering] (
307310
val k = this.k
308311

309312
documents.map { doc =>
313+
if (doc._2.size == 0) (doc._1, Vectors.zeros(k))
310314
val (gamma, _) = OnlineLDAOptimizer.variationalTopicInference(
311315
doc._2,
312316
expElogbeta,
313317
topicConcentrationBrz,
314318
gammaShape,
315319
k)
316-
(doc._1, Vectors.dense((gamma / sum(gamma)).toArray))
320+
(doc._1, Vectors.dense(normalize(gamma, 1.0).toArray))
317321
}
318322
}
319323

0 commit comments

Comments
 (0)