File tree Expand file tree Collapse file tree 1 file changed +6
-2
lines changed
mllib/src/main/scala/org/apache/spark/mllib/clustering Expand file tree Collapse file tree 1 file changed +6
-2
lines changed Original file line number Diff line number Diff line change @@ -295,7 +295,10 @@ class LocalLDAModel private[clustering] (
295295 }
296296
297297 /**
298- * Predicts the topic mixture distribution ("gamma") for a document.
298+ * Predicts the topic mixture distribution ("gamma") for each document. Returns a vector of
299+ * zeros for an empty document.
300+ * @param documents documents to predict topic mixture distributions for
301+ * @return topic mixture distributions for each document
299302 */
300303 // TODO: declare in LDAModel and override once implemented in DistributedLDAModel
301304 def topicDistributions (documents : RDD [(Long , Vector )]): RDD [(Long , Vector )] = {
@@ -307,13 +310,14 @@ class LocalLDAModel private[clustering] (
307310 val k = this .k
308311
309312 documents.map { doc =>
313+ if (doc._2.size == 0 ) (doc._1, Vectors .zeros(k))
310314 val (gamma, _) = OnlineLDAOptimizer .variationalTopicInference(
311315 doc._2,
312316 expElogbeta,
313317 topicConcentrationBrz,
314318 gammaShape,
315319 k)
316- (doc._1, Vectors .dense((gamma / sum(gamma) ).toArray))
320+ (doc._1, Vectors .dense(normalize (gamma, 1.0 ).toArray))
317321 }
318322 }
319323
You can’t perform that action at this time.
0 commit comments