@@ -37,6 +37,10 @@ import org.apache.spark.util.Utils
3737 * - "token": instance of a term appearing in a document
3838 * - "topic": multinomial distribution over words representing some concept
3939 *
40+ * References:
41+ * - Original LDA paper (journal version):
42+ * Blei, Ng, and Jordan. "Latent Dirichlet Allocation." JMLR, 2003.
43+ *
4044 * @see [[http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation Latent Dirichlet allocation
4145 * (Wikipedia)]]
4246 */
@@ -47,12 +51,11 @@ class LDA private (
4751 private var docConcentration : Double ,
4852 private var topicConcentration : Double ,
4953 private var seed : Long ,
50- private var checkpointInterval : Int ) extends Logging {
54+ private var checkpointInterval : Int ,
55+ private var ldaOptimizer : LDAOptimizer ) extends Logging {
5156
5257 def this () = this (k = 10 , maxIterations = 20 , docConcentration = - 1 , topicConcentration = - 1 ,
53- seed = Utils .random.nextLong(), checkpointInterval = 10 )
54-
55- private var ldaOptimizer : LDAOptimizer = getDefaultOptimizer(" EM" )
58+ seed = Utils .random.nextLong(), checkpointInterval = 10 , ldaOptimizer = new EMLDAOptimizer )
5659
5760 /**
5861 * Number of topics to infer. I.e., the number of soft cluster centers.
@@ -208,7 +211,7 @@ class LDA private (
208211
209212
210213 /** LDAOptimizer used to perform the actual calculation */
211- def getOptimizer () : LDAOptimizer = ldaOptimizer
214+ def getOptimizer : LDAOptimizer = ldaOptimizer
212215
213216 /**
214217 * LDAOptimizer used to perform the actual calculation (default = EMLDAOptimizer)
@@ -220,24 +223,18 @@ class LDA private (
220223
221224 /**
222225 * Set the LDAOptimizer used to perform the actual calculation by algorithm name.
223- * Currently "EM " is supported.
226+ * Currently only "em" is supported; the name is matched case-insensitively.
224227 */
225228 def setOptimizer (optimizerName : String ): this .type = {
226- this .ldaOptimizer = getDefaultOptimizer(optimizerName)
229+ this .ldaOptimizer =
230+ optimizerName.toLowerCase match {
231+ case " em" => new EMLDAOptimizer
232+ case other =>
233+ throw new IllegalArgumentException (s " Only em is supported but got $other. " )
234+ }
227235 this
228236 }
229237
230- /**
231- * Get the default optimizer from String parameter.
232- */
233- private def getDefaultOptimizer (optimizerName : String ): LDAOptimizer = {
234- optimizerName match {
235- case " EM" => new EMLDAOptimizer ()
236- case other =>
237- throw new UnsupportedOperationException (s " Only EM are supported but got $other. " )
238- }
239- }
240-
241238 /**
242239 * Learn an LDA model using the given dataset.
243240 *
0 commit comments