@@ -71,7 +71,8 @@ class Word2Vec extends Serializable with Logging {
7171 private var numPartitions = 1
7272 private var numIterations = 1
7373 private var seed = Utils .random.nextLong() // default seed is drawn from Utils.random
74-
74+ private var minCount = 5 // minimum number of times a token must appear to be included in the word2vec model's vocabulary (default: 5)
75+
7576 /**
7677 * Sets vector size (default: 100).
7778 */
@@ -114,20 +115,6 @@ class Word2Vec extends Serializable with Logging {
114115 this
115116 }
116117
117- private val EXP_TABLE_SIZE = 1000
118- private val MAX_EXP = 6
119- private val MAX_CODE_LENGTH = 40
120- private val MAX_SENTENCE_LENGTH = 1000
121-
122- /** context words from [-window, window] */
123- private val window = 5
124-
125- /**
126- * The minimum number of times a token must occur in the training corpus to be
127- * included in the word2vec model (default: 5).
128- */
129- private var minCount = 5
130-
131118 /**
132119 * Sets minCount, the minimum number of times a token must appear to be included in the word2vec
133120 * model's vocabulary (default: 5).
@@ -136,6 +123,14 @@ class Word2Vec extends Serializable with Logging {
136123 this .minCount = minCount
137124 this
138125 }
126+
127+ private val EXP_TABLE_SIZE = 1000 // NOTE(review): presumably the number of entries in a precomputed exp lookup table — confirm against usage
128+ private val MAX_EXP = 6 // NOTE(review): presumably the bound of the exponent range covered by that table — confirm against usage
129+ private val MAX_CODE_LENGTH = 40 // NOTE(review): presumably caps the length of a word's binary (Huffman) code — confirm against usage
130+ private val MAX_SENTENCE_LENGTH = 1000 // NOTE(review): presumably caps the number of words handled per sentence — confirm against usage
131+
132+ /** context words from [-window, window] */
133+ private val window = 5
139134
140135 private var trainWordsCount = 0
141136 private var vocabSize = 0
0 commit comments