Skip to content

Commit daf88a6

Browse files
committed
modification according to feedback
1 parent a73fa19 commit daf88a6

File tree

5 files changed

+17
-198
lines changed

5 files changed

+17
-198
lines changed

mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, Statistics}
4444
import org.apache.spark.mllib.stat.correlation.CorrelationNames
4545
import org.apache.spark.mllib.util.MLUtils
4646
import org.apache.spark.rdd.RDD
47+
import org.apache.spark.storage.StorageLevel
4748
import org.apache.spark.util.Utils
4849

4950
/**
@@ -304,14 +305,15 @@ class PythonMLLibAPI extends Serializable {
304305
numPartitions: Int,
305306
numIterations: Int,
306307
seed: Long): Word2VecModelWrapper = {
307-
val data = dataJRDD.rdd.cache()
308+
val data = dataJRDD.rdd.persist(StorageLevel.MEMORY_AND_DISK_SER)
308309
val word2vec = new Word2Vec()
309-
.setVectorSize(vectorSize)
310-
.setLearningRate(learningRate)
311-
.setNumPartitions(numPartitions)
312-
.setNumIterations(numIterations)
313-
.setSeed(seed)
310+
.setVectorSize(vectorSize)
311+
.setLearningRate(learningRate)
312+
.setNumPartitions(numPartitions)
313+
.setNumIterations(numIterations)
314+
.setSeed(seed)
314315
val model = word2vec.fit(data)
316+
data.unpersist()
315317
new Word2VecModelWrapper(model)
316318
}
317319

python/docs/pyspark.mllib.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ pyspark.mllib.clustering module
2020
:undoc-members:
2121
:show-inheritance:
2222

23+
pyspark.mllib.feature module
24+
-------------------------------
25+
26+
.. automodule:: pyspark.mllib.feature
27+
:members:
28+
:undoc-members:
29+
:show-inheritance:
30+
2331
pyspark.mllib.linalg module
2432
---------------------------
2533

python/pyspark/mllib/Word2Vec.py

Lines changed: 0 additions & 192 deletions
This file was deleted.

python/pyspark/mllib/feature.py

Whitespace-only changes.

python/run-tests

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ run_test "pyspark/mllib/stat.py"
8383
run_test "pyspark/mllib/tests.py"
8484
run_test "pyspark/mllib/tree.py"
8585
run_test "pyspark/mllib/util.py"
86+
run_test "pyspark/mllib/feature.py"
8687

8788
# Try to test with PyPy
8889
if [ $(which pypy) ]; then

0 commit comments

Comments
 (0)