Skip to content

Commit 58dfba6

Browse files
committed
[SPARK-14665][ML][PYTHON] Fixed bug with StopWordsRemover default stopwords
The default stopwords were a Java object; they are now converted to a Python list. Tested with a unit test which failed before the fix. Author: Joseph K. Bradley <[email protected]> Closes #12422 from jkbradley/pyspark-stopwords. (cherry picked from commit d6ae7d4) Signed-off-by: Joseph K. Bradley <[email protected]> Conflicts: python/pyspark/ml/feature.py python/pyspark/ml/tests.py
1 parent 93c9a63 commit 58dfba6

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

python/pyspark/ml/feature.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1402,7 +1402,7 @@ def __init__(self, inputCol=None, outputCol=None, stopWords=None,
14021402
self.caseSensitive = Param(self, "caseSensitive", "whether to do a case " +
14031403
"sensitive comparison over the stop words")
14041404
stopWordsObj = _jvm().org.apache.spark.ml.feature.StopWords
1405-
defaultStopWords = stopWordsObj.English()
1405+
defaultStopWords = list(stopWordsObj.English())
14061406
self._setDefault(stopWords=defaultStopWords)
14071407
kwargs = self.__init__._input_kwargs
14081408
self.setParams(**kwargs)

python/pyspark/ml/tests.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
"""
2121

2222
import sys
23+
if sys.version > '3':
24+
basestring = str
25+
2326
try:
2427
import xmlrunner
2528
except ImportError:
@@ -283,6 +286,8 @@ def test_stopwordsremover(self):
283286
self.assertEqual(stopWordRemover.getInputCol(), "input")
284287
transformedDF = stopWordRemover.transform(dataset)
285288
self.assertEqual(transformedDF.head().output, ["panda"])
289+
self.assertEqual(type(stopWordRemover.getStopWords()), list)
290+
self.assertTrue(isinstance(stopWordRemover.getStopWords()[0], basestring))
286291
# Custom
287292
stopwords = ["panda"]
288293
stopWordRemover.setStopWords(stopwords)

0 commit comments

Comments
 (0)