Skip to content

Commit 789342f

Browse files
committed
add new tests for stopwords
1 parent 55191ce commit 789342f

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

python/pyspark/ml/tests.py

100644 → 100755
Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -336,13 +336,20 @@ def test_stopwordsremover(self):
336336
self.assertEqual(stopWordRemover.getInputCol(), "input")
337337
transformedDF = stopWordRemover.transform(dataset)
338338
self.assertEqual(transformedDF.head().output, ["panda"])
339-
# Custom
339+
# with particular stop words list
340340
stopwords = ["panda"]
341341
stopWordRemover.setStopWords(stopwords)
342342
self.assertEqual(stopWordRemover.getInputCol(), "input")
343343
self.assertEqual(stopWordRemover.getStopWords(), stopwords)
344344
transformedDF = stopWordRemover.transform(dataset)
345345
self.assertEqual(transformedDF.head().output, ["a"])
346+
# with language selection
347+
stopwords = StopWordsRemover.loadStopWords("turkish")
348+
dataset = sqlContext.createDataFrame([Row(input=["acaba", "ama", "biri"])])
349+
stopWordRemover.setStopWords(stopwords)
350+
self.assertEqual(stopWordRemover.getStopWords(), stopwords)
351+
transformedDF = stopWordRemover.transform(dataset)
352+
self.assertEqual(transformedDF.head().output, [])
346353

347354

348355
class HasInducedError(Params):

0 commit comments

Comments
 (0)