
Commit 1d2f28f

Author: VinceShieh (committed)

include changes made by SPARK-11569

Signed-off-by: VinceShieh <[email protected]>

1 parent b4bb765 · commit 1d2f28f

File tree: 1 file changed, +5 −5 lines

python/pyspark/ml/feature.py

Lines changed: 5 additions & 5 deletions
@@ -1936,7 +1936,7 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja
     ...     key=lambda x: x[0])
     [(0, 'a'), (1, 'b'), (2, 'c'), (3, 'a'), (4, 'a'), (5, 'c')]
     >>> testData2 = sc.parallelize([Row(id=0, label="a"), Row(id=1, label="d"),
-    ...                             Row(id=2, label="e")], 2)
+    ...                             Row(id=2, label=None)], 2)
     >>> dfKeep = spark.createDataFrame(testData2)
     >>> modelKeep = stringIndexer.setHandleInvalid("keep").fit(stringIndDf)
     >>> tdK = modelKeep.transform(dfKeep)
@@ -1962,10 +1962,10 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja
     .. versionadded:: 1.4.0
     """

-    handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle unseen labels. " +
-                          "Options are 'skip' (filter out rows with unseen labels), " +
-                          "error (throw an error), or 'keep' (put unseen labels in a special " +
-                          "additional bucket, at index numLabels).",
+    handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle invalid data (unseen " +
+                          "labels or NULL values). Options are 'skip' (filter out rows with " +
+                          "invalid data), error (throw an error), or 'keep' (put invalid data " +
+                          "in a special additional bucket, at index numLabels).",
                           typeConverter=TypeConverters.toString)

     @keyword_only
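
For context, a minimal sketch of how the documented handleInvalid behavior can be exercised from PySpark. The DataFrames, column names, and the SparkSession variable spark used below are illustrative assumptions, not part of this commit:

from pyspark.ml.feature import StringIndexer

# Assumed setup: an active SparkSession bound to the name `spark`.
train = spark.createDataFrame([(0, "a"), (1, "b"), (2, "c")], ["id", "label"])
test = spark.createDataFrame([(0, "a"), (1, "d"), (2, None)], ["id", "label"])

# Per the updated Param description, handleInvalid="keep" keeps rows whose
# label is unseen ("d") or NULL and maps them to the extra bucket at index
# numLabels; "skip" would filter those rows out, and "error" would raise.
indexer = StringIndexer(inputCol="label", outputCol="labelIndex",
                        handleInvalid="keep")
model = indexer.fit(train)
model.transform(test).show()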
