@@ -1936,7 +1936,7 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja
19361936 ... key=lambda x: x[0])
19371937 [(0, 'a'), (1, 'b'), (2, 'c'), (3, 'a'), (4, 'a'), (5, 'c')]
19381938 >>> testData2 = sc.parallelize([Row(id=0, label="a"), Row(id=1, label="d"),
1939- ... Row(id=2, label="e" )], 2)
1939+ ... Row(id=2, label=None )], 2)
19401940 >>> dfKeep= spark.createDataFrame(testData2)
19411941 >>> modelKeep = stringIndexer.setHandleInvalid("keep").fit(stringIndDf)
19421942 >>> tdK = modelKeep.transform(dfKeep)
@@ -1962,10 +1962,10 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja
19621962 .. versionadded:: 1.4.0
19631963 """
19641964
1965- handleInvalid = Param (Params ._dummy (), "handleInvalid" , "how to handle unseen labels. " +
1966- "Options are 'skip' (filter out rows with unseen labels), " +
1967- "error (throw an error), or 'keep' (put unseen labels in a special " +
1968- "additional bucket, at index numLabels)." ,
1965+ handleInvalid = Param (Params ._dummy (), "handleInvalid" , "how to handle invalid data (unseen " +
1966+ "labels or NULL values). Options are 'skip' (filter out rows with " +
1967+ "invalid data), error (throw an error), or 'keep' (put invalid data " +
1968+ "in a special additional bucket, at index numLabels)." ,
19691969 typeConverter = TypeConverters .toString )
19701970
19711971 @keyword_only
0 commit comments