Skip to content

Commit 44f0a36

Browse files
author
Wayne Zhang
committed
add tests
1 parent 1f336ab commit 44f0a36

File tree

1 file changed

+12
-3
lines changed

1 file changed

+12
-3
lines changed

python/pyspark/ml/feature.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2082,8 +2082,9 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid,
20822082
"""
20832083
A label indexer that maps a string column of labels to an ML column of label indices.
20842084
If the input column is numeric, we cast it to string and index the string values.
2085-
The indices are in [0, numLabels), ordered by label frequencies.
2086-
So the most frequent label gets index 0.
2085+
The indices are in [0, numLabels). By default, they are ordered by label frequencies
2086+
so the most frequent label gets index 0. The ordering behavior is controlled by
2087+
setting stringOrderType. Its default value is 'frequencyDesc'.
20872088
20882089
>>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed", handleInvalid='error')
20892090
>>> model = stringIndexer.fit(stringIndDf)
@@ -2111,14 +2112,22 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid,
21112112
>>> loadedInverter = IndexToString.load(indexToStringPath)
21122113
>>> loadedInverter.getLabels() == inverter.getLabels()
21132114
True
2115+
>>> stringIndexer.getStringOrderType()
2116+
'frequencyDesc'
2117+
>>> stringIndexer.setStringOrderType("alphabetDesc")
2118+
>>> model = stringIndexer.fit(stringIndDf)
2119+
>>> td = model.transform(stringIndDf)
2120+
>>> sorted(set([(i[0], i[1]) for i in td.select(td.id, td.indexed).collect()]),
2121+
... key=lambda x: x[0])
2122+
[(0, 2.0), (1, 1.0), (2, 0.0), (3, 2.0), (4, 2.0), (5, 0.0)]
21142123
21152124
.. versionadded:: 1.4.0
21162125
"""
21172126

21182127
stringOrderType = Param(Params._dummy(), "stringOrderType",
21192128
"How to order labels of string column. The first label after " +
21202129
"ordering is assigned an index of 0. Supported options: " +
2121-
"frequencyDesc, frequencyAsc, alphabetDsec, alphabetAsc.",
2130+
"frequencyDesc, frequencyAsc, alphabetDesc, alphabetAsc.",
21222131
typeConverter=TypeConverters.toString)
21232132

21242133
@keyword_only

0 commit comments

Comments
 (0)