@@ -352,7 +352,7 @@ class CountVectorizerModel(JavaModel, JavaMLReadable, JavaMLWritable):
352352 """
353353 .. note:: Experimental
354354
355- Model fitted by CountVectorizer.
355+ Model fitted by :py:class:`CountVectorizer`.
356356
357357 .. versionadded:: 1.6.0
358358 """
@@ -609,7 +609,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab
609609 """
610610
611611 minDocFreq = Param (Params ._dummy (), "minDocFreq" ,
612- "minimum of documents in which a term should appear for filtering" ,
612+ "minimum number of documents in which a term should appear for filtering" ,
613613 typeConverter = TypeConverters .toInt )
614614
615615 @keyword_only
@@ -655,7 +655,7 @@ class IDFModel(JavaModel, JavaMLReadable, JavaMLWritable):
655655 """
656656 .. note:: Experimental
657657
658- Model fitted by IDF.
658+ Model fitted by :py:class:`IDF`.
659659
660660 .. versionadded:: 1.4.0
661661 """
@@ -1302,7 +1302,8 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
13021302
13031303 minTokenLength = Param (Params ._dummy (), "minTokenLength" , "minimum token length (>= 0)" ,
13041304 typeConverter = TypeConverters .toInt )
1305- gaps = Param (Params ._dummy (), "gaps" , "whether regex splits on gaps (True) or matches tokens" )
1305+ gaps = Param (Params ._dummy (), "gaps" , "whether regex splits on gaps (True) or matches tokens " +
1306+ "(False)" )
13061307 pattern = Param (Params ._dummy (), "pattern" , "regex pattern (Java dialect) used for tokenizing" ,
13071308 typeConverter = TypeConverters .toString )
13081309 toLowercase = Param (Params ._dummy (), "toLowercase" , "whether to convert all characters to " +
@@ -1549,7 +1550,7 @@ class StandardScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
15491550 """
15501551 .. note:: Experimental
15511552
1552- Model fitted by StandardScaler.
1553+ Model fitted by :py:class:`StandardScaler`.
15531554
15541555 .. versionadded:: 1.4.0
15551556 """
@@ -1641,7 +1642,7 @@ class StringIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable):
16411642 """
16421643 .. note:: Experimental
16431644
1644- Model fitted by StringIndexer.
1645+ Model fitted by :py:class:`StringIndexer`.
16451646
16461647 .. versionadded:: 1.4.0
16471648 """
@@ -1907,7 +1908,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja
19071908 """
19081909 .. note:: Experimental
19091910
1910- Class for indexing categorical feature columns in a dataset of [[ Vector]] .
1911+ Class for indexing categorical feature columns in a dataset of `Vector`.
19111912
19121913 This has 2 usage modes:
19131914 - Automatically identify categorical features (default behavior)
@@ -2023,7 +2024,17 @@ class VectorIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable):
20232024 """
20242025 .. note:: Experimental
20252026
2026- Model fitted by VectorIndexer.
2027+ Model fitted by :py:class:`VectorIndexer`.
2028+
2029+ Transform categorical features to use 0-based indices instead of their original values.
2030+ - Categorical features are mapped to indices.
2031+ - Continuous features (columns) are left unchanged.
2032+
2033+ This also appends metadata to the output column, marking features as Numeric (continuous),
2034+ Nominal (categorical), or Binary (either continuous or categorical).
2035+ Non-ML metadata is not carried over from the input to the output column.
2036+
2037+ This maintains vector sparsity.
20272038
20282039 .. versionadded:: 1.4.0
20292040 """
@@ -2296,7 +2307,7 @@ class Word2VecModel(JavaModel, JavaMLReadable, JavaMLWritable):
22962307 """
22972308 .. note:: Experimental
22982309
2299- Model fitted by Word2Vec.
2310+ Model fitted by :py:class:`Word2Vec`.
23002311
23012312 .. versionadded:: 1.4.0
23022313 """
@@ -2327,7 +2338,8 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab
23272338 """
23282339 .. note:: Experimental
23292340
2330- PCA trains a model to project vectors to a low-dimensional space using PCA.
2341+ PCA trains a model to project vectors to a lower dimensional space of the
2342+ top :py:attr:`k` principal components.
23312343
23322344 >>> from pyspark.ml.linalg import Vectors
23332345 >>> data = [(Vectors.sparse(5, [(1, 1.0), (3, 7.0)]),),
@@ -2401,7 +2413,7 @@ class PCAModel(JavaModel, JavaMLReadable, JavaMLWritable):
24012413 """
24022414 .. note:: Experimental
24032415
2404- Model fitted by PCA.
2416+ Model fitted by :py:class:`PCA`. Transforms vectors to a lower dimensional space.
24052417
24062418 .. versionadded:: 1.5.0
24072419 """
@@ -2532,7 +2544,8 @@ class RFormulaModel(JavaModel, JavaMLReadable, JavaMLWritable):
25322544 """
25332545 .. note:: Experimental
25342546
2535- Model fitted by :py:class:`RFormula`.
2547+ Model fitted by :py:class:`RFormula`. Fitting is required to determine the
2548+ factor levels of formula terms.
25362549
25372550 .. versionadded:: 1.5.0
25382551 """
@@ -2624,7 +2637,7 @@ class ChiSqSelectorModel(JavaModel, JavaMLReadable, JavaMLWritable):
26242637 """
26252638 .. note:: Experimental
26262639
2627- Model fitted by ChiSqSelector.
2640+ Model fitted by :py:class:`ChiSqSelector`.
26282641
26292642 .. versionadded:: 2.0.0
26302643 """
0 commit comments