@@ -3043,26 +3043,35 @@ class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol, JavaMLReadable, JavaM
30433043 "Force to index label whether it is numeric or string" ,
30443044 typeConverter = TypeConverters .toBoolean )
30453045
3046+ stringIndexerOrderType = Param (Params ._dummy (), "stringIndexerOrderType" ,  # Param added by this diff; the string below is its user-facing description
3047+ "How to order categories of a string feature column used by " +
3048+ "StringIndexer. The last category after ordering is dropped " +
3049+ "when encoding strings. Supported options: frequencyDesc, " +
3050+ "frequencyAsc, alphabetDesc, alphabetAsc. The default value " +
3051+ "is frequencyDesc. When the ordering is set to alphabetDesc, " +
3052+ "RFormula drops the same category as R when encoding strings." ,
3053+ typeConverter = TypeConverters .toString )  # NOTE(review): presumably converts/validates assigned values as strings -- confirm against TypeConverters
3054+
30463055 @keyword_only
30473056 def __init__ (self , formula = None , featuresCol = "features" , labelCol = "label" ,
3048- forceIndexLabel = False ):
3057+ forceIndexLabel = False , stringIndexerOrderType = "frequencyDesc" ):  # new trailing keyword with a default, so existing call sites keep working
30493058 """
30503059 __init__(self, formula=None, featuresCol="features", labelCol="label", \
3051- forceIndexLabel=False)
3060+ forceIndexLabel=False, stringIndexerOrderType="frequencyDesc" )
30523061 """
30533062 super (RFormula , self ).__init__ ()
30543063 self ._java_obj = self ._new_java_obj ("org.apache.spark.ml.feature.RFormula" , self .uid )  # Java-side RFormula object, created with this instance's uid
3055- self ._setDefault (forceIndexLabel = False )
3064+ self ._setDefault (forceIndexLabel = False , stringIndexerOrderType = "frequencyDesc" )  # defaults are registered before the caller's keyword arguments are applied below
30563065 kwargs = self ._input_kwargs  # NOTE(review): presumably the keyword arguments captured by @keyword_only -- confirm
30573066 self .setParams (** kwargs )  # forwards those keyword arguments to setParams
30583067
30593068 @keyword_only
30603069 @since ("1.5.0" )
30613070 def setParams (self , formula = None , featuresCol = "features" , labelCol = "label" ,
3062- forceIndexLabel = False ):
3071+ forceIndexLabel = False , stringIndexerOrderType = "frequencyDesc" ):
30633072 """
30643073 setParams(self, formula=None, featuresCol="features", labelCol="label", \
3065- forceIndexLabel=False)
3074+ forceIndexLabel=False, stringIndexerOrderType="frequencyDesc" )
30663075 Sets params for RFormula.
30673076 """
30683077 kwargs = self ._input_kwargs
@@ -3096,6 +3105,20 @@ def getForceIndexLabel(self):
30963105 """
30973106 return self .getOrDefault (self .forceIndexLabel )
30983107
3108+ @since ("2.3.0" )
3109+ def setStringIndexerOrderType (self , value ):  # setter for the stringIndexerOrderType Param
3110+ """
3111+ Sets the value of :py:attr:`stringIndexerOrderType`.
3112+ """
3113+ return self ._set (stringIndexerOrderType = value )  # returns whatever _set returns (presumably self, for chaining -- confirm); no validation of value is done in this method
3114+
3115+ @since ("2.3.0" )
3116+ def getStringIndexerOrderType (self ):  # getter for the stringIndexerOrderType Param
3117+ """
3118+ Gets the value of :py:attr:`stringIndexerOrderType` or its default value 'frequencyDesc'.
3119+ """
3120+ return self .getOrDefault (self .stringIndexerOrderType )  # looks the Param up via getOrDefault; the 'frequencyDesc' default is registered in __init__ through _setDefault
3121+
30993122 def _create_model (self , java_model ):  # unchanged context line in this diff
31003123 return RFormulaModel (java_model )  # wraps the given Java model object in the Python RFormulaModel class
31013124
0 commit comments