Skip to content

Commit 14e5dec

Browse files
Nick Pentreath authored and mengxr committed
[SPARK-10258][DOC][ML] Add @SInCE annotations to ml.feature
This PR adds missing `Since` annotations to the `ml.feature` package. Closes #8505.

## How was this patch tested?
Existing tests.

Author: Nick Pentreath <[email protected]>

Closes #13641 from MLnick/add-since-annotations.

(cherry picked from commit 37494a1)
Signed-off-by: Xiangrui Meng <[email protected]>
1 parent 37d05ec commit 14e5dec

28 files changed

+362
-68
lines changed

mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,11 @@ import org.apache.spark.sql.types._
3535
* Binarize a column of continuous features given a threshold.
3636
*/
3737
@Experimental
38-
final class Binarizer(override val uid: String)
38+
@Since("1.4.0")
39+
final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
3940
extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {
4041

42+
@Since("1.4.0")
4143
def this() = this(Identifiable.randomUID("binarizer"))
4244

4345
/**
@@ -47,21 +49,26 @@ final class Binarizer(override val uid: String)
4749
* Default: 0.0
4850
* @group param
4951
*/
52+
@Since("1.4.0")
5053
val threshold: DoubleParam =
5154
new DoubleParam(this, "threshold", "threshold used to binarize continuous features")
5255

5356
/** @group getParam */
57+
@Since("1.4.0")
5458
def getThreshold: Double = $(threshold)
5559

5660
/** @group setParam */
61+
@Since("1.4.0")
5762
def setThreshold(value: Double): this.type = set(threshold, value)
5863

5964
setDefault(threshold -> 0.0)
6065

6166
/** @group setParam */
67+
@Since("1.4.0")
6268
def setInputCol(value: String): this.type = set(inputCol, value)
6369

6470
/** @group setParam */
71+
@Since("1.4.0")
6572
def setOutputCol(value: String): this.type = set(outputCol, value)
6673

6774
@Since("2.0.0")
@@ -96,6 +103,7 @@ final class Binarizer(override val uid: String)
96103
}
97104
}
98105

106+
@Since("1.4.0")
99107
override def transformSchema(schema: StructType): StructType = {
100108
val inputType = schema($(inputCol)).dataType
101109
val outputColName = $(outputCol)
@@ -115,6 +123,7 @@ final class Binarizer(override val uid: String)
115123
StructType(schema.fields :+ outCol)
116124
}
117125

126+
@Since("1.4.1")
118127
override def copy(extra: ParamMap): Binarizer = defaultCopy(extra)
119128
}
120129

mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,11 @@ import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
3535
* `Bucketizer` maps a column of continuous features to a column of feature buckets.
3636
*/
3737
@Experimental
38-
final class Bucketizer(override val uid: String)
38+
@Since("1.4.0")
39+
final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
3940
extends Model[Bucketizer] with HasInputCol with HasOutputCol with DefaultParamsWritable {
4041

42+
@Since("1.4.0")
4143
def this() = this(Identifiable.randomUID("bucketizer"))
4244

4345
/**
@@ -48,6 +50,7 @@ final class Bucketizer(override val uid: String)
4850
* otherwise, values outside the splits specified will be treated as errors.
4951
* @group param
5052
*/
53+
@Since("1.4.0")
5154
val splits: DoubleArrayParam = new DoubleArrayParam(this, "splits",
5255
"Split points for mapping continuous features into buckets. With n+1 splits, there are n " +
5356
"buckets. A bucket defined by splits x,y holds values in the range [x,y) except the last " +
@@ -57,15 +60,19 @@ final class Bucketizer(override val uid: String)
5760
Bucketizer.checkSplits)
5861

5962
/** @group getParam */
63+
@Since("1.4.0")
6064
def getSplits: Array[Double] = $(splits)
6165

6266
/** @group setParam */
67+
@Since("1.4.0")
6368
def setSplits(value: Array[Double]): this.type = set(splits, value)
6469

6570
/** @group setParam */
71+
@Since("1.4.0")
6672
def setInputCol(value: String): this.type = set(inputCol, value)
6773

6874
/** @group setParam */
75+
@Since("1.4.0")
6976
def setOutputCol(value: String): this.type = set(outputCol, value)
7077

7178
@Since("2.0.0")
@@ -86,16 +93,19 @@ final class Bucketizer(override val uid: String)
8693
attr.toStructField()
8794
}
8895

96+
@Since("1.4.0")
8997
override def transformSchema(schema: StructType): StructType = {
9098
SchemaUtils.checkColumnType(schema, $(inputCol), DoubleType)
9199
SchemaUtils.appendColumn(schema, prepOutputField(schema))
92100
}
93101

102+
@Since("1.4.1")
94103
override def copy(extra: ParamMap): Bucketizer = {
95104
defaultCopy[Bucketizer](extra).setParent(parent)
96105
}
97106
}
98107

108+
@Since("1.6.0")
99109
object Bucketizer extends DefaultParamsReadable[Bucketizer] {
100110

101111
/** We require splits to be of length >= 3 and to be in strictly increasing order. */

mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,21 +62,27 @@ private[feature] trait ChiSqSelectorParams extends Params
6262
* categorical label.
6363
*/
6464
@Experimental
65-
final class ChiSqSelector(override val uid: String)
65+
@Since("1.6.0")
66+
final class ChiSqSelector @Since("1.6.0") (@Since("1.6.0") override val uid: String)
6667
extends Estimator[ChiSqSelectorModel] with ChiSqSelectorParams with DefaultParamsWritable {
6768

69+
@Since("1.6.0")
6870
def this() = this(Identifiable.randomUID("chiSqSelector"))
6971

7072
/** @group setParam */
73+
@Since("1.6.0")
7174
def setNumTopFeatures(value: Int): this.type = set(numTopFeatures, value)
7275

7376
/** @group setParam */
77+
@Since("1.6.0")
7478
def setFeaturesCol(value: String): this.type = set(featuresCol, value)
7579

7680
/** @group setParam */
81+
@Since("1.6.0")
7782
def setOutputCol(value: String): this.type = set(outputCol, value)
7883

7984
/** @group setParam */
85+
@Since("1.6.0")
8086
def setLabelCol(value: String): this.type = set(labelCol, value)
8187

8288
@Since("2.0.0")
@@ -91,12 +97,14 @@ final class ChiSqSelector(override val uid: String)
9197
copyValues(new ChiSqSelectorModel(uid, chiSqSelector).setParent(this))
9298
}
9399

100+
@Since("1.6.0")
94101
override def transformSchema(schema: StructType): StructType = {
95102
SchemaUtils.checkColumnType(schema, $(featuresCol), new VectorUDT)
96103
SchemaUtils.checkNumericType(schema, $(labelCol))
97104
SchemaUtils.appendColumn(schema, $(outputCol), new VectorUDT)
98105
}
99106

107+
@Since("1.6.0")
100108
override def copy(extra: ParamMap): ChiSqSelector = defaultCopy(extra)
101109
}
102110

@@ -112,23 +120,28 @@ object ChiSqSelector extends DefaultParamsReadable[ChiSqSelector] {
112120
* Model fitted by [[ChiSqSelector]].
113121
*/
114122
@Experimental
123+
@Since("1.6.0")
115124
final class ChiSqSelectorModel private[ml] (
116-
override val uid: String,
125+
@Since("1.6.0") override val uid: String,
117126
private val chiSqSelector: feature.ChiSqSelectorModel)
118127
extends Model[ChiSqSelectorModel] with ChiSqSelectorParams with MLWritable {
119128

120129
import ChiSqSelectorModel._
121130

122131
/** list of indices to select (filter). Must be ordered asc */
132+
@Since("1.6.0")
123133
val selectedFeatures: Array[Int] = chiSqSelector.selectedFeatures
124134

125135
/** @group setParam */
136+
@Since("1.6.0")
126137
def setFeaturesCol(value: String): this.type = set(featuresCol, value)
127138

128139
/** @group setParam */
140+
@Since("1.6.0")
129141
def setOutputCol(value: String): this.type = set(outputCol, value)
130142

131143
/** @group setParam */
144+
@Since("1.6.0")
132145
def setLabelCol(value: String): this.type = set(labelCol, value)
133146

134147
@Since("2.0.0")
@@ -143,6 +156,7 @@ final class ChiSqSelectorModel private[ml] (
143156
dataset.withColumn($(outputCol), selector(col($(featuresCol))), newField.metadata)
144157
}
145158

159+
@Since("1.6.0")
146160
override def transformSchema(schema: StructType): StructType = {
147161
SchemaUtils.checkColumnType(schema, $(featuresCol), new VectorUDT)
148162
val newField = prepOutputField(schema)
@@ -165,6 +179,7 @@ final class ChiSqSelectorModel private[ml] (
165179
newAttributeGroup.toStructField()
166180
}
167181

182+
@Since("1.6.0")
168183
override def copy(extra: ParamMap): ChiSqSelectorModel = {
169184
val copied = new ChiSqSelectorModel(uid, chiSqSelector)
170185
copyValues(copied, extra).setParent(parent)

mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,27 +120,35 @@ private[feature] trait CountVectorizerParams extends Params with HasInputCol wit
120120
* Extracts a vocabulary from document collections and generates a [[CountVectorizerModel]].
121121
*/
122122
@Experimental
123-
class CountVectorizer(override val uid: String)
123+
@Since("1.5.0")
124+
class CountVectorizer @Since("1.5.0") (@Since("1.5.0") override val uid: String)
124125
extends Estimator[CountVectorizerModel] with CountVectorizerParams with DefaultParamsWritable {
125126

127+
@Since("1.5.0")
126128
def this() = this(Identifiable.randomUID("cntVec"))
127129

128130
/** @group setParam */
131+
@Since("1.5.0")
129132
def setInputCol(value: String): this.type = set(inputCol, value)
130133

131134
/** @group setParam */
135+
@Since("1.5.0")
132136
def setOutputCol(value: String): this.type = set(outputCol, value)
133137

134138
/** @group setParam */
139+
@Since("1.5.0")
135140
def setVocabSize(value: Int): this.type = set(vocabSize, value)
136141

137142
/** @group setParam */
143+
@Since("1.5.0")
138144
def setMinDF(value: Double): this.type = set(minDF, value)
139145

140146
/** @group setParam */
147+
@Since("1.5.0")
141148
def setMinTF(value: Double): this.type = set(minTF, value)
142149

143150
/** @group setParam */
151+
@Since("2.0.0")
144152
def setBinary(value: Boolean): this.type = set(binary, value)
145153

146154
@Since("2.0.0")
@@ -176,10 +184,12 @@ class CountVectorizer(override val uid: String)
176184
copyValues(new CountVectorizerModel(uid, vocab).setParent(this))
177185
}
178186

187+
@Since("1.5.0")
179188
override def transformSchema(schema: StructType): StructType = {
180189
validateAndTransformSchema(schema)
181190
}
182191

192+
@Since("1.5.0")
183193
override def copy(extra: ParamMap): CountVectorizer = defaultCopy(extra)
184194
}
185195

@@ -196,26 +206,34 @@ object CountVectorizer extends DefaultParamsReadable[CountVectorizer] {
196206
* @param vocabulary An Array over terms. Only the terms in the vocabulary will be counted.
197207
*/
198208
@Experimental
199-
class CountVectorizerModel(override val uid: String, val vocabulary: Array[String])
209+
@Since("1.5.0")
210+
class CountVectorizerModel(
211+
@Since("1.5.0") override val uid: String,
212+
@Since("1.5.0") val vocabulary: Array[String])
200213
extends Model[CountVectorizerModel] with CountVectorizerParams with MLWritable {
201214

202215
import CountVectorizerModel._
203216

217+
@Since("1.5.0")
204218
def this(vocabulary: Array[String]) = {
205219
this(Identifiable.randomUID("cntVecModel"), vocabulary)
206220
set(vocabSize, vocabulary.length)
207221
}
208222

209223
/** @group setParam */
224+
@Since("1.5.0")
210225
def setInputCol(value: String): this.type = set(inputCol, value)
211226

212227
/** @group setParam */
228+
@Since("1.5.0")
213229
def setOutputCol(value: String): this.type = set(outputCol, value)
214230

215231
/** @group setParam */
232+
@Since("1.5.0")
216233
def setMinTF(value: Double): this.type = set(minTF, value)
217234

218235
/** @group setParam */
236+
@Since("2.0.0")
219237
def setBinary(value: Boolean): this.type = set(binary, value)
220238

221239
/** Dictionary created from [[vocabulary]] and its indices, broadcast once for [[transform()]] */
@@ -252,10 +270,12 @@ class CountVectorizerModel(override val uid: String, val vocabulary: Array[Strin
252270
dataset.withColumn($(outputCol), vectorizer(col($(inputCol))))
253271
}
254272

273+
@Since("1.5.0")
255274
override def transformSchema(schema: StructType): StructType = {
256275
validateAndTransformSchema(schema)
257276
}
258277

278+
@Since("1.5.0")
259279
override def copy(extra: ParamMap): CountVectorizerModel = {
260280
val copied = new CountVectorizerModel(uid, vocabulary).setParent(parent)
261281
copyValues(copied, extra)

mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,23 +36,28 @@ import org.apache.spark.sql.types.DataType
3636
* More information on [[https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia]].
3737
*/
3838
@Experimental
39-
class DCT(override val uid: String)
39+
@Since("1.5.0")
40+
class DCT @Since("1.5.0") (@Since("1.5.0") override val uid: String)
4041
extends UnaryTransformer[Vector, Vector, DCT] with DefaultParamsWritable {
4142

43+
@Since("1.5.0")
4244
def this() = this(Identifiable.randomUID("dct"))
4345

4446
/**
4547
* Indicates whether to perform the inverse DCT (true) or forward DCT (false).
4648
* Default: false
4749
* @group param
4850
*/
51+
@Since("1.5.0")
4952
def inverse: BooleanParam = new BooleanParam(
5053
this, "inverse", "Set transformer to perform inverse DCT")
5154

5255
/** @group setParam */
56+
@Since("1.5.0")
5357
def setInverse(value: Boolean): this.type = set(inverse, value)
5458

5559
/** @group getParam */
60+
@Since("1.5.0")
5661
def getInverse: Boolean = $(inverse)
5762

5863
setDefault(inverse -> false)

mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,21 +33,26 @@ import org.apache.spark.sql.types.DataType
3333
* multiplier.
3434
*/
3535
@Experimental
36-
class ElementwiseProduct(override val uid: String)
36+
@Since("2.0.0")
37+
class ElementwiseProduct @Since("2.0.0") (@Since("2.0.0") override val uid: String)
3738
extends UnaryTransformer[Vector, Vector, ElementwiseProduct] with DefaultParamsWritable {
3839

40+
@Since("2.0.0")
3941
def this() = this(Identifiable.randomUID("elemProd"))
4042

4143
/**
4244
* the vector to multiply with input vectors
4345
* @group param
4446
*/
47+
@Since("2.0.0")
4548
val scalingVec: Param[Vector] = new Param(this, "scalingVec", "vector for hadamard product")
4649

4750
/** @group setParam */
51+
@Since("2.0.0")
4852
def setScalingVec(value: Vector): this.type = set(scalingVec, value)
4953

5054
/** @group getParam */
55+
@Since("2.0.0")
5156
def getScalingVec: Vector = getOrDefault(scalingVec)
5257

5358
override protected def createTransformFunc: Vector => Vector = {

0 commit comments

Comments (0)