Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,11 @@ import org.apache.spark.sql.types._
* Binarize a column of continuous features given a threshold.
*/
@Experimental
final class Binarizer(override val uid: String)
@Since("1.4.0")
final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {

@Since("1.4.0")
def this() = this(Identifiable.randomUID("binarizer"))

/**
Expand All @@ -47,21 +49,26 @@ final class Binarizer(override val uid: String)
* Default: 0.0
* @group param
*/
@Since("1.4.0")
val threshold: DoubleParam =
new DoubleParam(this, "threshold", "threshold used to binarize continuous features")

/** @group getParam */
@Since("1.4.0")
def getThreshold: Double = $(threshold)

/** @group setParam */
@Since("1.4.0")
def setThreshold(value: Double): this.type = set(threshold, value)

setDefault(threshold -> 0.0)

/** @group setParam */
@Since("1.4.0")
def setInputCol(value: String): this.type = set(inputCol, value)

/** @group setParam */
@Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)

@Since("2.0.0")
Expand Down Expand Up @@ -96,6 +103,7 @@ final class Binarizer(override val uid: String)
}
}

@Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
val inputType = schema($(inputCol)).dataType
val outputColName = $(outputCol)
Expand All @@ -115,6 +123,7 @@ final class Binarizer(override val uid: String)
StructType(schema.fields :+ outCol)
}

@Since("1.4.1")
override def copy(extra: ParamMap): Binarizer = defaultCopy(extra)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,11 @@ import org.apache.spark.sql.types.{DoubleType, StructField, StructType}
* `Bucketizer` maps a column of continuous features to a column of feature buckets.
*/
@Experimental
final class Bucketizer(override val uid: String)
@Since("1.4.0")
final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Model[Bucketizer] with HasInputCol with HasOutputCol with DefaultParamsWritable {

@Since("1.4.0")
def this() = this(Identifiable.randomUID("bucketizer"))

/**
Expand All @@ -48,6 +50,7 @@ final class Bucketizer(override val uid: String)
* otherwise, values outside the splits specified will be treated as errors.
* @group param
*/
@Since("1.4.0")
val splits: DoubleArrayParam = new DoubleArrayParam(this, "splits",
"Split points for mapping continuous features into buckets. With n+1 splits, there are n " +
"buckets. A bucket defined by splits x,y holds values in the range [x,y) except the last " +
Expand All @@ -57,15 +60,19 @@ final class Bucketizer(override val uid: String)
Bucketizer.checkSplits)

/** @group getParam */
@Since("1.4.0")
def getSplits: Array[Double] = $(splits)

/** @group setParam */
@Since("1.4.0")
def setSplits(value: Array[Double]): this.type = set(splits, value)

/** @group setParam */
@Since("1.4.0")
def setInputCol(value: String): this.type = set(inputCol, value)

/** @group setParam */
@Since("1.4.0")
def setOutputCol(value: String): this.type = set(outputCol, value)

@Since("2.0.0")
Expand All @@ -86,16 +93,19 @@ final class Bucketizer(override val uid: String)
attr.toStructField()
}

/**
 * Derives the output schema for this transformer.
 *
 * The input column is first checked against `DoubleType` via `SchemaUtils.checkColumnType`;
 * the field built by `prepOutputField` is then appended to the incoming schema.
 *
 * @param schema schema of the input dataset
 * @return the result of `SchemaUtils.appendColumn` for `schema` and the prepared output field
 */
@Since("1.4.0")
override def transformSchema(schema: StructType): StructType = {
  // The type check runs before the output field is prepared, as in the original ordering.
  SchemaUtils.checkColumnType(schema, $(inputCol), DoubleType)
  val bucketField = prepOutputField(schema)
  SchemaUtils.appendColumn(schema, bucketField)
}

/**
 * Creates a copy of this `Bucketizer` through `defaultCopy`, applying `extra`, and
 * sets the copy's parent to this instance's `parent`.
 *
 * @param extra extra params passed to `defaultCopy`
 * @return the copied `Bucketizer` as returned by `setParent`
 */
@Since("1.4.1")
override def copy(extra: ParamMap): Bucketizer = {
  val duplicate = defaultCopy[Bucketizer](extra)
  duplicate.setParent(parent)
}
}

@Since("1.6.0")
object Bucketizer extends DefaultParamsReadable[Bucketizer] {

/** We require splits to be of length >= 3 and to be in strictly increasing order. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,21 +62,27 @@ private[feature] trait ChiSqSelectorParams extends Params
* categorical label.
*/
@Experimental
final class ChiSqSelector(override val uid: String)
@Since("1.6.0")
final class ChiSqSelector @Since("1.6.0") (@Since("1.6.0") override val uid: String)
extends Estimator[ChiSqSelectorModel] with ChiSqSelectorParams with DefaultParamsWritable {

@Since("1.6.0")
def this() = this(Identifiable.randomUID("chiSqSelector"))

/** @group setParam */
@Since("1.6.0")
def setNumTopFeatures(value: Int): this.type = set(numTopFeatures, value)

/** @group setParam */
@Since("1.6.0")
def setFeaturesCol(value: String): this.type = set(featuresCol, value)

/** @group setParam */
@Since("1.6.0")
def setOutputCol(value: String): this.type = set(outputCol, value)

/** @group setParam */
@Since("1.6.0")
def setLabelCol(value: String): this.type = set(labelCol, value)

@Since("2.0.0")
Expand All @@ -91,12 +97,14 @@ final class ChiSqSelector(override val uid: String)
copyValues(new ChiSqSelectorModel(uid, chiSqSelector).setParent(this))
}

/**
 * Derives the output schema for this estimator.
 *
 * Checks the features column against `VectorUDT`, checks the label column with
 * `SchemaUtils.checkNumericType`, and appends the output column typed as `VectorUDT`.
 *
 * @param schema schema of the input dataset
 * @return the result of `SchemaUtils.appendColumn` for the output column
 */
@Since("1.6.0")
override def transformSchema(schema: StructType): StructType = {
  // Each column name is resolved immediately before its check so that the
  // evaluation order matches a straight-line sequence of the three calls.
  val featuresName = $(featuresCol)
  SchemaUtils.checkColumnType(schema, featuresName, new VectorUDT)
  val labelName = $(labelCol)
  SchemaUtils.checkNumericType(schema, labelName)
  val outputName = $(outputCol)
  SchemaUtils.appendColumn(schema, outputName, new VectorUDT)
}

@Since("1.6.0")
override def copy(extra: ParamMap): ChiSqSelector = defaultCopy(extra)
}

Expand All @@ -112,23 +120,28 @@ object ChiSqSelector extends DefaultParamsReadable[ChiSqSelector] {
* Model fitted by [[ChiSqSelector]].
*/
@Experimental
@Since("1.6.0")
final class ChiSqSelectorModel private[ml] (
override val uid: String,
@Since("1.6.0") override val uid: String,
private val chiSqSelector: feature.ChiSqSelectorModel)
extends Model[ChiSqSelectorModel] with ChiSqSelectorParams with MLWritable {

import ChiSqSelectorModel._

/**
 * Indices of the features to select (filter), read from the wrapped `chiSqSelector`.
 * Must be in ascending order.
 */
@Since("1.6.0")
val selectedFeatures: Array[Int] = chiSqSelector.selectedFeatures

/** @group setParam */
@Since("1.6.0")
def setFeaturesCol(value: String): this.type = set(featuresCol, value)

/** @group setParam */
@Since("1.6.0")
def setOutputCol(value: String): this.type = set(outputCol, value)

/** @group setParam */
@Since("1.6.0")
def setLabelCol(value: String): this.type = set(labelCol, value)

@Since("2.0.0")
Expand All @@ -143,6 +156,7 @@ final class ChiSqSelectorModel private[ml] (
dataset.withColumn($(outputCol), selector(col($(featuresCol))), newField.metadata)
}

@Since("1.6.0")
override def transformSchema(schema: StructType): StructType = {
SchemaUtils.checkColumnType(schema, $(featuresCol), new VectorUDT)
val newField = prepOutputField(schema)
Expand All @@ -165,6 +179,7 @@ final class ChiSqSelectorModel private[ml] (
newAttributeGroup.toStructField()
}

@Since("1.6.0")
override def copy(extra: ParamMap): ChiSqSelectorModel = {
val copied = new ChiSqSelectorModel(uid, chiSqSelector)
copyValues(copied, extra).setParent(parent)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,27 +120,35 @@ private[feature] trait CountVectorizerParams extends Params with HasInputCol wit
* Extracts a vocabulary from document collections and generates a [[CountVectorizerModel]].
*/
@Experimental
class CountVectorizer(override val uid: String)
@Since("1.5.0")
class CountVectorizer @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Estimator[CountVectorizerModel] with CountVectorizerParams with DefaultParamsWritable {

@Since("1.5.0")
def this() = this(Identifiable.randomUID("cntVec"))

/** @group setParam */
@Since("1.5.0")
def setInputCol(value: String): this.type = set(inputCol, value)

/** @group setParam */
@Since("1.5.0")
def setOutputCol(value: String): this.type = set(outputCol, value)

/** @group setParam */
@Since("1.5.0")
def setVocabSize(value: Int): this.type = set(vocabSize, value)

/** @group setParam */
@Since("1.5.0")
def setMinDF(value: Double): this.type = set(minDF, value)

/** @group setParam */
@Since("1.5.0")
def setMinTF(value: Double): this.type = set(minTF, value)

/** @group setParam */
@Since("2.0.0")
def setBinary(value: Boolean): this.type = set(binary, value)

@Since("2.0.0")
Expand Down Expand Up @@ -176,10 +184,12 @@ class CountVectorizer(override val uid: String)
copyValues(new CountVectorizerModel(uid, vocab).setParent(this))
}

/**
 * Derives the output schema by delegating to `validateAndTransformSchema`.
 *
 * @param schema schema of the input dataset
 * @return whatever `validateAndTransformSchema` returns for `schema`
 */
@Since("1.5.0")
override def transformSchema(schema: StructType): StructType =
  validateAndTransformSchema(schema)

@Since("1.5.0")
override def copy(extra: ParamMap): CountVectorizer = defaultCopy(extra)
}

Expand All @@ -196,26 +206,34 @@ object CountVectorizer extends DefaultParamsReadable[CountVectorizer] {
* @param vocabulary An Array over terms. Only the terms in the vocabulary will be counted.
*/
@Experimental
class CountVectorizerModel(override val uid: String, val vocabulary: Array[String])
@Since("1.5.0")
class CountVectorizerModel(
@Since("1.5.0") override val uid: String,
@Since("1.5.0") val vocabulary: Array[String])
extends Model[CountVectorizerModel] with CountVectorizerParams with MLWritable {

import CountVectorizerModel._

/**
 * Auxiliary constructor that builds a model from a vocabulary alone.
 *
 * Delegates to the primary constructor with a uid obtained from
 * `Identifiable.randomUID("cntVecModel")`, then sets `vocabSize` to the
 * length of the supplied vocabulary.
 *
 * @param vocabulary array of terms for this model
 */
@Since("1.5.0")
def this(vocabulary: Array[String]) = {
this(Identifiable.randomUID("cntVecModel"), vocabulary)
// Keep the vocabSize param in step with the vocabulary that was passed in.
set(vocabSize, vocabulary.length)
}

/** @group setParam */
@Since("1.5.0")
def setInputCol(value: String): this.type = set(inputCol, value)

/** @group setParam */
@Since("1.5.0")
def setOutputCol(value: String): this.type = set(outputCol, value)

/** @group setParam */
@Since("1.5.0")
def setMinTF(value: Double): this.type = set(minTF, value)

/** @group setParam */
@Since("2.0.0")
def setBinary(value: Boolean): this.type = set(binary, value)

/** Dictionary created from [[vocabulary]] and its indices, broadcast once for [[transform()]] */
Expand Down Expand Up @@ -252,10 +270,12 @@ class CountVectorizerModel(override val uid: String, val vocabulary: Array[Strin
dataset.withColumn($(outputCol), vectorizer(col($(inputCol))))
}

/**
 * Derives the output schema by delegating to `validateAndTransformSchema`.
 *
 * @param schema schema of the input dataset
 * @return whatever `validateAndTransformSchema` returns for `schema`
 */
@Since("1.5.0")
override def transformSchema(schema: StructType): StructType =
  validateAndTransformSchema(schema)

@Since("1.5.0")
override def copy(extra: ParamMap): CountVectorizerModel = {
val copied = new CountVectorizerModel(uid, vocabulary).setParent(parent)
copyValues(copied, extra)
Expand Down
7 changes: 6 additions & 1 deletion mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,28 @@ import org.apache.spark.sql.types.DataType
* More information on [[https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia]].
*/
@Experimental
class DCT(override val uid: String)
@Since("1.5.0")
class DCT @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends UnaryTransformer[Vector, Vector, DCT] with DefaultParamsWritable {

@Since("1.5.0")
def this() = this(Identifiable.randomUID("dct"))

/**
 * Indicates whether to perform the inverse DCT (true) or forward DCT (false).
 * Default: false
 *
 * Declared as a `val` so that a single `BooleanParam` instance is created per
 * transformer; a `def` would allocate a fresh param object on every access
 * (for example on each `set`, `$(...)` or `setDefault` call). The accessor
 * signature seen by callers is unchanged.
 *
 * @group param
 */
@Since("1.5.0")
val inverse: BooleanParam = new BooleanParam(
  this, "inverse", "Set transformer to perform inverse DCT")

/** @group setParam */
@Since("1.5.0")
def setInverse(value: Boolean): this.type = set(inverse, value)

/** @group getParam */
@Since("1.5.0")
def getInverse: Boolean = $(inverse)

setDefault(inverse -> false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,26 @@ import org.apache.spark.sql.types.DataType
* multiplier.
*/
@Experimental
class ElementwiseProduct(override val uid: String)
@Since("2.0.0")
class ElementwiseProduct @Since("2.0.0") (@Since("2.0.0") override val uid: String)
extends UnaryTransformer[Vector, Vector, ElementwiseProduct] with DefaultParamsWritable {

@Since("2.0.0")
def this() = this(Identifiable.randomUID("elemProd"))

/**
 * The vector to multiply with input vectors. The param is registered under the
 * name "scalingVec" with the description "vector for hadamard product".
 * @group param
 */
@Since("2.0.0")
val scalingVec: Param[Vector] = new Param(this, "scalingVec", "vector for hadamard product")

/** @group setParam */
@Since("2.0.0")
def setScalingVec(value: Vector): this.type = set(scalingVec, value)

/** @group getParam */
@Since("2.0.0")
def getScalingVec: Vector = getOrDefault(scalingVec)

override protected def createTransformFunc: Vector => Vector = {
Expand Down
Loading