From c33fbf62afeec39c2408477ea9461f952aaf67a5 Mon Sep 17 00:00:00 2001
From: Martin Brown
Date: Sat, 19 Sep 2015 18:28:03 -0700
Subject: [PATCH 1/5] @Since() annotations for spark/ml/feature

---
 .../apache/spark/ml/feature/Binarizer.scala    |  9 +++++++++
 .../apache/spark/ml/feature/Bucketizer.scala   |  9 +++++++++
 .../spark/ml/feature/CountVectorizer.scala     | 18 ++++++++++++++++++
 .../org/apache/spark/ml/feature/DCT.scala      |  8 ++++++++
 .../spark/ml/feature/ElementwiseProduct.scala  |  6 ++++++
 .../apache/spark/ml/feature/HashingTF.scala    |  9 +++++++++
 .../org/apache/spark/ml/feature/IDF.scala      |  8 ++++++++
 .../apache/spark/ml/feature/MinMaxScaler.scala |  9 +++++++++
 .../org/apache/spark/ml/feature/NGram.scala    |  7 +++++++
 .../apache/spark/ml/feature/Normalizer.scala   |  6 ++++++
 .../spark/ml/feature/OneHotEncoder.scala       |  8 ++++++++
 .../org/apache/spark/ml/feature/PCA.scala      |  8 ++++++++
 .../spark/ml/feature/PolynomialExpansion.scala |  7 +++++++
 .../org/apache/spark/ml/feature/RFormula.scala | 10 ++++++++++
 .../spark/ml/feature/StandardScaler.scala      |  9 +++++++++
 .../spark/ml/feature/StopWordsRemover.scala    | 11 +++++++++++
 .../spark/ml/feature/StringIndexer.scala       | 16 ++++++++++++++++
 .../apache/spark/ml/feature/Tokenizer.scala    | 18 ++++++++++++++++++
 .../spark/ml/feature/VectorAssembler.scala     |  7 +++++++
 .../spark/ml/feature/VectorIndexer.scala       |  8 ++++++++
 .../apache/spark/ml/feature/VectorSlicer.scala | 12 ++++++++++++
 .../org/apache/spark/ml/feature/Word2Vec.scala | 13 +++++++++++++
 22 files changed, 216 insertions(+)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
index edad754436455..7fd668deb8558 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
@@ -31,10 +31,12 @@ import org.apache.spark.sql.types.{DoubleType, StructType}
  * :: Experimental ::
  * Binarize a column of continuous features given a threshold.
*/ +@Since("1.4.0") @Experimental final class Binarizer(override val uid: String) extends Transformer with HasInputCol with HasOutputCol { + @Since("1.4.0") def this() = this(Identifiable.randomUID("binarizer")) /** @@ -48,19 +50,24 @@ final class Binarizer(override val uid: String) new DoubleParam(this, "threshold", "threshold used to binarize continuous features") /** @group getParam */ + @Since("1.4.0") def getThreshold: Double = $(threshold) /** @group setParam */ + @Since("1.4.0") def setThreshold(value: Double): this.type = set(threshold, value) setDefault(threshold -> 0.0) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) + @Since("1.4.0") override def transform(dataset: DataFrame): DataFrame = { transformSchema(dataset.schema, logging = true) val td = $(threshold) @@ -71,6 +78,7 @@ final class Binarizer(override val uid: String) binarizer(col($(inputCol))).as(outputColName, metadata)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { SchemaUtils.checkColumnType(schema, $(inputCol), DoubleType) @@ -85,5 +93,6 @@ final class Binarizer(override val uid: String) StructType(outputFields) } + @Since("1.4.1") override def copy(extra: ParamMap): Binarizer = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala index 6fdf25b015b0b..334197be872ab 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -34,10 +34,12 @@ import org.apache.spark.sql.types.{DoubleType, StructField, StructType} * :: Experimental :: * `Bucketizer` maps a column of continuous features to a column of feature buckets. 
*/ +@Since("1.4.0") @Experimental final class Bucketizer(override val uid: String) extends Model[Bucketizer] with HasInputCol with HasOutputCol { + @Since("1.4.0") def this() = this(Identifiable.randomUID("bucketizer")) /** @@ -57,17 +59,22 @@ final class Bucketizer(override val uid: String) Bucketizer.checkSplits) /** @group getParam */ + @Since("1.4.0") def getSplits: Array[Double] = $(splits) /** @group setParam */ + @Since("1.4.0") def setSplits(value: Array[Double]): this.type = set(splits, value) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) + @Since("1.4.0") override def transform(dataset: DataFrame): DataFrame = { transformSchema(dataset.schema) val bucketizer = udf { feature: Double => @@ -85,11 +92,13 @@ final class Bucketizer(override val uid: String) attr.toStructField() } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { SchemaUtils.checkColumnType(schema, $(inputCol), DoubleType) SchemaUtils.appendColumn(schema, prepOutputField(schema)) } + @Since("1.4.1") override def copy(extra: ParamMap): Bucketizer = { defaultCopy[Bucketizer](extra).setParent(parent) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala index 49028e4b85064..182cfd27ab1fb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala @@ -103,29 +103,37 @@ private[feature] trait CountVectorizerParams extends Params with HasInputCol wit * :: Experimental :: * Extracts a vocabulary from document collections and generates a [[CountVectorizerModel]]. */ +@Since("1.5.0") @Experimental class CountVectorizer(override val uid: String) extends Estimator[CountVectorizerModel] with CountVectorizerParams { + @Since("1.5.0") def this() = this(Identifiable.randomUID("cntVec")) /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.5.0") def setVocabSize(value: Int): this.type = set(vocabSize, value) /** @group setParam */ + @Since("1.5.0") def setMinDF(value: Double): this.type = set(minDF, value) /** @group setParam */ + @Since("1.5.0") def setMinTF(value: Double): this.type = set(minTF, value) setDefault(vocabSize -> (1 << 18), minDF -> 1) + @Since("1.5.0") override def fit(dataset: DataFrame): CountVectorizerModel = { transformSchema(dataset.schema, logging = true) val vocSize = $(vocabSize) @@ -164,10 +172,12 @@ class CountVectorizer(override val uid: String) copyValues(new CountVectorizerModel(uid, vocab).setParent(this)) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.5.0") override def copy(extra: ParamMap): CountVectorizer = defaultCopy(extra) } @@ -176,27 +186,33 @@ class CountVectorizer(override val uid: String) * Converts a text document to a sparse vector of token counts. * @param vocabulary An Array over terms. Only the terms in the vocabulary will be counted. 
*/ +@Since("1.5.0") @Experimental class CountVectorizerModel(override val uid: String, val vocabulary: Array[String]) extends Model[CountVectorizerModel] with CountVectorizerParams { + @Since("1.5.0") def this(vocabulary: Array[String]) = { this(Identifiable.randomUID("cntVecModel"), vocabulary) set(vocabSize, vocabulary.length) } /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.5.0") def setMinTF(value: Double): this.type = set(minTF, value) /** Dictionary created from [[vocabulary]] and its indices, broadcast once for [[transform()]] */ private var broadcastDict: Option[Broadcast[Map[String, Int]]] = None + @Since("1.5.0") override def transform(dataset: DataFrame): DataFrame = { if (broadcastDict.isEmpty) { val dict = vocabulary.zipWithIndex.toMap @@ -224,10 +240,12 @@ class CountVectorizerModel(override val uid: String, val vocabulary: Array[Strin dataset.withColumn($(outputCol), vectorizer(col($(inputCol)))) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.5.0") override def copy(extra: ParamMap): CountVectorizerModel = { val copied = new CountVectorizerModel(uid, vocabulary).setParent(parent) copyValues(copied, extra) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala index 228347635c92b..8d525fab7d5ef 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala @@ -35,10 +35,12 @@ import org.apache.spark.sql.types.DataType * * More information on [[https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia]]. */ +@Since("1.5.0") @Experimental class DCT(override val uid: String) extends UnaryTransformer[Vector, Vector, DCT] { + @Since("1.5.0") def this() = this(Identifiable.randomUID("dct")) /** @@ -46,17 +48,21 @@ class DCT(override val uid: String) * Default: false * @group param */ + @Since("1.5.0") def inverse: BooleanParam = new BooleanParam( this, "inverse", "Set transformer to perform inverse DCT") /** @group setParam */ + @Since("1.5.0") def setInverse(value: Boolean): this.type = set(inverse, value) /** @group getParam */ + @Since("1.5.0") def getInverse: Boolean = $(inverse) setDefault(inverse -> false) + @Since("1.5.0") override protected def createTransformFunc: Vector => Vector = { vec => val result = vec.toArray val jTransformer = new DoubleDCT_1D(result.length) @@ -64,9 +70,11 @@ class DCT(override val uid: String) Vectors.dense(result) } + @Since("1.5.0") override protected def validateInputType(inputType: DataType): Unit = { require(inputType.isInstanceOf[VectorUDT], s"Input type must be VectorUDT but got $inputType.") } + @Since("1.5.0") override protected def outputDataType: DataType = new VectorUDT } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index a359cb8f37ec3..69c947186540e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -31,10 +31,12 @@ import org.apache.spark.sql.types.DataType * provided "weight" vector. 
In other words, it scales each column of the dataset by a scalar * multiplier. */ +@Since("1.4.0") @Experimental class ElementwiseProduct(override val uid: String) extends UnaryTransformer[Vector, Vector, ElementwiseProduct] { + @Since("1.4.0") def this() = this(Identifiable.randomUID("elemProd")) /** @@ -44,16 +46,20 @@ class ElementwiseProduct(override val uid: String) val scalingVec: Param[Vector] = new Param(this, "scalingVec", "vector for hadamard product") /** @group setParam */ + @Since("1.4.0") def setScalingVec(value: Vector): this.type = set(scalingVec, value) /** @group getParam */ + @Since("1.4.0") def getScalingVec: Vector = getOrDefault(scalingVec) + @Since("1.4.0") override protected def createTransformFunc: Vector => Vector = { require(params.contains(scalingVec), s"transformation requires a weight vector") val elemScaler = new feature.ElementwiseProduct($(scalingVec)) elemScaler.transform } + @Since("1.4.0") override protected def outputDataType: DataType = new VectorUDT() } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index 319d23e46cef4..eb2eac5a7cb22 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -32,15 +32,19 @@ import org.apache.spark.sql.types.{ArrayType, StructType} * :: Experimental :: * Maps a sequence of terms to their term frequencies using the hashing trick. */ +@Since("1.2.0") @Experimental class HashingTF(override val uid: String) extends Transformer with HasInputCol with HasOutputCol { + @Since("1.4.0") def this() = this(Identifiable.randomUID("hashingTF")) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @@ -54,11 +58,14 @@ class HashingTF(override val uid: String) extends Transformer with HasInputCol w setDefault(numFeatures -> (1 << 18)) /** @group getParam */ + @Since("1.2.0") def getNumFeatures: Int = $(numFeatures) /** @group setParam */ + @Since("1.2.0") def setNumFeatures(value: Int): this.type = set(numFeatures, value) + @Since("1.4.0") override def transform(dataset: DataFrame): DataFrame = { val outputSchema = transformSchema(dataset.schema) val hashingTF = new feature.HashingTF($(numFeatures)) @@ -67,6 +74,7 @@ class HashingTF(override val uid: String) extends Transformer with HasInputCol w dataset.select(col("*"), t(col($(inputCol))).as($(outputCol), metadata)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { val inputType = schema($(inputCol)).dataType require(inputType.isInstanceOf[ArrayType], @@ -75,5 +83,6 @@ class HashingTF(override val uid: String) extends Transformer with HasInputCol w SchemaUtils.appendColumn(schema, attrGroup.toStructField()) } + @Since("1.4.1") override def copy(extra: ParamMap): HashingTF = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index 4c36df75d8aa0..6dfaeccf2c5f9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -59,20 +59,26 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol * :: Experimental :: * Compute the Inverse Document Frequency (IDF) given a collection of documents. 
*/ +@Since("1.4.0") @Experimental final class IDF(override val uid: String) extends Estimator[IDFModel] with IDFBase { + @Since("1.4.0") def this() = this(Identifiable.randomUID("idf")) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.4.0") def setMinDocFreq(value: Int): this.type = set(minDocFreq, value) + @Since("1.4.0") override def fit(dataset: DataFrame): IDFModel = { transformSchema(dataset.schema, logging = true) val input = dataset.select($(inputCol)).map { case Row(v: Vector) => v } @@ -80,10 +86,12 @@ final class IDF(override val uid: String) extends Estimator[IDFModel] with IDFBa copyValues(new IDFModel(uid, idf).setParent(this)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.4.1") override def copy(extra: ParamMap): IDF = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala index 1b494ec8b1727..96ea993a418b1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala @@ -83,26 +83,33 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H * Note that since zero values will probably be transformed to non-zero values, output of the * transformer will be DenseVector even for sparse input. */ +@Since("1.5.0") @Experimental class MinMaxScaler(override val uid: String) extends Estimator[MinMaxScalerModel] with MinMaxScalerParams { + @Since("1.5.0") def this() = this(Identifiable.randomUID("minMaxScal")) setDefault(min -> 0.0, max -> 1.0) /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.5.0") def setMin(value: Double): this.type = set(min, value) /** @group setParam */ + @Since("1.5.0") def setMax(value: Double): this.type = set(max, value) + @Since("1.5.0") override def fit(dataset: DataFrame): MinMaxScalerModel = { transformSchema(dataset.schema, logging = true) val input = dataset.select($(inputCol)).map { case Row(v: Vector) => v } @@ -110,10 +117,12 @@ class MinMaxScaler(override val uid: String) copyValues(new MinMaxScalerModel(uid, summary.min, summary.max).setParent(this)) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.5.0") override def copy(extra: ParamMap): MinMaxScaler = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala index 8de10eb51f923..6fc3425ecfed6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala @@ -34,10 +34,12 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType} * When the input array length is less than n (number of elements per n-gram), no n-grams are * returned. 
*/ +@Since("1.5.0") @Experimental class NGram(override val uid: String) extends UnaryTransformer[Seq[String], Seq[String], NGram] { + @Since("1.5.0") def this() = this(Identifiable.randomUID("ngram")) /** @@ -49,21 +51,26 @@ class NGram(override val uid: String) ParamValidators.gtEq(1)) /** @group setParam */ + @Since("1.5.0") def setN(value: Int): this.type = set(n, value) /** @group getParam */ + @Since("1.5.0") def getN: Int = $(n) setDefault(n -> 2) + @Since("1.5.0") override protected def createTransformFunc: Seq[String] => Seq[String] = { _.iterator.sliding($(n)).withPartial(false).map(_.mkString(" ")).toSeq } + @Since("1.5.0") override protected def validateInputType(inputType: DataType): Unit = { require(inputType.sameType(ArrayType(StringType)), s"Input type must be ArrayType(StringType) but got $inputType.") } + @Since("1.5.0") override protected def outputDataType: DataType = new ArrayType(StringType, false) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala index 8282e5ffa17f7..cfb2388a39513 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala @@ -29,9 +29,11 @@ import org.apache.spark.sql.types.DataType * :: Experimental :: * Normalize a vector to have unit norm using the given p-norm. */ +@Since("1.4.0") @Experimental class Normalizer(override val uid: String) extends UnaryTransformer[Vector, Vector, Normalizer] { + @Since("1.4.0") def this() = this(Identifiable.randomUID("normalizer")) /** @@ -44,15 +46,19 @@ class Normalizer(override val uid: String) extends UnaryTransformer[Vector, Vect setDefault(p -> 2.0) /** @group getParam */ + @Since("1.4.0") def getP: Double = $(p) /** @group setParam */ + @Since("1.4.0") def setP(value: Double): this.type = set(p, value) + @Since("1.4.0") override protected def createTransformFunc: Vector => Vector = { val normalizer = new feature.Normalizer($(p)) normalizer.transform } + @Since("1.4.0") override protected def outputDataType: DataType = new VectorUDT() } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala index 9c60d4084ec46..e434038d06569 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -42,10 +42,12 @@ import org.apache.spark.sql.types.{DoubleType, StructType} * * @see [[StringIndexer]] for converting categorical values into category indices */ +@Since("1.4.0") @Experimental class OneHotEncoder(override val uid: String) extends Transformer with HasInputCol with HasOutputCol { + @Since("1.4.0") def this() = this(Identifiable.randomUID("oneHot")) /** @@ -57,14 +59,18 @@ class OneHotEncoder(override val uid: String) extends Transformer setDefault(dropLast -> true) /** @group setParam */ + @Since("1.4.0") def setDropLast(value: Boolean): this.type = set(dropLast, value) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { val inputColName = $(inputCol) val outputColName = $(outputCol) @@ -120,6 +126,7 @@ class OneHotEncoder(override val uid: String) extends Transformer StructType(outputFields) } 
+ @Since("1.4.0") override def transform(dataset: DataFrame): DataFrame = { // schema transformation val inputColName = $(inputCol) @@ -164,5 +171,6 @@ class OneHotEncoder(override val uid: String) extends Transformer dataset.select(col("*"), encode(col(inputColName).cast(DoubleType)).as(outputColName, metadata)) } + @Since("1.4.1") override def copy(extra: ParamMap): OneHotEncoder = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index 539084704b653..1fad233df4c4a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -48,23 +48,29 @@ private[feature] trait PCAParams extends Params with HasInputCol with HasOutputC * :: Experimental :: * PCA trains a model to project vectors to a low-dimensional space using PCA. */ +@Since("1.5.0") @Experimental class PCA (override val uid: String) extends Estimator[PCAModel] with PCAParams { + @Since("1.5.0") def this() = this(Identifiable.randomUID("pca")) /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.5.0") def setK(value: Int): this.type = set(k, value) /** * Computes a [[PCAModel]] that contains the principal components of the input vectors. */ + @Since("1.5.0") override def fit(dataset: DataFrame): PCAModel = { transformSchema(dataset.schema, logging = true) val input = dataset.select($(inputCol)).map { case Row(v: Vector) => v} @@ -73,6 +79,7 @@ class PCA (override val uid: String) extends Estimator[PCAModel] with PCAParams copyValues(new PCAModel(uid, pcaModel).setParent(this)) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { val inputType = schema($(inputCol)).dataType require(inputType.isInstanceOf[VectorUDT], @@ -83,6 +90,7 @@ class PCA (override val uid: String) extends Estimator[PCAModel] with PCAParams StructType(outputFields) } + @Since("1.5.0") override def copy(extra: ParamMap): PCA = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala index d85e468562d4a..66a1939e2e106 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala @@ -34,10 +34,12 @@ import org.apache.spark.sql.types.DataType * multiplication distributes over addition". Take a 2-variable feature vector as an example: * `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`. 
*/ +@Since("1.4.0") @Experimental class PolynomialExpansion(override val uid: String) extends UnaryTransformer[Vector, Vector, PolynomialExpansion] { + @Since("1.4.0") def this() = this(Identifiable.randomUID("poly")) /** @@ -51,17 +53,22 @@ class PolynomialExpansion(override val uid: String) setDefault(degree -> 2) /** @group getParam */ + @Since("1.4.0") def getDegree: Int = $(degree) /** @group setParam */ + @Since("1.4.0") def setDegree(value: Int): this.type = set(degree, value) + @Since("1.4.0") override protected def createTransformFunc: Vector => Vector = { v => PolynomialExpansion.expand(v, $(degree)) } + @Since("1.4.0") override protected def outputDataType: DataType = new VectorUDT() + @Since("1.4.1") override def copy(extra: ParamMap): PolynomialExpansion = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala index 5c43a41bee3b4..8339edb9dcfc3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala @@ -46,9 +46,11 @@ private[feature] trait RFormulaBase extends HasFeaturesCol with HasLabelCol { * we support a limited subset of the R operators, including '~', '.', ':', '+', and '-'. Also see * the R formula docs here: http://stat.ethz.ch/R-manual/R-patched/library/stats/html/formula.html */ +@Since("1.5.0") @Experimental class RFormula(override val uid: String) extends Estimator[RFormulaModel] with RFormulaBase { + @Since("1.5.0") def this() = this(Identifiable.randomUID("rFormula")) /** @@ -62,15 +64,19 @@ class RFormula(override val uid: String) extends Estimator[RFormulaModel] with R * @group setParam * @param value an R formula in string form (e.g. "y ~ x + z") */ + @Since("1.5.0") def setFormula(value: String): this.type = set(formula, value) /** @group getParam */ + @Since("1.5.0") def getFormula: String = $(formula) /** @group setParam */ + @Since("1.5.0") def setFeaturesCol(value: String): this.type = set(featuresCol, value) /** @group setParam */ + @Since("1.5.0") def setLabelCol(value: String): this.type = set(labelCol, value) /** Whether the formula specifies fitting an intercept. 
*/ @@ -79,6 +85,7 @@ class RFormula(override val uid: String) extends Estimator[RFormulaModel] with R RFormulaParser.parse($(formula)).hasIntercept } + @Since("1.5.0") override def fit(dataset: DataFrame): RFormulaModel = { require(isDefined(formula), "Formula must be defined first.") val parsedFormula = RFormulaParser.parse($(formula)) @@ -145,6 +152,7 @@ class RFormula(override val uid: String) extends Estimator[RFormulaModel] with R } // optimistic schema; does not contain any ML attributes + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { if (hasLabelCol(schema)) { StructType(schema.fields :+ StructField($(featuresCol), new VectorUDT, true)) @@ -154,8 +162,10 @@ class RFormula(override val uid: String) extends Estimator[RFormulaModel] with R } } + @Since("1.5.0") override def copy(extra: ParamMap): RFormula = defaultCopy(extra) + @Since("1.5.0") override def toString: String = s"RFormula(${get(formula)}) (uid=$uid)" } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index f6d0b0c0e9e75..e368810b77fab 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -55,26 +55,33 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with * Standardizes features by removing the mean and scaling to unit variance using column summary * statistics on the samples in the training set. */ +@Since("1.2.0") @Experimental class StandardScaler(override val uid: String) extends Estimator[StandardScalerModel] with StandardScalerParams { + @Since("1.4.0") def this() = this(Identifiable.randomUID("stdScal")) setDefault(withMean -> false, withStd -> true) /** @group setParam */ + @Since("1.2.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.2.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.4.0") def setWithMean(value: Boolean): this.type = set(withMean, value) /** @group setParam */ + @Since("1.4.0") def setWithStd(value: Boolean): this.type = set(withStd, value) + @Since("1.2.0") override def fit(dataset: DataFrame): StandardScalerModel = { transformSchema(dataset.schema, logging = true) val input = dataset.select($(inputCol)).map { case Row(v: Vector) => v } @@ -83,6 +90,7 @@ class StandardScaler(override val uid: String) extends Estimator[StandardScalerM copyValues(new StandardScalerModel(uid, scalerModel).setParent(this)) } + @Since("1.3.0") override def transformSchema(schema: StructType): StructType = { val inputType = schema($(inputCol)).dataType require(inputType.isInstanceOf[VectorUDT], @@ -93,6 +101,7 @@ class StandardScaler(override val uid: String) extends Estimator[StandardScalerM StructType(outputFields) } + @Since("1.4.1") override def copy(extra: ParamMap): StandardScaler = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala index 2a79582625e9a..6465e63f3ae9b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala @@ -84,16 +84,20 @@ private[spark] object StopWords { * Note: null values from input array are preserved unless adding null to stopWords explicitly. 
  * @see [[http://en.wikipedia.org/wiki/Stop_words]]
  */
+@Since("1.5.0")
 @Experimental
 class StopWordsRemover(override val uid: String)
   extends Transformer with HasInputCol with HasOutputCol {

+  @Since("1.5.0")
   def this() = this(Identifiable.randomUID("stopWords"))

   /** @group setParam */
+  @Since("1.5.0")
   def setInputCol(value: String): this.type = set(inputCol, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setOutputCol(value: String): this.type = set(outputCol, value)

   /**
@@ -104,9 +108,11 @@ class StopWordsRemover(override val uid: String)
   val stopWords: StringArrayParam = new StringArrayParam(this, "stopWords", "stop words")

   /** @group setParam */
+  @Since("1.5.0")
   def setStopWords(value: Array[String]): this.type = set(stopWords, value)

   /** @group getParam */
+  @Since("1.5.0")
   def getStopWords: Array[String] = $(stopWords)

   /**
@@ -118,13 +124,16 @@ class StopWordsRemover(override val uid: String)
     "whether to do case-sensitive comparison during filtering")

   /** @group setParam */
+  @Since("1.5.0")
   def setCaseSensitive(value: Boolean): this.type = set(caseSensitive, value)

   /** @group getParam */
+  @Since("1.5.0")
   def getCaseSensitive: Boolean = $(caseSensitive)

   setDefault(stopWords -> StopWords.English, caseSensitive -> false)

+  @Since("1.5.0")
   override def transform(dataset: DataFrame): DataFrame = {
     val outputSchema = transformSchema(dataset.schema)
     val t = if ($(caseSensitive)) {
@@ -144,6 +153,7 @@ class StopWordsRemover(override val uid: String)
     dataset.select(col("*"), t(col($(inputCol))).as($(outputCol), metadata))
   }

+  @Since("1.5.0")
   override def transformSchema(schema: StructType): StructType = {
     val inputType = schema($(inputCol)).dataType
     require(inputType.sameType(ArrayType(StringType)),
@@ -153,5 +163,6 @@ class StopWordsRemover(override val uid: String)
     StructType(outputFields)
   }

+  @Since("1.5.0")
   override def copy(extra: ParamMap): StopWordsRemover = defaultCopy(extra)
 }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
index 486274cd75a14..0011b1683322a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
@@ -62,23 +62,29 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha
  *
  * @see [[IndexToString]] for the inverse transformation
  */
+@Since("1.4.0")
 @Experimental
 class StringIndexer(override val uid: String) extends Estimator[StringIndexerModel]
   with StringIndexerBase {

+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("strIdx"))

   /** @group setParam */
+  @Since("1.4.0")
   def setHandleInvalid(value: String): this.type = set(handleInvalid, value)

   setDefault(handleInvalid, "error")

   /** @group setParam */
+  @Since("1.4.0")
   def setInputCol(value: String): this.type = set(inputCol, value)

   /** @group setParam */
+  @Since("1.4.0")
   def setOutputCol(value: String): this.type = set(outputCol, value)

+  @Since("1.4.0")
   override def fit(dataset: DataFrame): StringIndexerModel = {
     val counts = dataset.select(col($(inputCol)).cast(StringType))
       .map(_.getString(0))
@@ -87,10 +93,12 @@ class StringIndexer(override val uid: String) extends Estimator[StringIndexerMod
     copyValues(new StringIndexerModel(uid, labels).setParent(this))
   }

+  @Since("1.4.0")
   override def transformSchema(schema: StructType): StructType = {
     validateAndTransformSchema(schema)
   }

+  @Since("1.4.1")
   override def copy(extra: ParamMap): StringIndexer = defaultCopy(extra)
 }

@@ -104,11 +112,13 @@ class StringIndexer(override val uid: String) extends Estimator[StringIndexerMod
  *
  * @param labels Ordered list of labels, corresponding to indices to be assigned.
  */
+@Since("1.4.0")
 @Experimental
 class StringIndexerModel (
     override val uid: String,
     val labels: Array[String]) extends Model[StringIndexerModel] with StringIndexerBase {

+  @Since("1.4.0")
   def this(labels: Array[String]) = this(Identifiable.randomUID("strIdx"), labels)

   private val labelToIndex: OpenHashMap[String, Double] = {
@@ -123,15 +133,19 @@ class StringIndexerModel (
   }

   /** @group setParam */
+  @Since("1.4.0")
   def setHandleInvalid(value: String): this.type = set(handleInvalid, value)

   setDefault(handleInvalid, "error")

   /** @group setParam */
+  @Since("1.4.0")
   def setInputCol(value: String): this.type = set(inputCol, value)

   /** @group setParam */
+  @Since("1.4.0")
   def setOutputCol(value: String): this.type = set(outputCol, value)

+  @Since("1.4.0")
   override def transform(dataset: DataFrame): DataFrame = {
     if (!dataset.schema.fieldNames.contains($(inputCol))) {
       logInfo(s"Input column ${$(inputCol)} does not exist during transformation. " +
@@ -163,6 +177,7 @@ class StringIndexerModel (
       indexer(dataset($(inputCol)).cast(StringType)).as($(outputCol), metadata))
   }

+  @Since("1.4.0")
   override def transformSchema(schema: StructType): StructType = {
     if (schema.fieldNames.contains($(inputCol))) {
       validateAndTransformSchema(schema)
@@ -172,6 +187,7 @@ class StringIndexerModel (
     }
   }

+  @Since("1.4.1")
   override def copy(extra: ParamMap): StringIndexerModel = {
     val copied = new StringIndexerModel(uid, labels)
     copyValues(copied, extra).setParent(parent)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
index 248288ca73e99..70895519a492b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
@@ -29,21 +29,27 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType}
  *
  * @see [[RegexTokenizer]]
  */
+@Since("1.2.0")
 @Experimental
 class Tokenizer(override val uid: String) extends UnaryTransformer[String, Seq[String], Tokenizer] {

+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("tok"))

+  @Since("1.2.0")
   override protected def createTransformFunc: String => Seq[String] = {
     _.toLowerCase.split("\\s")
   }

+  @Since("1.2.0")
   override protected def validateInputType(inputType: DataType): Unit = {
     require(inputType == StringType, s"Input type must be string type but got $inputType.")
   }

+  @Since("1.2.0")
   override protected def outputDataType: DataType = new ArrayType(StringType, true)

+  @Since("1.4.1")
   override def copy(extra: ParamMap): Tokenizer = defaultCopy(extra)
 }

@@ -54,10 +60,12 @@ class Tokenizer(override val uid: String) extends UnaryTransformer[String, Seq[S
  * Optional parameters also allow filtering tokens using a minimal length.
  * It returns an array of strings that can be empty.
*/ +@Since("1.4.0") @Experimental class RegexTokenizer(override val uid: String) extends UnaryTransformer[String, Seq[String], RegexTokenizer] { + @Since("1.4.0") def this() = this(Identifiable.randomUID("regexTok")) /** @@ -69,9 +77,11 @@ class RegexTokenizer(override val uid: String) ParamValidators.gtEq(0)) /** @group setParam */ + @Since("1.4.0") def setMinTokenLength(value: Int): this.type = set(minTokenLength, value) /** @group getParam */ + @Since("1.4.0") def getMinTokenLength: Int = $(minTokenLength) /** @@ -82,9 +92,11 @@ class RegexTokenizer(override val uid: String) val gaps: BooleanParam = new BooleanParam(this, "gaps", "Set regex to match gaps or tokens") /** @group setParam */ + @Since("1.4.0") def setGaps(value: Boolean): this.type = set(gaps, value) /** @group getParam */ + @Since("1.4.0") def getGaps: Boolean = $(gaps) /** @@ -95,13 +107,16 @@ class RegexTokenizer(override val uid: String) val pattern: Param[String] = new Param(this, "pattern", "regex pattern used for tokenizing") /** @group setParam */ + @Since("1.4.0") def setPattern(value: String): this.type = set(pattern, value) /** @group getParam */ + @Since("1.4.0") def getPattern: String = $(pattern) setDefault(minTokenLength -> 1, gaps -> true, pattern -> "\\s+") + @Since("1.4.0") override protected def createTransformFunc: String => Seq[String] = { str => val re = $(pattern).r val tokens = if ($(gaps)) re.split(str).toSeq else re.findAllIn(str).toSeq @@ -109,11 +124,14 @@ class RegexTokenizer(override val uid: String) tokens.filter(_.length >= minLength) } + @Since("1.4.0") override protected def validateInputType(inputType: DataType): Unit = { require(inputType == StringType, s"Input type must be string type but got $inputType.") } + @Since("1.4.0") override protected def outputDataType: DataType = new ArrayType(StringType, true) + @Since("1.4.1") override def copy(extra: ParamMap): RegexTokenizer = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index 086917fa680f8..b2555a7bdb621 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -35,18 +35,23 @@ import org.apache.spark.sql.types._ * :: Experimental :: * A feature transformer that merges multiple columns into a vector column. */ +@Since("1.4.0") @Experimental class VectorAssembler(override val uid: String) extends Transformer with HasInputCols with HasOutputCol { + @Since("1.4.0") def this() = this(Identifiable.randomUID("vecAssembler")) /** @group setParam */ + @Since("1.4.0") def setInputCols(value: Array[String]): this.type = set(inputCols, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) + @Since("1.4.0") override def transform(dataset: DataFrame): DataFrame = { // Schema transformation. 
     val schema = dataset.schema
@@ -103,6 +108,7 @@ class VectorAssembler(override val uid: String)
     dataset.select(col("*"), assembleFunc(struct(args : _*)).as($(outputCol), metadata))
   }

+  @Since("1.4.0")
   override def transformSchema(schema: StructType): StructType = {
     val inputColNames = $(inputCols)
     val outputColName = $(outputCol)
@@ -119,6 +125,7 @@ class VectorAssembler(override val uid: String)
     StructType(schema.fields :+ new StructField(outputColName, new VectorUDT, true))
   }

+  @Since("1.4.1")
   override def copy(extra: ParamMap): VectorAssembler = defaultCopy(extra)
 }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
index 52e0599e38d83..dd58f99e3dee9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
@@ -91,21 +91,27 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu
  *  - Add warning if a categorical feature has only 1 category.
  *  - Add option for allowing unknown categories.
  */
+@Since("1.4.0")
 @Experimental
 class VectorIndexer(override val uid: String) extends Estimator[VectorIndexerModel]
   with VectorIndexerParams {

+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("vecIdx"))

   /** @group setParam */
+  @Since("1.4.0")
   def setMaxCategories(value: Int): this.type = set(maxCategories, value)

   /** @group setParam */
+  @Since("1.4.0")
   def setInputCol(value: String): this.type = set(inputCol, value)

   /** @group setParam */
+  @Since("1.4.0")
   def setOutputCol(value: String): this.type = set(outputCol, value)

+  @Since("1.4.0")
   override def fit(dataset: DataFrame): VectorIndexerModel = {
     transformSchema(dataset.schema, logging = true)
     val firstRow = dataset.select($(inputCol)).take(1)
@@ -123,6 +129,7 @@ class VectorIndexer(override val uid: String) extends Estimator[VectorIndexerMod
     copyValues(model)
   }

+  @Since("1.4.0")
   override def transformSchema(schema: StructType): StructType = {
     // We do not transfer feature metadata since we do not know what types of features we will
     // produce in transform().
@@ -133,6 +140,7 @@ class VectorIndexer(override val uid: String) extends Estimator[VectorIndexerMod
     SchemaUtils.appendColumn(schema, $(outputCol), dataType)
   }

+  @Since("1.4.1")
   override def copy(extra: ParamMap): VectorIndexer = defaultCopy(extra)
 }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala
index fb3387d4aa9be..b6f6e84b94799 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala
@@ -40,10 +40,12 @@ import org.apache.spark.sql.types.StructType
  * The output vector will order features with the selected indices first (in the order given),
  * followed by the selected names (in the order given).
*/ +@Since("1.5.0") @Experimental final class VectorSlicer(override val uid: String) extends Transformer with HasInputCol with HasOutputCol { + @Since("1.5.0") def this() = this(Identifiable.randomUID("vectorSlicer")) /** @@ -59,9 +61,11 @@ final class VectorSlicer(override val uid: String) setDefault(indices -> Array.empty[Int]) /** @group getParam */ + @Since("1.5.0") def getIndices: Array[Int] = $(indices) /** @group setParam */ + @Since("1.5.0") def setIndices(value: Array[Int]): this.type = set(indices, value) /** @@ -78,22 +82,28 @@ final class VectorSlicer(override val uid: String) setDefault(names -> Array.empty[String]) /** @group getParam */ + @Since("1.5.0") def getNames: Array[String] = $(names) /** @group setParam */ + @Since("1.5.0") def setNames(value: Array[String]): this.type = set(names, value) /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) + @Since("1.5.0") override def validateParams(): Unit = { require($(indices).length > 0 || $(names).length > 0, s"VectorSlicer requires that at least one feature be selected.") } + @Since("1.5.0") override def transform(dataset: DataFrame): DataFrame = { // Validity checks transformSchema(dataset.schema) @@ -138,6 +148,7 @@ final class VectorSlicer(override val uid: String) indFeatures ++ nameFeatures } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { SchemaUtils.checkColumnType(schema, $(inputCol), new VectorUDT) @@ -150,6 +161,7 @@ final class VectorSlicer(override val uid: String) StructType(outputFields) } + @Since("1.5.0") override def copy(extra: ParamMap): VectorSlicer = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 9edab3af913ca..1bc963007bd19 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -91,35 +91,46 @@ private[feature] trait Word2VecBase extends Params * Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further * natural language processing or machine learning process. 
*/ +@Since("1.4.0") @Experimental final class Word2Vec(override val uid: String) extends Estimator[Word2VecModel] with Word2VecBase { + @Since("1.4.0") def this() = this(Identifiable.randomUID("w2v")) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.4.0") def setVectorSize(value: Int): this.type = set(vectorSize, value) /** @group setParam */ + @Since("1.4.0") def setStepSize(value: Double): this.type = set(stepSize, value) /** @group setParam */ + @Since("1.4.0") def setNumPartitions(value: Int): this.type = set(numPartitions, value) /** @group setParam */ + @Since("1.4.0") def setMaxIter(value: Int): this.type = set(maxIter, value) /** @group setParam */ + @Since("1.4.0") def setSeed(value: Long): this.type = set(seed, value) /** @group setParam */ + @Since("1.4.0") def setMinCount(value: Int): this.type = set(minCount, value) + @Since("1.4.0") override def fit(dataset: DataFrame): Word2VecModel = { transformSchema(dataset.schema, logging = true) val input = dataset.select($(inputCol)).map(_.getAs[Seq[String]](0)) @@ -134,10 +145,12 @@ final class Word2Vec(override val uid: String) extends Estimator[Word2VecModel] copyValues(new Word2VecModel(uid, wordVectors).setParent(this)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.4.1") override def copy(extra: ParamMap): Word2Vec = defaultCopy(extra) } From a4b17036c24be1b32a6ac115071a6465f1ce7008 Mon Sep 17 00:00:00 2001 From: Martin Brown Date: Wed, 16 Sep 2015 09:29:19 -0700 Subject: [PATCH 2/5] annotating constructor and constructor public variables --- .../main/scala/org/apache/spark/ml/feature/Binarizer.scala | 2 +- .../main/scala/org/apache/spark/ml/feature/Bucketizer.scala | 2 +- .../scala/org/apache/spark/ml/feature/CountVectorizer.scala | 5 +++-- mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala | 2 +- .../org/apache/spark/ml/feature/ElementwiseProduct.scala | 2 +- .../main/scala/org/apache/spark/ml/feature/HashingTF.scala | 2 +- mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala | 2 +- .../scala/org/apache/spark/ml/feature/MinMaxScaler.scala | 2 +- mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala | 2 +- .../main/scala/org/apache/spark/ml/feature/Normalizer.scala | 2 +- .../scala/org/apache/spark/ml/feature/OneHotEncoder.scala | 2 +- mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala | 2 +- .../org/apache/spark/ml/feature/PolynomialExpansion.scala | 2 +- .../main/scala/org/apache/spark/ml/feature/RFormula.scala | 2 +- .../scala/org/apache/spark/ml/feature/StandardScaler.scala | 2 +- .../scala/org/apache/spark/ml/feature/StopWordsRemover.scala | 2 +- .../scala/org/apache/spark/ml/feature/StringIndexer.scala | 2 +- .../main/scala/org/apache/spark/ml/feature/Tokenizer.scala | 4 ++-- .../scala/org/apache/spark/ml/feature/VectorAssembler.scala | 2 +- .../scala/org/apache/spark/ml/feature/VectorIndexer.scala | 2 +- .../scala/org/apache/spark/ml/feature/VectorSlicer.scala | 2 +- .../main/scala/org/apache/spark/ml/feature/Word2Vec.scala | 2 +- 22 files changed, 25 insertions(+), 24 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala index 7fd668deb8558..87508d56a66c4 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.types.{DoubleType, StructType} */ @Since("1.4.0") @Experimental -final class Binarizer(override val uid: String) +final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol { @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala index 334197be872ab..56c9fc85235b7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -36,7 +36,7 @@ import org.apache.spark.sql.types.{DoubleType, StructField, StructType} */ @Since("1.4.0") @Experimental -final class Bucketizer(override val uid: String) +final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Model[Bucketizer] with HasInputCol with HasOutputCol { @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala index 182cfd27ab1fb..6314c8fb5aa7d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala @@ -105,7 +105,7 @@ private[feature] trait CountVectorizerParams extends Params with HasInputCol wit */ @Since("1.5.0") @Experimental -class CountVectorizer(override val uid: String) +class CountVectorizer @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[CountVectorizerModel] with CountVectorizerParams { @Since("1.5.0") @@ -188,7 +188,8 @@ class CountVectorizer(override val uid: String) */ @Since("1.5.0") @Experimental -class CountVectorizerModel(override val uid: String, val vocabulary: Array[String]) +class CountVectorizerModel @Since("1.5.0") (@Since("1.5.0") override val uid: +String, @Since("1.5.0") val vocabulary: Array[String]) extends Model[CountVectorizerModel] with CountVectorizerParams { @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala index 8d525fab7d5ef..8764ed4657cbf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.types.DataType */ @Since("1.5.0") @Experimental -class DCT(override val uid: String) +class DCT @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends UnaryTransformer[Vector, Vector, DCT] { @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index 69c947186540e..16dc9b3fb1581 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.types.DataType */ @Since("1.4.0") @Experimental -class ElementwiseProduct(override val uid: String) +class ElementwiseProduct @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[Vector, Vector, ElementwiseProduct] { @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index eb2eac5a7cb22..dc66d5d398fef 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.types.{ArrayType, StructType} */ @Since("1.2.0") @Experimental -class HashingTF(override val uid: String) extends Transformer with HasInputCol with HasOutputCol { +class HashingTF @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol { @Since("1.4.0") def this() = this(Identifiable.randomUID("hashingTF")) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index 6dfaeccf2c5f9..2b52bcb26e868 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -61,7 +61,7 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol */ @Since("1.4.0") @Experimental -final class IDF(override val uid: String) extends Estimator[IDFModel] with IDFBase { +final class IDF @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Estimator[IDFModel] with IDFBase { @Since("1.4.0") def this() = this(Identifiable.randomUID("idf")) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala index 96ea993a418b1..3ace1b9f5d661 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala @@ -85,7 +85,7 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H */ @Since("1.5.0") @Experimental -class MinMaxScaler(override val uid: String) +class MinMaxScaler @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[MinMaxScalerModel] with MinMaxScalerParams { @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala index 6fc3425ecfed6..cc1ab425d988f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala @@ -36,7 +36,7 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType} */ @Since("1.5.0") @Experimental -class NGram(override val uid: String) +class NGram @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends UnaryTransformer[Seq[String], Seq[String], NGram] { @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala index cfb2388a39513..fa3c7d0f2529e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.types.DataType */ @Since("1.4.0") @Experimental -class Normalizer(override val uid: String) extends UnaryTransformer[Vector, Vector, Normalizer] { +class Normalizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[Vector, Vector, Normalizer] { @Since("1.4.0") def this() = this(Identifiable.randomUID("normalizer")) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala 
index e434038d06569..b40bc3bca2864 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -44,7 +44,7 @@ import org.apache.spark.sql.types.{DoubleType, StructType} */ @Since("1.4.0") @Experimental -class OneHotEncoder(override val uid: String) extends Transformer +class OneHotEncoder @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol { @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index 1fad233df4c4a..68b4012ef51a8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -50,7 +50,7 @@ private[feature] trait PCAParams extends Params with HasInputCol with HasOutputC */ @Since("1.5.0") @Experimental -class PCA (override val uid: String) extends Estimator[PCAModel] with PCAParams { +class PCA @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[PCAModel] with PCAParams { @Since("1.5.0") def this() = this(Identifiable.randomUID("pca")) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala index 66a1939e2e106..e9e0580c29203 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala @@ -36,7 +36,7 @@ import org.apache.spark.sql.types.DataType */ @Since("1.4.0") @Experimental -class PolynomialExpansion(override val uid: String) +class PolynomialExpansion @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[Vector, Vector, PolynomialExpansion] { @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala index 8339edb9dcfc3..103161dee708e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala @@ -48,7 +48,7 @@ private[feature] trait RFormulaBase extends HasFeaturesCol with HasLabelCol { */ @Since("1.5.0") @Experimental -class RFormula(override val uid: String) extends Estimator[RFormulaModel] with RFormulaBase { +class RFormula @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[RFormulaModel] with RFormulaBase { @Since("1.5.0") def this() = this(Identifiable.randomUID("rFormula")) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index e368810b77fab..81c9cb170bd21 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -57,7 +57,7 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with */ @Since("1.2.0") @Experimental -class StandardScaler(override val uid: String) extends Estimator[StandardScalerModel] +class StandardScaler @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends Estimator[StandardScalerModel] with StandardScalerParams { @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala index 
index 6465e63f3ae9b..5570cf482636e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
@@ -86,7 +86,7 @@ private[spark] object StopWords {
  */
 @Since("1.5.0")
 @Experimental
-class StopWordsRemover(override val uid: String)
+class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String)
   extends Transformer with HasInputCol with HasOutputCol {

   @Since("1.5.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
index 0011b1683322a..55e1a27373686 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
@@ -64,7 +64,7 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha
  */
 @Since("1.4.0")
 @Experimental
-class StringIndexer(override val uid: String) extends Estimator[StringIndexerModel]
+class StringIndexer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Estimator[StringIndexerModel]
   with StringIndexerBase {

   @Since("1.4.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
index 70895519a492b..dc6fd37d1ad69 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
@@ -31,7 +31,7 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType}
  */
 @Since("1.2.0")
 @Experimental
-class Tokenizer(override val uid: String) extends UnaryTransformer[String, Seq[String], Tokenizer] {
+class Tokenizer @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[String, Seq[String], Tokenizer] {

   @Since("1.4.0")
   def this() = this(Identifiable.randomUID("tok"))
@@ -62,7 +62,7 @@ class Tokenizer(override val uid: String) extends UnaryTransformer[String, Seq[S
  */
 @Since("1.4.0")
 @Experimental
-class RegexTokenizer(override val uid: String)
+class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends UnaryTransformer[String, Seq[String], RegexTokenizer] {

   @Since("1.4.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
index b2555a7bdb621..311abc4b9dc05 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
@@ -37,7 +37,7 @@ import org.apache.spark.sql.types._
  */
 @Since("1.4.0")
 @Experimental
-class VectorAssembler(override val uid: String)
+class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends Transformer with HasInputCols with HasOutputCol {

   @Since("1.4.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
index dd58f99e3dee9..1024d739e589e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
@@ -93,7 +93,7 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu
  */
 @Since("1.4.0")
 @Experimental
-class VectorIndexer(override val uid: String) extends Estimator[VectorIndexerModel]
+class VectorIndexer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Estimator[VectorIndexerModel]
(@Since("1.4.0") override val uid: String) extends Estimator[VectorIndexerModel] with VectorIndexerParams { @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala index b6f6e84b94799..ac2b1782b076c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala @@ -42,7 +42,7 @@ import org.apache.spark.sql.types.StructType */ @Since("1.5.0") @Experimental -final class VectorSlicer(override val uid: String) +final class VectorSlicer @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol { @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 1bc963007bd19..d4ff23d18da84 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -93,7 +93,7 @@ private[feature] trait Word2VecBase extends Params */ @Since("1.4.0") @Experimental -final class Word2Vec(override val uid: String) extends Estimator[Word2VecModel] with Word2VecBase { +final class Word2Vec @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Estimator[Word2VecModel] with Word2VecBase { @Since("1.4.0") def this() = this(Identifiable.randomUID("w2v")) From e69acd45b80af6dfcaa98df5f7b531f87c4702ad Mon Sep 17 00:00:00 2001 From: Martin Brown Date: Fri, 18 Sep 2015 18:32:28 -0700 Subject: [PATCH 3/5] fixing stylecheck errors (lines >100 chars) --- .../src/main/scala/org/apache/spark/ml/feature/HashingTF.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala | 3 ++- .../main/scala/org/apache/spark/ml/feature/Normalizer.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala | 3 ++- .../src/main/scala/org/apache/spark/ml/feature/RFormula.scala | 3 ++- .../scala/org/apache/spark/ml/feature/StandardScaler.scala | 3 ++- .../main/scala/org/apache/spark/ml/feature/StringIndexer.scala | 3 ++- .../src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala | 3 ++- .../main/scala/org/apache/spark/ml/feature/VectorIndexer.scala | 3 ++- .../src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala | 3 ++- 10 files changed, 20 insertions(+), 10 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index dc66d5d398fef..66d7ea2f96cf7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -34,7 +34,8 @@ import org.apache.spark.sql.types.{ArrayType, StructType} */ @Since("1.2.0") @Experimental -class HashingTF @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol { +class HashingTF @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends Transformer + with HasInputCol with HasOutputCol { @Since("1.4.0") def this() = this(Identifiable.randomUID("hashingTF")) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index 2b52bcb26e868..26967b2cf37e3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -61,7 +61,8 @@ 
From e69acd45b80af6dfcaa98df5f7b531f87c4702ad Mon Sep 17 00:00:00 2001
From: Martin Brown
Date: Fri, 18 Sep 2015 18:32:28 -0700
Subject: [PATCH 3/5] fixing stylecheck errors (lines >100 chars)

---
 .../src/main/scala/org/apache/spark/ml/feature/HashingTF.scala | 3 ++-
 mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala | 3 ++-
 .../main/scala/org/apache/spark/ml/feature/Normalizer.scala | 3 ++-
 mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala | 3 ++-
 .../src/main/scala/org/apache/spark/ml/feature/RFormula.scala | 3 ++-
 .../scala/org/apache/spark/ml/feature/StandardScaler.scala | 3 ++-
 .../main/scala/org/apache/spark/ml/feature/StringIndexer.scala | 3 ++-
 .../src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala | 3 ++-
 .../main/scala/org/apache/spark/ml/feature/VectorIndexer.scala | 3 ++-
 .../src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala | 3 ++-
 10 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
index dc66d5d398fef..66d7ea2f96cf7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
@@ -34,7 +34,8 @@ import org.apache.spark.sql.types.{ArrayType, StructType}
  */
 @Since("1.2.0")
 @Experimental
-class HashingTF @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol {
+class HashingTF @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends Transformer
+  with HasInputCol with HasOutputCol {

   @Since("1.4.0")
   def this() = this(Identifiable.randomUID("hashingTF"))
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
index 2b52bcb26e868..26967b2cf37e3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
@@ -61,7 +61,8 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol
  */
 @Since("1.4.0")
 @Experimental
-final class IDF @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Estimator[IDFModel] with IDFBase {
+final class IDF @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends
+  Estimator[IDFModel] with IDFBase {

   @Since("1.4.0")
   def this() = this(Identifiable.randomUID("idf"))
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
index fa3c7d0f2529e..d77dfe598d442 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
@@ -31,7 +31,8 @@ import org.apache.spark.sql.types.DataType
  */
 @Since("1.4.0")
 @Experimental
-class Normalizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[Vector, Vector, Normalizer] {
+class Normalizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends
+  UnaryTransformer[Vector, Vector, Normalizer] {

   @Since("1.4.0")
   def this() = this(Identifiable.randomUID("normalizer"))
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
index 68b4012ef51a8..2daab44135071 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
@@ -50,7 +50,8 @@ private[feature] trait PCAParams extends Params with HasInputCol with HasOutputC
  */
 @Since("1.5.0")
 @Experimental
-class PCA @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[PCAModel] with PCAParams {
+class PCA @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[PCAModel]
+  with PCAParams {

   @Since("1.5.0")
   def this() = this(Identifiable.randomUID("pca"))
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
index 103161dee708e..27ab27abc7232 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
@@ -48,7 +48,8 @@ private[feature] trait RFormulaBase extends HasFeaturesCol with HasLabelCol {
  */
 @Since("1.5.0")
 @Experimental
-class RFormula @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[RFormulaModel] with RFormulaBase {
+class RFormula @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends
+  Estimator[RFormulaModel] with RFormulaBase {

   @Since("1.5.0")
   def this() = this(Identifiable.randomUID("rFormula"))
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
index 81c9cb170bd21..36fdf8992107e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
@@ -57,7 +57,8 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with
  */
 @Since("1.2.0")
 @Experimental
-class StandardScaler @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends Estimator[StandardScalerModel]
+class StandardScaler @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends
+  Estimator[StandardScalerModel]
   with StandardScalerParams {

   @Since("1.4.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
index 55e1a27373686..9824d73fb2db5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
@@ -64,7 +64,8 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha
  */
 @Since("1.4.0")
 @Experimental
-class StringIndexer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Estimator[StringIndexerModel]
+class StringIndexer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends
+  Estimator[StringIndexerModel]
   with StringIndexerBase {

   @Since("1.4.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
index dc6fd37d1ad69..dbeb91c7f3dad 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
@@ -31,7 +31,8 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType}
  */
 @Since("1.2.0")
 @Experimental
-class Tokenizer @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[String, Seq[String], Tokenizer] {
+class Tokenizer @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends
+  UnaryTransformer[String, Seq[String], Tokenizer] {

   @Since("1.4.0")
   def this() = this(Identifiable.randomUID("tok"))
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
index 1024d739e589e..c09327b387bd5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
@@ -93,7 +93,8 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu
  */
 @Since("1.4.0")
 @Experimental
-class VectorIndexer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Estimator[VectorIndexerModel]
+class VectorIndexer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends
+  Estimator[VectorIndexerModel]
   with VectorIndexerParams {

   @Since("1.4.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
index d4ff23d18da84..80482f288eca3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
@@ -93,7 +93,8 @@ private[feature] trait Word2VecBase extends Params
  */
 @Since("1.4.0")
 @Experimental
-final class Word2Vec @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Estimator[Word2VecModel] with Word2VecBase {
+final class Word2Vec @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends
+  Estimator[Word2VecModel] with Word2VecBase {

   @Since("1.4.0")
   def this() = this(Identifiable.randomUID("w2v"))
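Note: Spark's scalastyle check rejects source lines longer than 100 characters, which is what this patch fixes by breaking each class header after `extends` (or before the mixed-in trait) and indenting the continuation two spaces. To spot offenders quickly before the full style check runs, a standalone helper along these lines works; this script is illustrative and not part of the Spark build:

    import scala.io.Source

    // Report every line longer than 100 characters in the files given as arguments.
    object LongLines {
      def main(args: Array[String]): Unit = {
        for {
          path <- args
          (line, idx) <- Source.fromFile(path).getLines().zipWithIndex
          if line.length > 100
        } println(s"$path:${idx + 1}: ${line.length} chars")
      }
    }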
From b0881a30c82da80cbf0facac9c4732b45b1e7b48 Mon Sep 17 00:00:00 2001
From: Martin Brown
Date: Mon, 21 Sep 2015 14:44:39 -0700
Subject: [PATCH 4/5] Missing import of custom Since(..) annotation

---
 .../src/main/scala/org/apache/spark/ml/feature/Binarizer.scala | 2 +-
 .../src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala | 2 +-
 .../scala/org/apache/spark/ml/feature/CountVectorizer.scala | 2 +-
 mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala | 2 +-
 .../scala/org/apache/spark/ml/feature/ElementwiseProduct.scala | 2 +-
 .../src/main/scala/org/apache/spark/ml/feature/HashingTF.scala | 2 +-
 mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala | 2 +-
 .../main/scala/org/apache/spark/ml/feature/Interaction.scala | 2 +-
 .../main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala | 2 +-
 mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala | 2 +-
 .../src/main/scala/org/apache/spark/ml/feature/Normalizer.scala | 2 +-
 .../main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala | 2 +-
 mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala | 2 +-
 .../scala/org/apache/spark/ml/feature/PolynomialExpansion.scala | 2 +-
 mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala | 2 +-
 .../main/scala/org/apache/spark/ml/feature/SQLTransformer.scala | 2 +-
 .../main/scala/org/apache/spark/ml/feature/StandardScaler.scala | 2 +-
 .../scala/org/apache/spark/ml/feature/StopWordsRemover.scala | 2 +-
 .../main/scala/org/apache/spark/ml/feature/StringIndexer.scala | 2 +-
 .../src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala | 2 +-
 .../scala/org/apache/spark/ml/feature/VectorAssembler.scala | 2 +-
 .../main/scala/org/apache/spark/ml/feature/VectorIndexer.scala | 2 +-
 .../main/scala/org/apache/spark/ml/feature/VectorSlicer.scala | 2 +-
 mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala | 2 +-
 24 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
index 87508d56a66c4..7e9d4a44273ef 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.feature

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.Transformer
 import org.apache.spark.ml.attribute.BinaryAttribute
 import org.apache.spark.ml.param._
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
index 56c9fc85235b7..be525e84b3b40 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
 import java.{util => ju}

 import org.apache.spark.SparkException
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.Model
 import org.apache.spark.ml.attribute.NominalAttribute
 import org.apache.spark.ml.param._
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala
index 6314c8fb5aa7d..f2dd827e5cf4f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala
@@ -16,7 +16,7 @@
  */
 package org.apache.spark.ml.feature

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
index 8764ed4657cbf..dd8d9f8768562 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.feature

 import edu.emory.mathcs.jtransforms.dct._

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.UnaryTransformer
 import org.apache.spark.ml.param.BooleanParam
 import org.apache.spark.ml.util.Identifiable
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
index 16dc9b3fb1581..9db1c61ee415b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.feature

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.UnaryTransformer
 import org.apache.spark.ml.param.{ParamMap, Param}
 import org.apache.spark.ml.util.Identifiable
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
index 66d7ea2f96cf7..490c5a5625864 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.feature

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.Transformer
 import org.apache.spark.ml.attribute.AttributeGroup
 import org.apache.spark.ml.param.{IntParam, ParamMap, ParamValidators}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
index 26967b2cf37e3..b5d6f2c98973e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.feature

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml._
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.param.shared._
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala
index 37f7862476cfe..1cfab19e1553d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
 import scala.collection.mutable.ArrayBuilder

 import org.apache.spark.SparkException
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.attribute._
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.param.shared._
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
index 3ace1b9f5d661..b43b121d4183c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.feature

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
 import org.apache.spark.ml.param.{ParamMap, DoubleParam, Params}
 import org.apache.spark.ml.util.Identifiable
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala
index cc1ab425d988f..7ad9f6809e556 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.feature

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.UnaryTransformer
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.util.Identifiable
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
index d77dfe598d442..9d83aa0957a07 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.feature

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.UnaryTransformer
 import org.apache.spark.ml.param.{DoubleParam, ParamValidators}
 import org.apache.spark.ml.util.Identifiable
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
index b40bc3bca2864..ed218df267c9d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.feature

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.Transformer
 import org.apache.spark.ml.attribute._
 import org.apache.spark.ml.param._
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
index 2daab44135071..37f15a77f33d4 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.feature

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml._
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.param.shared._
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
index e9e0580c29203..4ffe7b63ee997 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.feature

 import scala.collection.mutable

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.UnaryTransformer
 import org.apache.spark.ml.param.{ParamMap, IntParam, ParamValidators}
 import org.apache.spark.ml.util.Identifiable
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
index 27ab27abc7232..74c9d75348a10 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
 import scala.collection.mutable
 import scala.collection.mutable.ArrayBuffer

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.attribute.AttributeGroup
 import org.apache.spark.ml.{Estimator, Model, Pipeline, PipelineModel, PipelineStage, Transformer}
 import org.apache.spark.ml.param.{Param, ParamMap}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
index 95e4305638730..c7e1a956b169c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
@@ -18,7 +18,7 @@ package org.apache.spark.ml.feature

 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.param.{ParamMap, Param}
 import org.apache.spark.ml.Transformer
 import org.apache.spark.ml.util.Identifiable
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
index 36fdf8992107e..b9002f7c605a7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.feature

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml._
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.param.shared._
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
index 5570cf482636e..19865777ed170 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.feature

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.Transformer
 import org.apache.spark.ml.param.{BooleanParam, ParamMap, StringArrayParam}
 import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
index 9824d73fb2db5..004038ab01628 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala
@@ -18,7 +18,7 @@ package org.apache.spark.ml.feature

 import org.apache.spark.SparkException
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.{Estimator, Model}
 import org.apache.spark.ml.attribute.{Attribute, NominalAttribute}
 import org.apache.spark.ml.param._
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
index dbeb91c7f3dad..1e2772650b3ee 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.feature

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.UnaryTransformer
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.util.Identifiable
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
index 311abc4b9dc05..211965d3fbdcb 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.feature
 import scala.collection.mutable.ArrayBuilder

 import org.apache.spark.SparkException
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.Transformer
 import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NumericAttribute, UnresolvedAttribute}
 import org.apache.spark.ml.param.ParamMap
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
index c09327b387bd5..e58fd9f7da6bc 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
@@ -22,7 +22,7 @@ import java.util.{Map => JMap}

 import scala.collection.JavaConverters._

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.{Estimator, Model}
 import org.apache.spark.ml.attribute._
 import org.apache.spark.ml.param.{IntParam, ParamMap, ParamValidators, Params}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala
index ac2b1782b076c..8ce24b6edfe68 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.feature

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.Transformer
 import org.apache.spark.ml.attribute.{Attribute, AttributeGroup}
 import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
index 80482f288eca3..298d97340e8c2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.feature

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.SparkContext
 import org.apache.spark.ml.{Estimator, Model}
 import org.apache.spark.ml.param._
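Note: the "custom" annotation in the subject line is Spark's own org.apache.spark.annotation.Since, a Scala StaticAnnotation rather than anything from the standard library, which is why each file needs the explicit import alongside Experimental. Its definition is roughly the following sketch (paraphrased from Spark's Since.scala; the meta-annotations are what allow it to target vals and constructor parameters such as uid):

    package org.apache.spark.annotation

    import scala.annotation.StaticAnnotation
    import scala.annotation.meta._

    // Records the Spark release in which an API element first appeared.
    @param @field @getter @setter @beanGetter @beanSetter
    private[spark] class Since(version: String) extends StaticAnnotation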
From b2cc944273e60d1e61322feb6949754aeecf8ddc Mon Sep 17 00:00:00 2001
From: Martin Brown
Date: Thu, 5 Nov 2015 18:22:10 -0800
Subject: [PATCH 5/5] correcting version for default constructors

---
 .../src/main/scala/org/apache/spark/ml/feature/HashingTF.scala | 2 +-
 .../main/scala/org/apache/spark/ml/feature/StandardScaler.scala | 2 +-
 .../src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
index 490c5a5625864..e223cbb83bbe0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala
@@ -37,7 +37,7 @@ import org.apache.spark.sql.types.{ArrayType, StructType}
 class HashingTF @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends Transformer
   with HasInputCol with HasOutputCol {

-  @Since("1.4.0")
+  @Since("1.2.0")
   def this() = this(Identifiable.randomUID("hashingTF"))

   /** @group setParam */
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
index b9002f7c605a7..d70c156dc0560 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
@@ -61,7 +61,7 @@ class StandardScaler @Since("1.2.0") (@Since("1.4.0") override val uid: String)
   Estimator[StandardScalerModel]
   with StandardScalerParams {

-  @Since("1.4.0")
+  @Since("1.2.0")
   def this() = this(Identifiable.randomUID("stdScal"))

   setDefault(withMean -> false, withStd -> true)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
index 1e2772650b3ee..1f8994db25987 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala
@@ -34,7 +34,7 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType}
 class Tokenizer @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends
   UnaryTransformer[String, Seq[String], Tokenizer] {

-  @Since("1.4.0")
+  @Since("1.2.0")
   def this() = this(Identifiable.randomUID("tok"))

   @Since("1.2.0")
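Note: the correction above encodes a subtle rule: a constructor is versioned by when it became callable, not by when the class was last touched. HashingTF() has been constructible since 1.2.0, while the explicit uid parameter only appeared in 1.4.0, so after this patch the HashingTF header reads as follows (excerpted from the patched file, with comments added here for explanation):

    @Since("1.2.0")          // class first shipped in 1.2.0
    @Experimental
    class HashingTF @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends Transformer
      with HasInputCol with HasOutputCol {

      @Since("1.2.0")        // HashingTF() has existed since 1.2.0,
                             // even though `uid` itself only dates to 1.4.0
      def this() = this(Identifiable.randomUID("hashingTF"))

The same reasoning applies to StandardScaler and Tokenizer, the other two classes introduced in 1.2.0.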