From c8cf4533a168849ed62248c57da2d2195b482ea6 Mon Sep 17 00:00:00 2001 From: Nick Pentreath Date: Fri, 3 Jun 2016 15:54:12 -0700 Subject: [PATCH 1/6] Add missing @Since annotations to ml.feature package --- .../apache/spark/ml/feature/Binarizer.scala | 8 +++++- .../apache/spark/ml/feature/Bucketizer.scala | 9 ++++++- .../spark/ml/feature/ChiSqSelector.scala | 15 +++++++++-- .../spark/ml/feature/CountVectorizer.scala | 20 ++++++++++++-- .../org/apache/spark/ml/feature/DCT.scala | 6 ++++- .../spark/ml/feature/ElementwiseProduct.scala | 6 ++++- .../apache/spark/ml/feature/HashingTF.scala | 10 ++++++- .../org/apache/spark/ml/feature/IDF.scala | 14 +++++++--- .../apache/spark/ml/feature/Interaction.scala | 4 +-- .../spark/ml/feature/MaxAbsScaler.scala | 16 ++++++++---- .../spark/ml/feature/MinMaxScaler.scala | 19 +++++++++++--- .../org/apache/spark/ml/feature/NGram.scala | 6 ++++- .../apache/spark/ml/feature/Normalizer.scala | 6 ++++- .../spark/ml/feature/OneHotEncoder.scala | 7 ++++- .../org/apache/spark/ml/feature/PCA.scala | 19 ++++++++++---- .../ml/feature/PolynomialExpansion.scala | 6 ++++- .../ml/feature/QuantileDiscretizer.scala | 8 +++++- .../apache/spark/ml/feature/RFormula.scala | 11 ++++++-- .../spark/ml/feature/SQLTransformer.scala | 2 +- .../spark/ml/feature/StandardScaler.scala | 20 ++++++++++---- .../spark/ml/feature/StopWordsRemover.scala | 10 ++++++- .../spark/ml/feature/StringIndexer.scala | 26 +++++++++++++++---- .../apache/spark/ml/feature/Tokenizer.scala | 16 ++++++++++-- .../spark/ml/feature/VectorAssembler.scala | 6 ++++- .../spark/ml/feature/VectorIndexer.scala | 20 ++++++++++---- .../spark/ml/feature/VectorSlicer.scala | 10 ++++++- .../apache/spark/ml/feature/Word2Vec.scala | 25 +++++++++++++++--- python/pyspark/ml/feature.py | 2 +- 28 files changed, 267 insertions(+), 60 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala index 318c8b8b2f7d6..a9156a44a99ce 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala @@ -35,9 +35,11 @@ import org.apache.spark.sql.types._ * Binarize a column of continuous features given a threshold. 
*/ @Experimental -final class Binarizer(override val uid: String) +@Since("1.4.0") +final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("binarizer")) /** @@ -51,17 +53,21 @@ final class Binarizer(override val uid: String) new DoubleParam(this, "threshold", "threshold used to binarize continuous features") /** @group getParam */ + @Since("1.4.0") def getThreshold: Double = $(threshold) /** @group setParam */ + @Since("1.4.0") def setThreshold(value: Double): this.type = set(threshold, value) setDefault(threshold -> 0.0) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala index ff988cc815160..2310659c89276 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -35,9 +35,11 @@ import org.apache.spark.sql.types.{DoubleType, StructField, StructType} * `Bucketizer` maps a column of continuous features to a column of feature buckets. */ @Experimental -final class Bucketizer(override val uid: String) +@Since("1.4.0") +final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Model[Bucketizer] with HasInputCol with HasOutputCol with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("bucketizer")) /** @@ -57,15 +59,19 @@ final class Bucketizer(override val uid: String) Bucketizer.checkSplits) /** @group getParam */ + @Since("1.4.0") def getSplits: Array[Double] = $(splits) /** @group setParam */ + @Since("1.4.0") def setSplits(value: Array[Double]): this.type = set(splits, value) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") @@ -96,6 +102,7 @@ final class Bucketizer(override val uid: String) } } +@Since("1.6.0") object Bucketizer extends DefaultParamsReadable[Bucketizer] { /** We require splits to be of length >= 3 and to be in strictly increasing order. */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala index e73a8f5d66087..c7c6926e95e24 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala @@ -62,21 +62,27 @@ private[feature] trait ChiSqSelectorParams extends Params * categorical label. 
*/ @Experimental -final class ChiSqSelector(override val uid: String) +@Since("1.6.0") +final class ChiSqSelector @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Estimator[ChiSqSelectorModel] with ChiSqSelectorParams with DefaultParamsWritable { + @Since("1.6.0") def this() = this(Identifiable.randomUID("chiSqSelector")) /** @group setParam */ + @Since("1.6.0") def setNumTopFeatures(value: Int): this.type = set(numTopFeatures, value) /** @group setParam */ + @Since("1.6.0") def setFeaturesCol(value: String): this.type = set(featuresCol, value) /** @group setParam */ + @Since("1.6.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.6.0") def setLabelCol(value: String): this.type = set(labelCol, value) @Since("2.0.0") @@ -112,23 +118,28 @@ object ChiSqSelector extends DefaultParamsReadable[ChiSqSelector] { * Model fitted by [[ChiSqSelector]]. */ @Experimental +@Since("1.6.0") final class ChiSqSelectorModel private[ml] ( - override val uid: String, + @Since("1.6.0") override val uid: String, private val chiSqSelector: feature.ChiSqSelectorModel) extends Model[ChiSqSelectorModel] with ChiSqSelectorParams with MLWritable { import ChiSqSelectorModel._ /** list of indices to select (filter). Must be ordered asc */ + @Since("1.6.0") val selectedFeatures: Array[Int] = chiSqSelector.selectedFeatures /** @group setParam */ + @Since("1.6.0") def setFeaturesCol(value: String): this.type = set(featuresCol, value) /** @group setParam */ + @Since("1.6.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.6.0") def setLabelCol(value: String): this.type = set(labelCol, value) @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala index 272567d09cda7..1c217512be99a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala @@ -120,27 +120,35 @@ private[feature] trait CountVectorizerParams extends Params with HasInputCol wit * Extracts a vocabulary from document collections and generates a [[CountVectorizerModel]]. */ @Experimental -class CountVectorizer(override val uid: String) +@Since("1.5.0") +class CountVectorizer @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[CountVectorizerModel] with CountVectorizerParams with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("cntVec")) /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.5.0") def setVocabSize(value: Int): this.type = set(vocabSize, value) /** @group setParam */ + @Since("1.5.0") def setMinDF(value: Double): this.type = set(minDF, value) /** @group setParam */ + @Since("1.5.0") def setMinTF(value: Double): this.type = set(minTF, value) /** @group setParam */ + @Since("2.0.0") def setBinary(value: Boolean): this.type = set(binary, value) @Since("2.0.0") @@ -196,26 +204,34 @@ object CountVectorizer extends DefaultParamsReadable[CountVectorizer] { * @param vocabulary An Array over terms. Only the terms in the vocabulary will be counted. 
*/ @Experimental -class CountVectorizerModel(override val uid: String, val vocabulary: Array[String]) +@Since("1.5.0") +class CountVectorizerModel( + @Since("1.5.0") override val uid: String, + @Since("1.5.0") val vocabulary: Array[String]) extends Model[CountVectorizerModel] with CountVectorizerParams with MLWritable { import CountVectorizerModel._ + @Since("1.5.0") def this(vocabulary: Array[String]) = { this(Identifiable.randomUID("cntVecModel"), vocabulary) set(vocabSize, vocabulary.length) } /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.5.0") def setMinTF(value: Double): this.type = set(minTF, value) /** @group setParam */ + @Since("2.0.0") def setBinary(value: Boolean): this.type = set(binary, value) /** Dictionary created from [[vocabulary]] and its indices, broadcast once for [[transform()]] */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala index 301358ef1226c..46a3de0926eea 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala @@ -36,9 +36,11 @@ import org.apache.spark.sql.types.DataType * More information on [[https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia]]. */ @Experimental -class DCT(override val uid: String) +@Since("1.5.0") +class DCT @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends UnaryTransformer[Vector, Vector, DCT] with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("dct")) /** @@ -50,9 +52,11 @@ class DCT(override val uid: String) this, "inverse", "Set transformer to perform inverse DCT") /** @group setParam */ + @Since("1.5.0") def setInverse(value: Boolean): this.type = set(inverse, value) /** @group getParam */ + @Since("1.5.0") def getInverse: Boolean = $(inverse) setDefault(inverse -> false) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index 9d2e60fa3f1e4..abef6c0990ea9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -33,9 +33,11 @@ import org.apache.spark.sql.types.DataType * multiplier. 
*/ @Experimental -class ElementwiseProduct(override val uid: String) +@Since("1.4.0") +class ElementwiseProduct @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[Vector, Vector, ElementwiseProduct] with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("elemProd")) /** @@ -45,9 +47,11 @@ class ElementwiseProduct(override val uid: String) val scalingVec: Param[Vector] = new Param(this, "scalingVec", "vector for hadamard product") /** @group setParam */ + @Since("1.4.0") def setScalingVec(value: Vector): this.type = set(scalingVec, value) /** @group getParam */ + @Since("1.4.0") def getScalingVec: Vector = getOrDefault(scalingVec) override protected def createTransformFunc: Vector => Vector = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index 94e1825ba61e4..269510b5c6cb4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -38,15 +38,19 @@ import org.apache.spark.sql.types.{ArrayType, StructType} * otherwise the features will not be mapped evenly to the columns. */ @Experimental -class HashingTF(override val uid: String) +@Since("1.2.0") +class HashingTF @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { + @Since("1.2.0") def this() = this(Identifiable.randomUID("hashingTF")) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @@ -71,15 +75,19 @@ class HashingTF(override val uid: String) setDefault(numFeatures -> (1 << 18), binary -> false) /** @group getParam */ + @Since("1.2.0") def getNumFeatures: Int = $(numFeatures) /** @group setParam */ + @Since("1.2.0") def setNumFeatures(value: Int): this.type = set(numFeatures, value) /** @group getParam */ + @Since("2.0.0") def getBinary: Boolean = $(binary) /** @group setParam */ + @Since("2.0.0") def setBinary(value: Boolean): this.type = set(binary, value) @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index 08beda6d7515d..38d0da1c1d7fa 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -64,18 +64,23 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol * Compute the Inverse Document Frequency (IDF) given a collection of documents. 
*/ @Experimental -final class IDF(override val uid: String) extends Estimator[IDFModel] with IDFBase - with DefaultParamsWritable { +@Since("1.4.0") +final class IDF @Since("1.4.0") (@Since("1.4.0") override val uid: String) + extends Estimator[IDFModel] with IDFBase with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("idf")) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.4.0") def setMinDocFreq(value: Int): this.type = set(minDocFreq, value) @Since("2.0.0") @@ -107,17 +112,20 @@ object IDF extends DefaultParamsReadable[IDF] { * Model fitted by [[IDF]]. */ @Experimental +@Since("1.4.0") class IDFModel private[ml] ( - override val uid: String, + @Since("1.4.0") override val uid: String, idfModel: feature.IDFModel) extends Model[IDFModel] with IDFBase with MLWritable { import IDFModel._ /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala index fa65ff987917e..dca28b5c5d34f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala @@ -42,9 +42,9 @@ import org.apache.spark.sql.types._ * `Vector(6, 8)` if all input features were numeric. If the first feature was instead nominal * with four categories, the output would then be `Vector(0, 0, 0, 0, 3, 4, 0, 0)`. */ -@Since("1.6.0") @Experimental -class Interaction @Since("1.6.0") (override val uid: String) extends Transformer +@Since("1.6.0") +class Interaction @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Transformer with HasInputCols with HasOutputCol with DefaultParamsWritable { @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala index 7298a18ff83b5..006d6413cbeb6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala @@ -54,16 +54,19 @@ private[feature] trait MaxAbsScalerParams extends Params with HasInputCol with H * any sparsity. 
*/ @Experimental -class MaxAbsScaler @Since("2.0.0") (override val uid: String) +@Since("2.0.0") +class MaxAbsScaler @Since("2.0.0") (@Since("2.0.0") override val uid: String) extends Estimator[MaxAbsScalerModel] with MaxAbsScalerParams with DefaultParamsWritable { @Since("2.0.0") def this() = this(Identifiable.randomUID("maxAbsScal")) /** @group setParam */ + @Since("2.0.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("2.0.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") @@ -88,7 +91,7 @@ class MaxAbsScaler @Since("2.0.0") (override val uid: String) override def copy(extra: ParamMap): MaxAbsScaler = defaultCopy(extra) } -@Since("1.6.0") +@Since("2.0.0") object MaxAbsScaler extends DefaultParamsReadable[MaxAbsScaler] { @Since("1.6.0") @@ -101,17 +104,20 @@ object MaxAbsScaler extends DefaultParamsReadable[MaxAbsScaler] { * */ @Experimental +@Since("2.0.0") class MaxAbsScalerModel private[ml] ( - override val uid: String, - val maxAbs: Vector) + @Since("2.0.0") override val uid: String, + @Since("2.0.0") val maxAbs: Vector) extends Model[MaxAbsScalerModel] with MaxAbsScalerParams with MLWritable { import MaxAbsScalerModel._ /** @group setParam */ + @Since("2.0.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("2.0.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") @@ -139,7 +145,7 @@ class MaxAbsScalerModel private[ml] ( override def write: MLWriter = new MaxAbsScalerModelWriter(this) } -@Since("1.6.0") +@Since("2.0.0") object MaxAbsScalerModel extends MLReadable[MaxAbsScalerModel] { private[MaxAbsScalerModel] diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala index a27bed5333c56..0260c9d6f13aa 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala @@ -85,23 +85,29 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H * transformer will be DenseVector even for sparse input. */ @Experimental -class MinMaxScaler(override val uid: String) +@Since("1.5.0") +class MinMaxScaler @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[MinMaxScalerModel] with MinMaxScalerParams with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("minMaxScal")) setDefault(min -> 0.0, max -> 1.0) /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.5.0") def setMin(value: Double): this.type = set(min, value) /** @group setParam */ + @Since("1.5.0") def setMax(value: Double): this.type = set(max, value) @Since("2.0.0") @@ -138,24 +144,29 @@ object MinMaxScaler extends DefaultParamsReadable[MinMaxScaler] { * TODO: The transformer does not yet set the metadata in the output column (SPARK-8529). 
*/ @Experimental +@Since("1.5.0") class MinMaxScalerModel private[ml] ( - override val uid: String, - val originalMin: Vector, - val originalMax: Vector) + @Since("1.5.0") override val uid: String, + @Since("1.5.0") val originalMin: Vector, + @Since("1.5.0") val originalMax: Vector) extends Model[MinMaxScalerModel] with MinMaxScalerParams with MLWritable { import MinMaxScalerModel._ /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.5.0") def setMin(value: Double): this.type = set(min, value) /** @group setParam */ + @Since("1.5.0") def setMax(value: Double): this.type = set(max, value) @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala index f8bc7e3f0c031..0b8d8c6146528 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala @@ -35,9 +35,11 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType} * returned. */ @Experimental -class NGram(override val uid: String) +@Since("1.5.0") +class NGram @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends UnaryTransformer[Seq[String], Seq[String], NGram] with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("ngram")) /** @@ -49,9 +51,11 @@ class NGram(override val uid: String) ParamValidators.gtEq(1)) /** @group setParam */ + @Since("1.5.0") def setN(value: Int): this.type = set(n, value) /** @group getParam */ + @Since("1.5.0") def getN: Int = $(n) setDefault(n -> 2) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala index 942ac7ebdb3bd..2f650e5c70a7c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala @@ -31,9 +31,11 @@ import org.apache.spark.sql.types.DataType * Normalize a vector to have unit norm using the given p-norm. 
*/ @Experimental -class Normalizer(override val uid: String) +@Since("1.4.0") +class Normalizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[Vector, Vector, Normalizer] with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("normalizer")) /** @@ -46,9 +48,11 @@ class Normalizer(override val uid: String) setDefault(p -> 2.0) /** @group getParam */ + @Since("1.4.0") def getP: Double = $(p) /** @group setParam */ + @Since("1.4.0") def setP(value: Double): this.type = set(p, value) override protected def createTransformFunc: Vector => Vector = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala index 3d1e6dd818829..4f2e826505be4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -43,9 +43,11 @@ import org.apache.spark.sql.types.{DoubleType, NumericType, StructType} * @see [[StringIndexer]] for converting categorical values into category indices */ @Experimental -class OneHotEncoder(override val uid: String) extends Transformer +@Since("1.4.0") +class OneHotEncoder @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("oneHot")) /** @@ -57,12 +59,15 @@ class OneHotEncoder(override val uid: String) extends Transformer setDefault(dropLast -> true) /** @group setParam */ + @Since("1.4.0") def setDropLast(value: Boolean): this.type = set(dropLast, value) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) override def transformSchema(schema: StructType): StructType = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index 2f667af9d10bd..479f49b0abaf4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -65,18 +65,24 @@ private[feature] trait PCAParams extends Params with HasInputCol with HasOutputC * principal components. */ @Experimental -class PCA (override val uid: String) extends Estimator[PCAModel] with PCAParams - with DefaultParamsWritable { +@Since("1.5.0") +class PCA @Since("1.5.0") ( + @Since("1.5.0") override val uid: String) + extends Estimator[PCAModel] with PCAParams with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("pca")) /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.5.0") def setK(value: Int): this.type = set(k, value) /** @@ -116,18 +122,21 @@ object PCA extends DefaultParamsReadable[PCA] { * each principal component. 
*/ @Experimental +@Since("1.5.0") class PCAModel private[ml] ( - override val uid: String, - val pc: DenseMatrix, - val explainedVariance: DenseVector) + @Since("1.5.0") override val uid: String, + @Since("1.6.0") val pc: DenseMatrix, + @Since("2.0.0") val explainedVariance: DenseVector) extends Model[PCAModel] with PCAParams with MLWritable { import PCAModel._ /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala index a01867701bd8b..284dfb36e050a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala @@ -35,9 +35,11 @@ import org.apache.spark.sql.types.DataType * `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`. */ @Experimental -class PolynomialExpansion(override val uid: String) +@Since("1.4.0") +class PolynomialExpansion @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[Vector, Vector, PolynomialExpansion] with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("poly")) /** @@ -51,9 +53,11 @@ class PolynomialExpansion(override val uid: String) setDefault(degree -> 2) /** @group getParam */ + @Since("1.4.0") def getDegree: Int = $(degree) /** @group setParam */ + @Since("1.4.0") def setDegree(value: Int): this.type = set(degree, value) override protected def createTransformFunc: Vector => Vector = { v => diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala index 1fefaa1fdd8df..50ac9a7e76128 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala @@ -74,21 +74,27 @@ private[feature] trait QuantileDiscretizerBase extends Params * covering all real values. 
*/ @Experimental -final class QuantileDiscretizer(override val uid: String) +@Since("1.6.0") +final class QuantileDiscretizer @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Estimator[Bucketizer] with QuantileDiscretizerBase with DefaultParamsWritable { + @Since("1.6.0") def this() = this(Identifiable.randomUID("quantileDiscretizer")) /** @group setParam */ + @Since("2.0.0") def setRelativeError(value: Double): this.type = set(relativeError, value) /** @group setParam */ + @Since("1.6.0") def setNumBuckets(value: Int): this.type = set(numBuckets, value) /** @group setParam */ + @Since("1.6.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.6.0") def setOutputCol(value: String): this.type = set(outputCol, value) override def transformSchema(schema: StructType): StructType = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala index a7ca0fe252b0d..085a83f103869 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala @@ -70,9 +70,11 @@ private[feature] trait RFormulaBase extends HasFeaturesCol with HasLabelCol { * will be created from the specified response variable in the formula. */ @Experimental -class RFormula(override val uid: String) +@Since("1.5.0") +class RFormula @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Estimator[RFormulaModel] with RFormulaBase with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("rFormula")) /** @@ -86,15 +88,19 @@ class RFormula(override val uid: String) * @group setParam * @param value an R formula in string form (e.g. "y ~ x + z") */ + @Since("1.5.0") def setFormula(value: String): this.type = set(formula, value) /** @group getParam */ + @Since("1.5.0") def getFormula: String = $(formula) /** @group setParam */ + @Since("1.5.0") def setFeaturesCol(value: String): this.type = set(featuresCol, value) /** @group setParam */ + @Since("1.5.0") def setLabelCol(value: String): this.type = set(labelCol, value) /** Whether the formula specifies fitting an intercept. */ @@ -201,8 +207,9 @@ object RFormula extends DefaultParamsReadable[RFormula] { * @param pipelineModel the fitted feature model, including factor to index mappings. 
*/ @Experimental +@Since("1.5.0") class RFormulaModel private[feature]( - override val uid: String, + @Since("1.5.0") override val uid: String, private[ml] val resolvedFormula: ResolvedRFormula, private[ml] val pipelineModel: PipelineModel) extends Model[RFormulaModel] with RFormulaBase with MLWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala index bd8f9494fb193..b8715746fee5b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala @@ -39,7 +39,7 @@ import org.apache.spark.sql.types.StructType */ @Experimental @Since("1.6.0") -class SQLTransformer @Since("1.6.0") (override val uid: String) extends Transformer +class SQLTransformer @Since("1.6.0") (@Since("1.6.0") override val uid: String) extends Transformer with DefaultParamsWritable { @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index 7cec369c23a8f..aab24ae692831 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -85,21 +85,28 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with * which is computed as the square root of the unbiased sample variance. */ @Experimental -class StandardScaler(override val uid: String) extends Estimator[StandardScalerModel] - with StandardScalerParams with DefaultParamsWritable { +@Since("1.2.0") +class StandardScaler @Since("1.2.0") ( + @Since("1.4.0") override val uid: String) + extends Estimator[StandardScalerModel] with StandardScalerParams with DefaultParamsWritable { + @Since("1.2.0") def this() = this(Identifiable.randomUID("stdScal")) /** @group setParam */ + @Since("1.2.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.2.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.4.0") def setWithMean(value: Boolean): this.type = set(withMean, value) /** @group setParam */ + @Since("1.4.0") def setWithStd(value: Boolean): this.type = set(withStd, value) @Since("2.0.0") @@ -135,18 +142,21 @@ object StandardScaler extends DefaultParamsReadable[StandardScaler] { * @param mean Mean of the StandardScalerModel */ @Experimental +@Since("1.2.0") class StandardScalerModel private[ml] ( - override val uid: String, - val std: Vector, - val mean: Vector) + @Since("1.4.0") override val uid: String, + @Since("1.5.0") val std: Vector, + @Since("1.5.0") val mean: Vector) extends Model[StandardScalerModel] with StandardScalerParams with MLWritable { import StandardScalerModel._ /** @group setParam */ + @Since("1.2.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.2.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala index 11864cb8f439a..1f111ccab8fc5 100755 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala @@ -33,15 +33,19 @@ import org.apache.spark.sql.types.{ArrayType, StringType, StructType} 
* @see [[http://en.wikipedia.org/wiki/Stop_words]] */ @Experimental -class StopWordsRemover(override val uid: String) +@Since("1.5.0") +class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("stopWords")) /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @@ -54,9 +58,11 @@ class StopWordsRemover(override val uid: String) new StringArrayParam(this, "stopWords", "the words to be filtered out") /** @group setParam */ + @Since("1.5.0") def setStopWords(value: Array[String]): this.type = set(stopWords, value) /** @group getParam */ + @Since("1.5.0") def getStopWords: Array[String] = $(stopWords) /** @@ -68,9 +74,11 @@ class StopWordsRemover(override val uid: String) "whether to do a case-sensitive comparison over the stop words") /** @group setParam */ + @Since("1.5.0") def setCaseSensitive(value: Boolean): this.type = set(caseSensitive, value) /** @group getParam */ + @Since("1.5.0") def getCaseSensitive: Boolean = $(caseSensitive) setDefault(stopWords -> StopWordsRemover.loadDefaultStopWords("english"), caseSensitive -> false) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index cc0571fd7e39b..c7931c50f2cdf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -64,22 +64,27 @@ private[feature] trait StringIndexerBase extends Params with HasInputCol with Ha * @see [[IndexToString]] for the inverse transformation */ @Experimental -class StringIndexer(override val uid: String) extends Estimator[StringIndexerModel] +@Since("1.4.0") +class StringIndexer @Since("1.4.0") ( + @Since("1.4.0") override val uid: String) extends Estimator[StringIndexerModel] with StringIndexerBase with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("strIdx")) /** @group setParam */ + @Since("1.6.0") def setHandleInvalid(value: String): this.type = set(handleInvalid, value) setDefault(handleInvalid, "error") /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) - @Since("2.0.0") override def fit(dataset: Dataset[_]): StringIndexerModel = { val counts = dataset.select(col($(inputCol)).cast(StringType)) @@ -115,13 +120,15 @@ object StringIndexer extends DefaultParamsReadable[StringIndexer] { * @param labels Ordered list of labels, corresponding to indices to be assigned. 
*/ @Experimental +@Since("1.4.0") class StringIndexerModel ( - override val uid: String, - val labels: Array[String]) + @Since("1.4.0") override val uid: String, + @Since("1.5.0") val labels: Array[String]) extends Model[StringIndexerModel] with StringIndexerBase with MLWritable { import StringIndexerModel._ + @Since("1.5.0") def this(labels: Array[String]) = this(Identifiable.randomUID("strIdx"), labels) private val labelToIndex: OpenHashMap[String, Double] = { @@ -136,13 +143,16 @@ class StringIndexerModel ( } /** @group setParam */ + @Since("1.6.0") def setHandleInvalid(value: String): this.type = set(handleInvalid, value) setDefault(handleInvalid, "error") /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") @@ -245,19 +255,24 @@ object StringIndexerModel extends MLReadable[StringIndexerModel] { * @see [[StringIndexer]] for converting strings into indices */ @Experimental -class IndexToString private[ml] (override val uid: String) +@Since("1.5.0") +class IndexToString private[ml] (@Since("1.5.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("idxToStr")) /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.5.0") def setLabels(value: Array[String]): this.type = set(labels, value) /** @@ -271,6 +286,7 @@ class IndexToString private[ml] (override val uid: String) " If not provided or if empty, then metadata from inputCol is used instead.") /** @group getParam */ + @Since("1.5.0") final def getLabels: Array[String] = $(labels) override def transformSchema(schema: StructType): StructType = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index 8456a0e915804..64ac592397cea 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -30,9 +30,11 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType} * @see [[RegexTokenizer]] */ @Experimental -class Tokenizer(override val uid: String) +@Since("1.2.0") +class Tokenizer @Since("1.2.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[String, Seq[String], Tokenizer] with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("tok")) override protected def createTransformFunc: String => Seq[String] = { @@ -63,9 +65,11 @@ object Tokenizer extends DefaultParamsReadable[Tokenizer] { * It returns an array of strings that can be empty. 
*/ @Experimental -class RegexTokenizer(override val uid: String) +@Since("1.4.0") +class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[String, Seq[String], RegexTokenizer] with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("regexTok")) /** @@ -77,9 +81,11 @@ class RegexTokenizer(override val uid: String) ParamValidators.gtEq(0)) /** @group setParam */ + @Since("1.4.0") def setMinTokenLength(value: Int): this.type = set(minTokenLength, value) /** @group getParam */ + @Since("1.4.0") def getMinTokenLength: Int = $(minTokenLength) /** @@ -90,9 +96,11 @@ class RegexTokenizer(override val uid: String) val gaps: BooleanParam = new BooleanParam(this, "gaps", "Set regex to match gaps or tokens") /** @group setParam */ + @Since("1.4.0") def setGaps(value: Boolean): this.type = set(gaps, value) /** @group getParam */ + @Since("1.4.0") def getGaps: Boolean = $(gaps) /** @@ -103,9 +111,11 @@ class RegexTokenizer(override val uid: String) val pattern: Param[String] = new Param(this, "pattern", "regex pattern used for tokenizing") /** @group setParam */ + @Since("1.4.0") def setPattern(value: String): this.type = set(pattern, value) /** @group getParam */ + @Since("1.4.0") def getPattern: String = $(pattern) /** @@ -117,9 +127,11 @@ class RegexTokenizer(override val uid: String) "whether to convert all characters to lowercase before tokenizing.") /** @group setParam */ + @Since("1.6.0") def setToLowercase(value: Boolean): this.type = set(toLowercase, value) /** @group getParam */ + @Since("1.6.0") def getToLowercase: Boolean = $(toLowercase) setDefault(minTokenLength -> 1, gaps -> true, pattern -> "\\s+", toLowercase -> true) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index 1bc24202b7615..f5dd26f712a03 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -36,15 +36,19 @@ import org.apache.spark.sql.types._ * A feature transformer that merges multiple columns into a vector column. */ @Experimental -class VectorAssembler(override val uid: String) +@Since("1.4.0") +class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends Transformer with HasInputCols with HasOutputCol with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("vecAssembler")) /** @group setParam */ + @Since("1.4.0") def setInputCols(value: Array[String]): this.type = set(inputCols, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index d814528ec48d1..aae71d759eca6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -94,18 +94,24 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu * - Add option for allowing unknown categories. 
*/ @Experimental -class VectorIndexer(override val uid: String) extends Estimator[VectorIndexerModel] - with VectorIndexerParams with DefaultParamsWritable { +@Since("1.4.0") +class VectorIndexer @Since("1.4.0") ( + @Since("1.4.0") override val uid: String) + extends Estimator[VectorIndexerModel] with VectorIndexerParams with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("vecIdx")) /** @group setParam */ + @Since("1.4.0") def setMaxCategories(value: Int): this.type = set(maxCategories, value) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") @@ -256,15 +262,17 @@ object VectorIndexer extends DefaultParamsReadable[VectorIndexer] { * If a feature is not in this map, it is treated as continuous. */ @Experimental +@Since("1.4.0") class VectorIndexerModel private[ml] ( - override val uid: String, - val numFeatures: Int, - val categoryMaps: Map[Int, Map[Double, Int]]) + @Since("1.4.0") override val uid: String, + @Since("1.4.0") val numFeatures: Int, + @Since("1.4.0") val categoryMaps: Map[Int, Map[Double, Int]]) extends Model[VectorIndexerModel] with VectorIndexerParams with MLWritable { import VectorIndexerModel._ /** Java-friendly version of [[categoryMaps]] */ + @Since("1.4.0") def javaCategoryMaps: JMap[JInt, JMap[JDouble, JInt]] = { categoryMaps.mapValues(_.asJava).asJava.asInstanceOf[JMap[JInt, JMap[JDouble, JInt]]] } @@ -342,9 +350,11 @@ class VectorIndexerModel private[ml] ( } /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala index 103738cd91c09..78902e72893fc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala @@ -41,9 +41,11 @@ import org.apache.spark.sql.types.StructType * followed by the selected names (in the order given). 
*/ @Experimental -final class VectorSlicer(override val uid: String) +@Since("1.5.0") +final class VectorSlicer @Since("1.5.0") (@Since("1.5.0") override val uid: String) extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable { + @Since("1.5.0") def this() = this(Identifiable.randomUID("vectorSlicer")) /** @@ -59,9 +61,11 @@ final class VectorSlicer(override val uid: String) setDefault(indices -> Array.empty[Int]) /** @group getParam */ + @Since("1.5.0") def getIndices: Array[Int] = $(indices) /** @group setParam */ + @Since("1.5.0") def setIndices(value: Array[Int]): this.type = set(indices, value) /** @@ -78,15 +82,19 @@ final class VectorSlicer(override val uid: String) setDefault(names -> Array.empty[String]) /** @group getParam */ + @Since("1.5.0") def getNames: Array[String] = $(names) /** @group setParam */ + @Since("1.5.0") def setNames(value: Array[String]): this.type = set(names, value) /** @group setParam */ + @Since("1.5.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.5.0") def setOutputCol(value: String): this.type = set(outputCol, value) @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 33515b22400be..803ff4202b0a5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -120,39 +120,52 @@ private[feature] trait Word2VecBase extends Params * natural language processing or machine learning process. */ @Experimental -final class Word2Vec(override val uid: String) extends Estimator[Word2VecModel] with Word2VecBase - with DefaultParamsWritable { +@Since("1.4.0") +final class Word2Vec @Since("1.4.0") ( + @Since("1.4.0") override val uid: String) + extends Estimator[Word2VecModel] with Word2VecBase with DefaultParamsWritable { + @Since("1.4.0") def this() = this(Identifiable.randomUID("w2v")) /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** @group setParam */ + @Since("1.4.0") def setVectorSize(value: Int): this.type = set(vectorSize, value) /** @group expertSetParam */ + @Since("1.6.0") def setWindowSize(value: Int): this.type = set(windowSize, value) /** @group setParam */ + @Since("1.4.0") def setStepSize(value: Double): this.type = set(stepSize, value) /** @group setParam */ + @Since("1.4.0") def setNumPartitions(value: Int): this.type = set(numPartitions, value) /** @group setParam */ + @Since("1.4.0") def setMaxIter(value: Int): this.type = set(maxIter, value) /** @group setParam */ + @Since("1.4.0") def setSeed(value: Long): this.type = set(seed, value) /** @group setParam */ + @Since("1.4.0") def setMinCount(value: Int): this.type = set(minCount, value) /** @group setParam */ + @Since("2.0.0") def setMaxSentenceLength(value: Int): this.type = set(maxSentenceLength, value) @Since("2.0.0") @@ -191,8 +204,9 @@ object Word2Vec extends DefaultParamsReadable[Word2Vec] { * Model fitted by [[Word2Vec]]. 
*/ @Experimental +@Since("1.4.0") class Word2VecModel private[ml] ( - override val uid: String, + @Since("1.4.0") override val uid: String, @transient private val wordVectors: feature.Word2VecModel) extends Model[Word2VecModel] with Word2VecBase with MLWritable { @@ -202,6 +216,7 @@ class Word2VecModel private[ml] ( * Returns a dataframe with two fields, "word" and "vector", with "word" being a String and * and the vector the DenseVector that it is mapped to. */ + @Since("1.5.0") @transient lazy val getVectors: DataFrame = { val spark = SparkSession.builder().getOrCreate() val wordVec = wordVectors.getVectors.mapValues(vec => Vectors.dense(vec.map(_.toDouble))) @@ -213,6 +228,7 @@ class Word2VecModel private[ml] ( * Returns a dataframe with the words and the cosine similarities between the * synonyms and the given word. */ + @Since("1.5.0") def findSynonyms(word: String, num: Int): DataFrame = { findSynonyms(wordVectors.transform(word), num) } @@ -222,15 +238,18 @@ class Word2VecModel private[ml] ( * of the word. Returns a dataframe with the words and the cosine similarities between the * synonyms and the given word vector. */ + @Since("1.5.0") def findSynonyms(word: Vector, num: Int): DataFrame = { val spark = SparkSession.builder().getOrCreate() spark.createDataFrame(wordVectors.findSynonyms(word, num)).toDF("word", "similarity") } /** @group setParam */ + @Since("1.4.0") def setInputCol(value: String): this.type = set(inputCol, value) /** @group setParam */ + @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) /** diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index a28764a7528dc..1b9b2b9465a0f 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -149,7 +149,7 @@ class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav >>> loadedBucketizer.getSplits() == bucketizer.getSplits() True - .. versionadded:: 1.3.0 + .. 
versionadded:: 1.4.0 """ splits = \ From 86766a9ed79f6d7df59dad53c1f22f568a77d00a Mon Sep 17 00:00:00 2001 From: Nick Pentreath Date: Mon, 13 Jun 2016 13:53:00 +0200 Subject: [PATCH 2/6] Fix 'uid' contstructor annotations --- .../scala/org/apache/spark/ml/feature/StandardScaler.scala | 2 +- .../main/scala/org/apache/spark/ml/feature/Tokenizer.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index aab24ae692831..6e175ae4528fb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -86,7 +86,7 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with */ @Experimental @Since("1.2.0") -class StandardScaler @Since("1.2.0") ( +class StandardScaler @Since("1.4.0") ( @Since("1.4.0") override val uid: String) extends Estimator[StandardScalerModel] with StandardScalerParams with DefaultParamsWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index 64ac592397cea..ff98341a6f683 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -31,10 +31,10 @@ import org.apache.spark.sql.types.{ArrayType, DataType, StringType} */ @Experimental @Since("1.2.0") -class Tokenizer @Since("1.2.0") (@Since("1.4.0") override val uid: String) +class Tokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) extends UnaryTransformer[String, Seq[String], Tokenizer] with DefaultParamsWritable { - @Since("1.4.0") + @Since("1.2.0") def this() = this(Identifiable.randomUID("tok")) override protected def createTransformFunc: String => Seq[String] = { From 86ab82b97d386bf8e1356640e2fb3e1a80d68014 Mon Sep 17 00:00:00 2001 From: Nick Pentreath Date: Fri, 17 Jun 2016 10:14:11 +0200 Subject: [PATCH 3/6] Add annotations for param vals --- .../main/scala/org/apache/spark/ml/feature/Binarizer.scala | 1 + .../main/scala/org/apache/spark/ml/feature/Bucketizer.scala | 1 + mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala | 1 + .../org/apache/spark/ml/feature/ElementwiseProduct.scala | 1 + .../main/scala/org/apache/spark/ml/feature/HashingTF.scala | 2 ++ mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala | 1 + .../main/scala/org/apache/spark/ml/feature/Normalizer.scala | 1 + .../scala/org/apache/spark/ml/feature/OneHotEncoder.scala | 1 + .../org/apache/spark/ml/feature/PolynomialExpansion.scala | 1 + .../src/main/scala/org/apache/spark/ml/feature/RFormula.scala | 1 + .../scala/org/apache/spark/ml/feature/StopWordsRemover.scala | 2 ++ .../scala/org/apache/spark/ml/feature/StringIndexer.scala | 1 + .../main/scala/org/apache/spark/ml/feature/Tokenizer.scala | 4 ++++ .../main/scala/org/apache/spark/ml/feature/VectorSlicer.scala | 2 ++ 14 files changed, 20 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala index a9156a44a99ce..9867c6f355a0a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala @@ -49,6 +49,7 @@ final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) * Default: 0.0 * @group param */ 
+ @Since("1.4.0") val threshold: DoubleParam = new DoubleParam(this, "threshold", "threshold used to binarize continuous features") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala index 2310659c89276..599bc69c64bcf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -50,6 +50,7 @@ final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String * otherwise, values outside the splits specified will be treated as errors. * @group param */ + @Since("1.4.0") val splits: DoubleArrayParam = new DoubleArrayParam(this, "splits", "Split points for mapping continuous features into buckets. With n+1 splits, there are n " + "buckets. A bucket defined by splits x,y holds values in the range [x,y) except the last " + diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala index 46a3de0926eea..9605145e12c27 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala @@ -48,6 +48,7 @@ class DCT @Since("1.5.0") (@Since("1.5.0") override val uid: String) * Default: false * @group param */ + @Since("1.5.0") def inverse: BooleanParam = new BooleanParam( this, "inverse", "Set transformer to perform inverse DCT") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index abef6c0990ea9..c018679a112c9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -44,6 +44,7 @@ class ElementwiseProduct @Since("1.4.0") (@Since("1.4.0") override val uid: Stri * the vector to multiply with input vectors * @group param */ + @Since("1.4.0") val scalingVec: Param[Vector] = new Param(this, "scalingVec", "vector for hadamard product") /** @group setParam */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index 269510b5c6cb4..cb9ec3cc87d8e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -58,6 +58,7 @@ class HashingTF @Since("1.4.0") (@Since("1.4.0") override val uid: String) * (default = 2^18^) * @group param */ + @Since("1.2.0") val numFeatures = new IntParam(this, "numFeatures", "number of features (> 0)", ParamValidators.gt(0)) @@ -68,6 +69,7 @@ class HashingTF @Since("1.4.0") (@Since("1.4.0") override val uid: String) * (default = false) * @group param */ + @Since("2.0.0") val binary = new BooleanParam(this, "binary", "If true, all non zero counts are set to 1. 
" + "This is useful for discrete probabilistic models that model binary events rather " + "than integer counts") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala index 0b8d8c6146528..9c1f1ad443bba 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala @@ -47,6 +47,7 @@ class NGram @Since("1.5.0") (@Since("1.5.0") override val uid: String) * Default: 2, bigram features * @group param */ + @Since("1.5.0") val n: IntParam = new IntParam(this, "n", "number elements per n-gram (>=1)", ParamValidators.gtEq(1)) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala index 2f650e5c70a7c..f9cbad90c9f3f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala @@ -43,6 +43,7 @@ class Normalizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) * (default: p = 2) * @group param */ + @Since("1.4.0") val p = new DoubleParam(this, "p", "the p norm value", ParamValidators.gtEq(1)) setDefault(p -> 2.0) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala index 4f2e826505be4..63ed92e04bec7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -54,6 +54,7 @@ class OneHotEncoder @Since("1.4.0") (@Since("1.4.0") override val uid: String) e * Whether to drop the last category in the encoded vector (default: true) * @group param */ + @Since("1.4.0") final val dropLast: BooleanParam = new BooleanParam(this, "dropLast", "whether to drop the last category") setDefault(dropLast -> true) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala index 284dfb36e050a..0185cdda40f88 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala @@ -47,6 +47,7 @@ class PolynomialExpansion @Since("1.4.0") (@Since("1.4.0") override val uid: Str * Default: 2 * @group param */ + @Since("1.4.0") val degree = new IntParam(this, "degree", "the polynomial degree to expand (>= 1)", ParamValidators.gtEq(1)) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala index 085a83f103869..8df5567bf77b4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala @@ -81,6 +81,7 @@ class RFormula @Since("1.5.0") (@Since("1.5.0") override val uid: String) * R formula parameter. The formula is provided in string form. 
* @group param */ + @Since("1.5.0") val formula: Param[String] = new Param(this, "formula", "R model formula") /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala index 1f111ccab8fc5..b69383ef57643 100755 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala @@ -54,6 +54,7 @@ class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String * @see [[StopWordsRemover.loadDefaultStopWords()]] * @group param */ + @Since("1.5.0") val stopWords: StringArrayParam = new StringArrayParam(this, "stopWords", "the words to be filtered out") @@ -70,6 +71,7 @@ class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String * Default: false * @group param */ + @Since("1.5.0") val caseSensitive: BooleanParam = new BooleanParam(this, "caseSensitive", "whether to do a case-sensitive comparison over the stop words") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index c7931c50f2cdf..79de67a4a2e3c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -281,6 +281,7 @@ class IndexToString private[ml] (@Since("1.5.0") override val uid: String) * Default: Not specified, in which case [[inputCol]] metadata is used for labels. * @group param */ + @Since("1.5.0") final val labels: StringArrayParam = new StringArrayParam(this, "labels", "Optional array of labels specifying index-string mapping." + " If not provided or if empty, then metadata from inputCol is used instead.") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index ff98341a6f683..7b4305a222ce1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -77,6 +77,7 @@ class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) * Default: 1, to avoid returning empty strings * @group param */ + @Since("1.4.0") val minTokenLength: IntParam = new IntParam(this, "minTokenLength", "minimum token length (>= 0)", ParamValidators.gtEq(0)) @@ -93,6 +94,7 @@ class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) * Default: true * @group param */ + @Since("1.4.0") val gaps: BooleanParam = new BooleanParam(this, "gaps", "Set regex to match gaps or tokens") /** @group setParam */ @@ -108,6 +110,7 @@ class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) * Default: `"\\s+"` * @group param */ + @Since("1.4.0") val pattern: Param[String] = new Param(this, "pattern", "regex pattern used for tokenizing") /** @group setParam */ @@ -123,6 +126,7 @@ class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) * Default: true * @group param */ + @Since("1.6.0") final val toLowercase: BooleanParam = new BooleanParam(this, "toLowercase", "whether to convert all characters to lowercase before tokenizing.") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala index 78902e72893fc..862c7f0d8f2f9 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala @@ -54,6 +54,7 @@ final class VectorSlicer @Since("1.5.0") (@Since("1.5.0") override val uid: Stri * Default: Empty array * @group param */ + @Since("1.5.0") val indices = new IntArrayParam(this, "indices", "An array of indices to select features from a vector column." + " There can be no overlap with names.", VectorSlicer.validIndices) @@ -75,6 +76,7 @@ final class VectorSlicer @Since("1.5.0") (@Since("1.5.0") override val uid: Stri * Default: Empty Array * @group param */ + @Since("1.5.0") val names = new StringArrayParam(this, "names", "An array of feature names to select features from a vector column." + " There can be no overlap with indices.", VectorSlicer.validNames) From 2fc676aafd8d5ceaa37cfae01cbd42ee41836fd2 Mon Sep 17 00:00:00 2001 From: Nick Pentreath Date: Fri, 17 Jun 2016 11:13:20 +0200 Subject: [PATCH 4/6] Fix annotations for save/load for max abs scaler --- .../scala/org/apache/spark/ml/feature/MaxAbsScaler.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala index 006d6413cbeb6..261d45d7a82a8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala @@ -94,7 +94,7 @@ class MaxAbsScaler @Since("2.0.0") (@Since("2.0.0") override val uid: String) @Since("2.0.0") object MaxAbsScaler extends DefaultParamsReadable[MaxAbsScaler] { - @Since("1.6.0") + @Since("2.0.0") override def load(path: String): MaxAbsScaler = super.load(path) } @@ -177,9 +177,9 @@ object MaxAbsScalerModel extends MLReadable[MaxAbsScalerModel] { } } - @Since("1.6.0") + @Since("2.0.0") override def read: MLReader[MaxAbsScalerModel] = new MaxAbsScalerModelReader - @Since("1.6.0") + @Since("2.0.0") override def load(path: String): MaxAbsScalerModel = super.load(path) } From 8c24513b4e3179fa9773573e7513c6112faea4f9 Mon Sep 17 00:00:00 2001 From: Nick Pentreath Date: Fri, 17 Jun 2016 11:45:20 +0200 Subject: [PATCH 5/6] Add annotations for 'transformSchema' and 'copy' --- .../main/scala/org/apache/spark/ml/feature/Binarizer.scala | 2 ++ .../main/scala/org/apache/spark/ml/feature/Bucketizer.scala | 2 ++ .../scala/org/apache/spark/ml/feature/ChiSqSelector.scala | 4 ++++ .../scala/org/apache/spark/ml/feature/CountVectorizer.scala | 4 ++++ .../main/scala/org/apache/spark/ml/feature/HashingTF.scala | 2 ++ mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala | 4 ++++ .../scala/org/apache/spark/ml/feature/MaxAbsScaler.scala | 4 ++++ .../scala/org/apache/spark/ml/feature/MinMaxScaler.scala | 4 ++++ .../scala/org/apache/spark/ml/feature/OneHotEncoder.scala | 2 ++ mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala | 4 ++++ .../org/apache/spark/ml/feature/PolynomialExpansion.scala | 1 + .../org/apache/spark/ml/feature/QuantileDiscretizer.scala | 2 ++ .../main/scala/org/apache/spark/ml/feature/RFormula.scala | 6 ++++++ .../scala/org/apache/spark/ml/feature/StandardScaler.scala | 4 ++++ .../org/apache/spark/ml/feature/StopWordsRemover.scala | 2 ++ .../scala/org/apache/spark/ml/feature/StringIndexer.scala | 6 ++++++ .../main/scala/org/apache/spark/ml/feature/Tokenizer.scala | 2 ++ .../scala/org/apache/spark/ml/feature/VectorAssembler.scala | 2 ++ .../scala/org/apache/spark/ml/feature/VectorIndexer.scala | 4 ++++ 
.../scala/org/apache/spark/ml/feature/VectorSlicer.scala | 2 ++ .../main/scala/org/apache/spark/ml/feature/Word2Vec.scala | 4 ++++ 21 files changed, 67 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala index 9867c6f355a0a..fa9634fdfa7e9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala @@ -103,6 +103,7 @@ final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) } } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { val inputType = schema($(inputCol)).dataType val outputColName = $(outputCol) @@ -122,6 +123,7 @@ final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) StructType(schema.fields :+ outCol) } + @Since("1.4.1") override def copy(extra: ParamMap): Binarizer = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala index 599bc69c64bcf..caffc39e2be14 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -93,11 +93,13 @@ final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String attr.toStructField() } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { SchemaUtils.checkColumnType(schema, $(inputCol), DoubleType) SchemaUtils.appendColumn(schema, prepOutputField(schema)) } + @Since("1.4.1") override def copy(extra: ParamMap): Bucketizer = { defaultCopy[Bucketizer](extra).setParent(parent) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala index c7c6926e95e24..1c329267d70d8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala @@ -97,12 +97,14 @@ final class ChiSqSelector @Since("1.6.0") (@Since("1.6.0") override val uid: Str copyValues(new ChiSqSelectorModel(uid, chiSqSelector).setParent(this)) } + @Since("1.6.0") override def transformSchema(schema: StructType): StructType = { SchemaUtils.checkColumnType(schema, $(featuresCol), new VectorUDT) SchemaUtils.checkNumericType(schema, $(labelCol)) SchemaUtils.appendColumn(schema, $(outputCol), new VectorUDT) } + @Since("1.6.0") override def copy(extra: ParamMap): ChiSqSelector = defaultCopy(extra) } @@ -154,6 +156,7 @@ final class ChiSqSelectorModel private[ml] ( dataset.withColumn($(outputCol), selector(col($(featuresCol))), newField.metadata) } + @Since("1.6.0") override def transformSchema(schema: StructType): StructType = { SchemaUtils.checkColumnType(schema, $(featuresCol), new VectorUDT) val newField = prepOutputField(schema) @@ -176,6 +179,7 @@ final class ChiSqSelectorModel private[ml] ( newAttributeGroup.toStructField() } + @Since("1.6.0") override def copy(extra: ParamMap): ChiSqSelectorModel = { val copied = new ChiSqSelectorModel(uid, chiSqSelector) copyValues(copied, extra).setParent(parent) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala index 1c217512be99a..3250fe55980d0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala @@ -184,10 +184,12 @@ class CountVectorizer @Since("1.5.0") (@Since("1.5.0") override val uid: String) copyValues(new CountVectorizerModel(uid, vocab).setParent(this)) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.5.0") override def copy(extra: ParamMap): CountVectorizer = defaultCopy(extra) } @@ -268,10 +270,12 @@ class CountVectorizerModel( dataset.withColumn($(outputCol), vectorizer(col($(inputCol)))) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.5.0") override def copy(extra: ParamMap): CountVectorizerModel = { val copied = new CountVectorizerModel(uid, vocabulary).setParent(parent) copyValues(copied, extra) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index cb9ec3cc87d8e..6ca7336cd048e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -102,6 +102,7 @@ class HashingTF @Since("1.4.0") (@Since("1.4.0") override val uid: String) dataset.select(col("*"), t(col($(inputCol))).as($(outputCol), metadata)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { val inputType = schema($(inputCol)).dataType require(inputType.isInstanceOf[ArrayType], @@ -110,6 +111,7 @@ class HashingTF @Since("1.4.0") (@Since("1.4.0") override val uid: String) SchemaUtils.appendColumn(schema, attrGroup.toStructField()) } + @Since("1.4.1") override def copy(extra: ParamMap): HashingTF = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index 38d0da1c1d7fa..680f55be26a8d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -93,10 +93,12 @@ final class IDF @Since("1.4.0") (@Since("1.4.0") override val uid: String) copyValues(new IDFModel(uid, idf).setParent(this)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.4.1") override def copy(extra: ParamMap): IDF = defaultCopy(extra) } @@ -136,10 +138,12 @@ class IDFModel private[ml] ( dataset.withColumn($(outputCol), idf(col($(inputCol)))) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.4.1") override def copy(extra: ParamMap): IDFModel = { val copied = new IDFModel(uid, idfModel) copyValues(copied, extra).setParent(parent) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala index 261d45d7a82a8..31a58152671cd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala @@ -84,10 +84,12 @@ class MaxAbsScaler @Since("2.0.0") (@Since("2.0.0") override val uid: String) copyValues(new MaxAbsScalerModel(uid, Vectors.dense(maxAbs)).setParent(this)) } + @Since("2.0.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("2.0.0") override def copy(extra: ParamMap): MaxAbsScaler = defaultCopy(extra) } @@ 
-132,10 +134,12 @@ class MaxAbsScalerModel private[ml] ( dataset.withColumn($(outputCol), reScale(col($(inputCol)))) } + @Since("2.0.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("2.0.0") override def copy(extra: ParamMap): MaxAbsScalerModel = { val copied = new MaxAbsScalerModel(uid, maxAbs) copyValues(copied, extra).setParent(parent) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala index 0260c9d6f13aa..9d6361726652b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala @@ -120,10 +120,12 @@ class MinMaxScaler @Since("1.5.0") (@Since("1.5.0") override val uid: String) copyValues(new MinMaxScalerModel(uid, summary.min, summary.max).setParent(this)) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.5.0") override def copy(extra: ParamMap): MinMaxScaler = defaultCopy(extra) } @@ -192,10 +194,12 @@ class MinMaxScalerModel private[ml] ( dataset.withColumn($(outputCol), reScale(col($(inputCol)))) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.5.0") override def copy(extra: ParamMap): MinMaxScalerModel = { val copied = new MinMaxScalerModel(uid, originalMin, originalMax) copyValues(copied, extra).setParent(parent) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala index 63ed92e04bec7..4fafc1e349c22 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -71,6 +71,7 @@ class OneHotEncoder @Since("1.4.0") (@Since("1.4.0") override val uid: String) e @Since("1.4.0") def setOutputCol(value: String): this.type = set(outputCol, value) + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { val inputColName = $(inputCol) val outputColName = $(outputCol) @@ -174,6 +175,7 @@ class OneHotEncoder @Since("1.4.0") (@Since("1.4.0") override val uid: String) e dataset.select(col("*"), encode(col(inputColName).cast(DoubleType)).as(outputColName, metadata)) } + @Since("1.4.1") override def copy(extra: ParamMap): OneHotEncoder = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index 479f49b0abaf4..130e3586e263d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -99,10 +99,12 @@ class PCA @Since("1.5.0") ( copyValues(new PCAModel(uid, pcaModel.pc, pcaModel.explainedVariance).setParent(this)) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.5.0") override def copy(extra: ParamMap): PCA = defaultCopy(extra) } @@ -158,10 +160,12 @@ class PCAModel private[ml] ( dataset.withColumn($(outputCol), pcaOp(col($(inputCol)))) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.5.0") override def copy(extra: ParamMap): PCAModel = { val copied = new PCAModel(uid, pc, explainedVariance) copyValues(copied, 
extra).setParent(parent) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala index 0185cdda40f88..7b35fdeaf40c6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala @@ -67,6 +67,7 @@ class PolynomialExpansion @Since("1.4.0") (@Since("1.4.0") override val uid: Str override protected def outputDataType: DataType = new VectorUDT() + @Since("1.4.1") override def copy(extra: ParamMap): PolynomialExpansion = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala index 50ac9a7e76128..96b8e7d9f7faf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala @@ -97,6 +97,7 @@ final class QuantileDiscretizer @Since("1.6.0") (@Since("1.6.0") override val ui @Since("1.6.0") def setOutputCol(value: String): this.type = set(outputCol, value) + @Since("1.6.0") override def transformSchema(schema: StructType): StructType = { SchemaUtils.checkColumnType(schema, $(inputCol), DoubleType) val inputFields = schema.fields @@ -118,6 +119,7 @@ final class QuantileDiscretizer @Since("1.6.0") (@Since("1.6.0") override val ui copyValues(bucketizer.setParent(this)) } + @Since("1.6.0") override def copy(extra: ParamMap): QuantileDiscretizer = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala index 8df5567bf77b4..546dc7e8c08ff 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala @@ -177,6 +177,7 @@ class RFormula @Since("1.5.0") (@Since("1.5.0") override val uid: String) copyValues(new RFormulaModel(uid, resolvedFormula, pipelineModel).setParent(this)) } + @Since("1.5.0") // optimistic schema; does not contain any ML attributes override def transformSchema(schema: StructType): StructType = { if (hasLabelCol(schema)) { @@ -187,8 +188,10 @@ class RFormula @Since("1.5.0") (@Since("1.5.0") override val uid: String) } } + @Since("1.5.0") override def copy(extra: ParamMap): RFormula = defaultCopy(extra) + @Since("2.0.0") override def toString: String = s"RFormula(${get(formula).getOrElse("")}) (uid=$uid)" } @@ -221,6 +224,7 @@ class RFormulaModel private[feature]( transformLabel(pipelineModel.transform(dataset)) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { checkCanTransform(schema) val withFeatures = pipelineModel.transformSchema(schema) @@ -239,9 +243,11 @@ class RFormulaModel private[feature]( } } + @Since("1.5.0") override def copy(extra: ParamMap): RFormulaModel = copyValues( new RFormulaModel(uid, resolvedFormula, pipelineModel)) + @Since("2.0.0") override def toString: String = s"RFormulaModel($resolvedFormula) (uid=$uid)" private def transformLabel(dataset: Dataset[_]): DataFrame = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index 6e175ae4528fb..dcb871131b7b8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -120,10 +120,12 @@ class StandardScaler @Since("1.4.0") ( copyValues(new StandardScalerModel(uid, scalerModel.std, scalerModel.mean).setParent(this)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.4.1") override def copy(extra: ParamMap): StandardScaler = defaultCopy(extra) } @@ -171,10 +173,12 @@ class StandardScalerModel private[ml] ( dataset.withColumn($(outputCol), scale(col($(inputCol)))) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.4.1") override def copy(extra: ParamMap): StandardScalerModel = { val copied = new StandardScalerModel(uid, std, mean) copyValues(copied, extra).setParent(parent) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala index b69383ef57643..1a6f42f773cd7 100755 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala @@ -105,6 +105,7 @@ class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String dataset.select(col("*"), t(col($(inputCol))).as($(outputCol), metadata)) } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { val inputType = schema($(inputCol)).dataType require(inputType.sameType(ArrayType(StringType)), @@ -112,6 +113,7 @@ class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String SchemaUtils.appendColumn(schema, $(outputCol), inputType, schema($(inputCol)).nullable) } + @Since("1.5.0") override def copy(extra: ParamMap): StopWordsRemover = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index 79de67a4a2e3c..0f7337ce6b554 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -95,10 +95,12 @@ class StringIndexer @Since("1.4.0") ( copyValues(new StringIndexerModel(uid, labels).setParent(this)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.4.1") override def copy(extra: ParamMap): StringIndexer = defaultCopy(extra) } @@ -187,6 +189,7 @@ class StringIndexerModel ( indexer(dataset($(inputCol)).cast(StringType)).as($(outputCol), metadata)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { if (schema.fieldNames.contains($(inputCol))) { validateAndTransformSchema(schema) @@ -196,6 +199,7 @@ class StringIndexerModel ( } } + @Since("1.4.1") override def copy(extra: ParamMap): StringIndexerModel = { val copied = new StringIndexerModel(uid, labels) copyValues(copied, extra).setParent(parent) @@ -290,6 +294,7 @@ class IndexToString private[ml] (@Since("1.5.0") override val uid: String) @Since("1.5.0") final def getLabels: Array[String] = $(labels) + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { val inputColName = $(inputCol) val inputDataType = schema(inputColName).dataType @@ -327,6 +332,7 @@ class IndexToString private[ml] (@Since("1.5.0") override val uid: String) indexer(dataset($(inputCol)).cast(DoubleType)).as(outputColName)) } + @Since("1.5.0") override 
def copy(extra: ParamMap): IndexToString = { defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index 7b4305a222ce1..010c948749f3b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -47,6 +47,7 @@ class Tokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) override protected def outputDataType: DataType = new ArrayType(StringType, true) + @Since("1.4.1") override def copy(extra: ParamMap): Tokenizer = defaultCopy(extra) } @@ -154,6 +155,7 @@ class RegexTokenizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) override protected def outputDataType: DataType = new ArrayType(StringType, true) + @Since("1.4.1") override def copy(extra: ParamMap): RegexTokenizer = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index f5dd26f712a03..4939dabd987ec 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -110,6 +110,7 @@ class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String) dataset.select(col("*"), assembleFunc(struct(args: _*)).as($(outputCol), metadata)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { val inputColNames = $(inputCols) val outputColName = $(outputCol) @@ -126,6 +127,7 @@ class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String) StructType(schema.fields :+ new StructField(outputColName, new VectorUDT, true)) } + @Since("1.4.1") override def copy(extra: ParamMap): VectorAssembler = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index aae71d759eca6..52db996c841b3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -132,6 +132,7 @@ class VectorIndexer @Since("1.4.0") ( copyValues(model) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { // We do not transfer feature metadata since we do not know what types of features we will // produce in transform(). 
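The transformSchema and copy overrides annotated throughout this patch are exercised whenever stages are chained: Pipeline.fit validates every stage's schema through transformSchema before touching any rows, and copy propagates extra param maps to stages. A hedged sketch of that path, assuming a local SparkSession and invented columns (id, gender, hours):

    import org.apache.spark.ml.Pipeline
    import org.apache.spark.ml.feature.{VectorAssembler, VectorIndexer}
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[2]").appName("schema-sketch").getOrCreate()

    val df = spark.createDataFrame(Seq(
      (1, 1.0, 18.0),
      (2, 0.0, 12.0),
      (3, 1.0, 15.0)
    )).toDF("id", "gender", "hours")

    val assembler = new VectorAssembler()
      .setInputCols(Array("gender", "hours"))
      .setOutputCol("features")

    val indexer = new VectorIndexer()
      .setInputCol("features")
      .setOutputCol("indexed")
      .setMaxCategories(2)

    // fit() first runs transformSchema over both stages, so schema problems
    // surface before any data is processed; copy() is what the pipeline uses
    // when it applies an extra ParamMap to its stages.
    val model = new Pipeline().setStages(Array(assembler, indexer)).fit(df)
    model.transform(df).show()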
@@ -142,6 +143,7 @@ class VectorIndexer @Since("1.4.0") ( SchemaUtils.appendColumn(schema, $(outputCol), dataType) } + @Since("1.4.1") override def copy(extra: ParamMap): VectorIndexer = defaultCopy(extra) } @@ -366,6 +368,7 @@ class VectorIndexerModel private[ml] ( dataset.withColumn($(outputCol), newCol, newField.metadata) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { val dataType = new VectorUDT require(isDefined(inputCol), @@ -425,6 +428,7 @@ class VectorIndexerModel private[ml] ( newAttributeGroup.toStructField() } + @Since("1.4.1") override def copy(extra: ParamMap): VectorIndexerModel = { val copied = new VectorIndexerModel(uid, numFeatures, categoryMaps) copyValues(copied, extra).setParent(parent) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala index 862c7f0d8f2f9..6769e490c51c7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala @@ -144,6 +144,7 @@ final class VectorSlicer @Since("1.5.0") (@Since("1.5.0") override val uid: Stri indFeatures ++ nameFeatures } + @Since("1.5.0") override def transformSchema(schema: StructType): StructType = { require($(indices).length > 0 || $(names).length > 0, s"VectorSlicer requires that at least one feature be selected.") @@ -158,6 +159,7 @@ final class VectorSlicer @Since("1.5.0") (@Since("1.5.0") override val uid: Stri StructType(outputFields) } + @Since("1.5.0") override def copy(extra: ParamMap): VectorSlicer = defaultCopy(extra) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 803ff4202b0a5..05c4f2f1a7891 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -185,10 +185,12 @@ final class Word2Vec @Since("1.4.0") ( copyValues(new Word2VecModel(uid, wordVectors).setParent(this)) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.4.1") override def copy(extra: ParamMap): Word2Vec = defaultCopy(extra) } @@ -281,10 +283,12 @@ class Word2VecModel private[ml] ( dataset.withColumn($(outputCol), word2Vec(col($(inputCol)))) } + @Since("1.4.0") override def transformSchema(schema: StructType): StructType = { validateAndTransformSchema(schema) } + @Since("1.4.1") override def copy(extra: ParamMap): Word2VecModel = { val copied = new Word2VecModel(uid, wordVectors) copyValues(copied, extra).setParent(parent) From 34b5f37d4a111a80c622fb7244ef54220e9167fd Mon Sep 17 00:00:00 2001 From: Nick Pentreath Date: Mon, 20 Jun 2016 10:16:17 +0200 Subject: [PATCH 6/6] Anything with new ml.linalg is since 2.0.0 --- .../apache/spark/ml/feature/ElementwiseProduct.scala | 12 ++++++------ .../main/scala/org/apache/spark/ml/feature/IDF.scala | 2 +- .../org/apache/spark/ml/feature/MinMaxScaler.scala | 4 ++-- .../org/apache/spark/ml/feature/Normalizer.scala | 12 ++++++------ .../main/scala/org/apache/spark/ml/feature/PCA.scala | 2 +- .../spark/ml/feature/PolynomialExpansion.scala | 12 ++++++------ .../org/apache/spark/ml/feature/StandardScaler.scala | 4 ++-- python/pyspark/ml/feature.py | 8 ++++---- 8 files changed, 28 insertions(+), 28 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index c018679a112c9..92fefb1e6c0f3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -33,26 +33,26 @@ import org.apache.spark.sql.types.DataType * multiplier. */ @Experimental -@Since("1.4.0") -class ElementwiseProduct @Since("1.4.0") (@Since("1.4.0") override val uid: String) +@Since("2.0.0") +class ElementwiseProduct @Since("2.0.0") (@Since("2.0.0") override val uid: String) extends UnaryTransformer[Vector, Vector, ElementwiseProduct] with DefaultParamsWritable { - @Since("1.4.0") + @Since("2.0.0") def this() = this(Identifiable.randomUID("elemProd")) /** * the vector to multiply with input vectors * @group param */ - @Since("1.4.0") + @Since("2.0.0") val scalingVec: Param[Vector] = new Param(this, "scalingVec", "vector for hadamard product") /** @group setParam */ - @Since("1.4.0") + @Since("2.0.0") def setScalingVec(value: Vector): this.type = set(scalingVec, value) /** @group getParam */ - @Since("1.4.0") + @Since("2.0.0") def getScalingVec: Vector = getOrDefault(scalingVec) override protected def createTransformFunc: Vector => Vector = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index 680f55be26a8d..cf03a2845ced5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -150,7 +150,7 @@ class IDFModel private[ml] ( } /** Returns the IDF vector. */ - @Since("1.6.0") + @Since("2.0.0") def idf: Vector = idfModel.idf.asML @Since("1.6.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala index 9d6361726652b..dd5a1f9b41fc8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala @@ -149,8 +149,8 @@ object MinMaxScaler extends DefaultParamsReadable[MinMaxScaler] { @Since("1.5.0") class MinMaxScalerModel private[ml] ( @Since("1.5.0") override val uid: String, - @Since("1.5.0") val originalMin: Vector, - @Since("1.5.0") val originalMax: Vector) + @Since("2.0.0") val originalMin: Vector, + @Since("2.0.0") val originalMax: Vector) extends Model[MinMaxScalerModel] with MinMaxScalerParams with MLWritable { import MinMaxScalerModel._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala index f9cbad90c9f3f..9a4e682890f12 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala @@ -31,11 +31,11 @@ import org.apache.spark.sql.types.DataType * Normalize a vector to have unit norm using the given p-norm. 
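As a quick illustration of the p param documented here, a small sketch (the SparkSession and the input vector are made up):

    import org.apache.spark.ml.feature.Normalizer
    import org.apache.spark.ml.linalg.Vectors
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[2]").appName("normalizer-sketch").getOrCreate()

    val df = spark.createDataFrame(Seq(
      Tuple1(Vectors.dense(1.0, 2.0, 2.0))
    )).toDF("features")

    // Default p = 2: (1, 2, 2) has L2 norm 3, so it becomes (1/3, 2/3, 2/3).
    new Normalizer().setInputCol("features").setOutputCol("l2norm").transform(df).show(false)

    // p = 1 divides by the L1 norm (5 here) instead.
    new Normalizer().setInputCol("features").setOutputCol("l1norm").setP(1.0).transform(df).show(false)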
*/ @Experimental -@Since("1.4.0") -class Normalizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) +@Since("2.0.0") +class Normalizer @Since("2.0.0") (@Since("2.0.0") override val uid: String) extends UnaryTransformer[Vector, Vector, Normalizer] with DefaultParamsWritable { - @Since("1.4.0") + @Since("2.0.0") def this() = this(Identifiable.randomUID("normalizer")) /** @@ -43,17 +43,17 @@ class Normalizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) * (default: p = 2) * @group param */ - @Since("1.4.0") + @Since("2.0.0") val p = new DoubleParam(this, "p", "the p norm value", ParamValidators.gtEq(1)) setDefault(p -> 2.0) /** @group getParam */ - @Since("1.4.0") + @Since("2.0.0") def getP: Double = $(p) /** @group setParam */ - @Since("1.4.0") + @Since("2.0.0") def setP(value: Double): this.type = set(p, value) override protected def createTransformFunc: Vector => Vector = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index 130e3586e263d..b89c85991f39a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -127,7 +127,7 @@ object PCA extends DefaultParamsReadable[PCA] { @Since("1.5.0") class PCAModel private[ml] ( @Since("1.5.0") override val uid: String, - @Since("1.6.0") val pc: DenseMatrix, + @Since("2.0.0") val pc: DenseMatrix, @Since("2.0.0") val explainedVariance: DenseVector) extends Model[PCAModel] with PCAParams with MLWritable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala index 7b35fdeaf40c6..026014c7d64e3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala @@ -35,11 +35,11 @@ import org.apache.spark.sql.types.DataType * `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`. 
*/ @Experimental -@Since("1.4.0") -class PolynomialExpansion @Since("1.4.0") (@Since("1.4.0") override val uid: String) +@Since("2.0.0") +class PolynomialExpansion @Since("2.0.0") (@Since("2.0.0") override val uid: String) extends UnaryTransformer[Vector, Vector, PolynomialExpansion] with DefaultParamsWritable { - @Since("1.4.0") + @Since("2.0.0") def this() = this(Identifiable.randomUID("poly")) /** @@ -47,18 +47,18 @@ class PolynomialExpansion @Since("1.4.0") (@Since("1.4.0") override val uid: Str * Default: 2 * @group param */ - @Since("1.4.0") + @Since("2.0.0") val degree = new IntParam(this, "degree", "the polynomial degree to expand (>= 1)", ParamValidators.gtEq(1)) setDefault(degree -> 2) /** @group getParam */ - @Since("1.4.0") + @Since("2.0.0") def getDegree: Int = $(degree) /** @group setParam */ - @Since("1.4.0") + @Since("2.0.0") def setDegree(value: Int): this.type = set(degree, value) override protected def createTransformFunc: Vector => Vector = { v => diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index dcb871131b7b8..5e1bacf876caf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -147,8 +147,8 @@ object StandardScaler extends DefaultParamsReadable[StandardScaler] { @Since("1.2.0") class StandardScalerModel private[ml] ( @Since("1.4.0") override val uid: String, - @Since("1.5.0") val std: Vector, - @Since("1.5.0") val mean: Vector) + @Since("2.0.0") val std: Vector, + @Since("2.0.0") val mean: Vector) extends Model[StandardScalerModel] with StandardScalerParams with MLWritable { import StandardScalerModel._ diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 1b9b2b9465a0f..1e9ec0fbb411c 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -486,14 +486,14 @@ def setParams(self, scalingVec=None, inputCol=None, outputCol=None): kwargs = self.setParams._input_kwargs return self._set(**kwargs) - @since("1.5.0") + @since("2.0.0") def setScalingVec(self, value): """ Sets the value of :py:attr:`scalingVec`. """ return self._set(scalingVec=value) - @since("1.5.0") + @since("2.0.0") def getScalingVec(self): """ Gets the value of scalingVec or its default value. @@ -1584,7 +1584,7 @@ class StandardScalerModel(JavaModel, JavaMLReadable, JavaMLWritable): """ @property - @since("1.5.0") + @since("2.0.0") def std(self): """ Standard deviation of the StandardScalerModel. @@ -1592,7 +1592,7 @@ def std(self): return self._call_java("std") @property - @since("1.5.0") + @since("2.0.0") def mean(self): """ Mean of the StandardScalerModel.