From 10632436e107d9bec13f22bf850340a0deab82a8 Mon Sep 17 00:00:00 2001
From: "Ehsan M.Kermani"
Date: Fri, 11 Sep 2015 16:49:02 -0700
Subject: [PATCH 1/5] Since annotation for ml.regression

---
 .../ml/regression/DecisionTreeRegressor.scala  | 26 +++++-----
 .../spark/ml/regression/GBTRegressor.scala     | 52 +++++++++++--------
 .../ml/regression/IsotonicRegression.scala     | 21 +++++---
 .../ml/regression/LinearRegression.scala       | 15 ++++--
 .../ml/regression/RandomForestRegressor.scala  | 34 ++++++------
 5 files changed, 86 insertions(+), 62 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
index a2bcd67401d0..77d47e5458e9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.regression

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.{PredictionModel, Predictor}
 import org.apache.spark.ml.param.ParamMap
 import org.apache.spark.ml.tree.{DecisionTreeModel, DecisionTreeParams, Node, TreeRegressorParams}
@@ -36,30 +36,31 @@ import org.apache.spark.sql.DataFrame
  * for regression.
  * It supports both continuous and categorical features.
  */
+@Since("1.4.0")
 @Experimental
-final class DecisionTreeRegressor(override val uid: String)
+final class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends Predictor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel]
   with DecisionTreeParams with TreeRegressorParams {
-
+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("dtr"))

   // Override parameter setters from parent trait for Java API compatibility.
-
+  @Since("1.4.0")
   override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)
-
+  @Since("1.4.0")
   override def setMaxBins(value: Int): this.type = super.setMaxBins(value)
-
+  @Since("1.4.0")
   override def setMinInstancesPerNode(value: Int): this.type = super.setMinInstancesPerNode(value)
-
+  @Since("1.4.0")
   override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)
-
+  @Since("1.4.0")
   override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)
-
+  @Since("1.4.0")
   override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)
-
+  @Since("1.4.0")
   override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)
-
+  @Since("1.4.0")
   override def setImpurity(value: String): this.type = super.setImpurity(value)

   override protected def train(dataset: DataFrame): DecisionTreeRegressionModel = {
@@ -77,10 +78,11 @@ final class DecisionTreeRegressor(override val uid: String)
     super.getOldStrategy(categoricalFeatures, numClasses = 0, OldAlgo.Regression, getOldImpurity,
       subsamplingRate = 1.0)
   }
-
+  @Since("1.4.0")
   override def copy(extra: ParamMap): DecisionTreeRegressor = defaultCopy(extra)
 }

+@Since("1.4.0")
 @Experimental
 object DecisionTreeRegressor {
   /** Accessor for supported impurities: variance */

diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
index b66e61f37dd5..662ada9f3cde 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.regression
 import com.github.fommil.netlib.BLAS.{getInstance => blas}

 import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.{PredictionModel, Predictor}
 import org.apache.spark.ml.param.{Param, ParamMap}
 import org.apache.spark.ml.tree.{DecisionTreeModel, GBTParams, TreeEnsembleModel, TreeRegressorParams}
@@ -42,54 +42,56 @@ import org.apache.spark.sql.types.DoubleType
  * learning algorithm for regression.
  * It supports both continuous and categorical features.
  */
+@Since("1.4.0")
 @Experimental
-final class GBTRegressor(override val uid: String)
+final class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends Predictor[Vector, GBTRegressor, GBTRegressionModel]
   with GBTParams with TreeRegressorParams with Logging {
-
+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("gbtr"))

   // Override parameter setters from parent trait for Java API compatibility.
   // Parameters from TreeRegressorParams:
-
+  @Since("1.4.0")
   override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)
-
+  @Since("1.4.0")
   override def setMaxBins(value: Int): this.type = super.setMaxBins(value)
-
+  @Since("1.4.0")
   override def setMinInstancesPerNode(value: Int): this.type = super.setMinInstancesPerNode(value)
-
+  @Since("1.4.0")
   override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)
-
+  @Since("1.4.0")
   override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)
-
+  @Since("1.4.0")
   override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)
-
+  @Since("1.4.0")
   override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)

   /**
    * The impurity setting is ignored for GBT models.
    * Individual trees are built using impurity "Variance."
    */
+  @Since("1.4.0")
   override def setImpurity(value: String): this.type = {
     logWarning("GBTRegressor.setImpurity should NOT be used")
     this
   }

   // Parameters from TreeEnsembleParams:
-
+  @Since("1.4.0")
   override def setSubsamplingRate(value: Double): this.type = super.setSubsamplingRate(value)
-
+  @Since("1.4.0")
   override def setSeed(value: Long): this.type = {
     logWarning("The 'seed' parameter is currently ignored by Gradient Boosting.")
     super.setSeed(value)
   }

   // Parameters from GBTParams:
-
+  @Since("1.4.0")
   override def setMaxIter(value: Int): this.type = super.setMaxIter(value)
-
+  @Since("1.4.0")
   override def setStepSize(value: Double): this.type = super.setStepSize(value)

   // Parameters for GBTRegressor:
@@ -108,9 +110,11 @@ final class GBTRegressor(override val uid: String)
   setDefault(lossType -> "squared")

   /** @group setParam */
+  @Since("1.4.0")
   def setLossType(value: String): this.type = set(lossType, value)

   /** @group getParam */
+  @Since("1.4.0")
   def getLossType: String = $(lossType).toLowerCase

   /** (private[ml]) Convert new loss to old loss. */
@@ -133,10 +137,11 @@ final class GBTRegressor(override val uid: String)
     val oldModel = oldGBT.run(oldDataset)
     GBTRegressionModel.fromOld(oldModel, this, categoricalFeatures)
   }
-
+  @Since("1.4.0")
   override def copy(extra: ParamMap): GBTRegressor = defaultCopy(extra)
 }

+@Since("1.4.0")
 @Experimental
 object GBTRegressor {
   // The losses below should be lowercase.
@@ -153,20 +158,21 @@ object GBTRegressor {
  * @param _trees Decision trees in the ensemble.
  * @param _treeWeights Weights for the decision trees in the ensemble.
  */
+@Since("1.4.0")
 @Experimental
-final class GBTRegressionModel(
-    override val uid: String,
-    private val _trees: Array[DecisionTreeRegressionModel],
-    private val _treeWeights: Array[Double])
+final class GBTRegressionModel @Since("1.4.0") (
+  @Since("1.4.0") override val uid: String,
+  private val _trees: Array[DecisionTreeRegressionModel],
+  private val _treeWeights: Array[Double])
   extends PredictionModel[Vector, GBTRegressionModel]
   with TreeEnsembleModel with Serializable {

   require(numTrees > 0, "GBTRegressionModel requires at least 1 tree.")
   require(_trees.length == _treeWeights.length, "GBTRegressionModel given trees, treeWeights of" +
     s" non-matching lengths (${_trees.length}, ${_treeWeights.length}, respectively).")
-
+  @Since("1.4.0")
   override def trees: Array[DecisionTreeModel] = _trees.asInstanceOf[Array[DecisionTreeModel]]
-
+  @Since("1.4.0")
   override def treeWeights: Array[Double] = _treeWeights

   override protected def transformImpl(dataset: DataFrame): DataFrame = {
@@ -183,11 +189,11 @@ final class GBTRegressionModel(
     val treePredictions = _trees.map(_.rootNode.predictImpl(features).prediction)
     blas.ddot(numTrees, treePredictions, 1, _treeWeights, 1)
   }
-
+  @Since("1.4.0")
   override def copy(extra: ParamMap): GBTRegressionModel = {
     copyValues(new GBTRegressionModel(uid, _trees, _treeWeights), extra).setParent(parent)
   }
-
+  @Since("1.4.0")
   override def toString: String = {
     s"GBTRegressionModel with $numTrees trees"
   }

diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
index 2ff500f291ab..4236c6d8534d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.ml.regression

 import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.{Estimator, Model}
 import org.apache.spark.ml.param._
 import org.apache.spark.ml.param.shared.{HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol}
@@ -124,32 +124,39 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
  *
  * Uses [[org.apache.spark.mllib.regression.IsotonicRegression]].
  */
+@Since("1.5.0")
 @Experimental
-class IsotonicRegression(override val uid: String) extends Estimator[IsotonicRegressionModel]
-  with IsotonicRegressionBase {
-
+class IsotonicRegression @Since("1.5.0") (@Since("1.5.0") override val uid: String)
+  extends Estimator[IsotonicRegressionModel] with IsotonicRegressionBase {
+  @Since("1.5.0")
   def this() = this(Identifiable.randomUID("isoReg"))

   /** @group setParam */
+  @Since("1.5.0")
   def setLabelCol(value: String): this.type = set(labelCol, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setFeaturesCol(value: String): this.type = set(featuresCol, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setPredictionCol(value: String): this.type = set(predictionCol, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setIsotonic(value: Boolean): this.type = set(isotonic, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setWeightCol(value: String): this.type = set(weightCol, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setFeatureIndex(value: Int): this.type = set(featureIndex, value)
-
+  @Since("1.5.0")
   override def copy(extra: ParamMap): IsotonicRegression = defaultCopy(extra)
-
+  @Since("1.5.0")
   override def fit(dataset: DataFrame): IsotonicRegressionModel = {
     validateAndTransformSchema(dataset.schema, fitting = true)
     // Extract columns from data. If dataset is persisted, do not persist oldDataset.
@@ -162,7 +169,7 @@ class IsotonicRegression(override val uid: String) extends Estimator[IsotonicReg
     copyValues(new IsotonicRegressionModel(uid, oldModel).setParent(this))
   }
-
+  @Since("1.5.0")
   override def transformSchema(schema: StructType): StructType = {
     validateAndTransformSchema(schema, fitting = true)
   }

diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index e4602d36ccc8..3c1e17e341ee 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -23,7 +23,7 @@ import breeze.linalg.{DenseVector => BDV, norm => brzNorm}
 import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN}

 import org.apache.spark.{Logging, SparkException}
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.PredictorParams
 import org.apache.spark.ml.param.ParamMap
 import org.apache.spark.ml.param.shared._
@@ -61,11 +61,12 @@ private[regression] trait LinearRegressionParams extends PredictorParams
  * - L1 (Lasso)
  * - L2 + L1 (elastic net)
  */
+@Since("1.3.0")
 @Experimental
-class LinearRegression(override val uid: String)
+class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String)
   extends Regressor[Vector, LinearRegression, LinearRegressionModel]
   with LinearRegressionParams with Logging {
-
+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("linReg"))

   /**
@@ -73,6 +74,7 @@ class LinearRegression(override val uid: String)
    * Default is 0.0.
    * @group setParam
    */
+  @Since("1.3.0")
   def setRegParam(value: Double): this.type = set(regParam, value)
   setDefault(regParam -> 0.0)

@@ -81,6 +83,7 @@ class LinearRegression(override val uid: String)
    * Default is true.
    * @group setParam
    */
+  @Since("1.5.0")
   def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value)
   setDefault(fitIntercept -> true)

@@ -93,6 +96,7 @@ class LinearRegression(override val uid: String)
    * Default is true.
    * @group setParam
    */
+  @Since("1.5.0")
   def setStandardization(value: Boolean): this.type = set(standardization, value)
   setDefault(standardization -> true)

@@ -103,6 +107,7 @@ class LinearRegression(override val uid: String)
    * Default is 0.0 which is an L2 penalty.
    * @group setParam
    */
+  @Since("1.4.0")
   def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value)
   setDefault(elasticNetParam -> 0.0)

@@ -111,6 +116,7 @@ class LinearRegression(override val uid: String)
    * Default is 100.
    * @group setParam
    */
+  @Since("1.3.0")
   def setMaxIter(value: Int): this.type = set(maxIter, value)
   setDefault(maxIter -> 100)

@@ -120,6 +126,7 @@ class LinearRegression(override val uid: String)
    * Default is 1E-6.
    * @group setParam
    */
+  @Since("1.4.0")
   def setTol(value: Double): this.type = set(tol, value)
   setDefault(tol -> 1E-6)

@@ -254,7 +261,7 @@ class LinearRegression(override val uid: String)
       objectiveHistory)
     model.setSummary(trainingSummary)
   }
-
+  @Since("1.3.0")
   override def copy(extra: ParamMap): LinearRegression = defaultCopy(extra)
 }

diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
index 2f36da371f57..a546e034aa79 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.ml.regression

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.{PredictionModel, Predictor}
 import org.apache.spark.ml.param.ParamMap
 import org.apache.spark.ml.tree.{DecisionTreeModel, RandomForestParams, TreeEnsembleModel, TreeRegressorParams}
@@ -37,44 +37,45 @@ import org.apache.spark.sql.functions._
 * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for regression.
 * It supports both continuous and categorical features.
 */
+@Since("1.4.0")
 @Experimental
-final class RandomForestRegressor(override val uid: String)
+final class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends Predictor[Vector, RandomForestRegressor, RandomForestRegressionModel]
   with RandomForestParams with TreeRegressorParams {
-
+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("rfr"))

   // Override parameter setters from parent trait for Java API compatibility.
   // Parameters from TreeRegressorParams:
-
+  @Since("1.4.0")
   override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)
-
+  @Since("1.4.0")
   override def setMaxBins(value: Int): this.type = super.setMaxBins(value)
-
+  @Since("1.4.0")
   override def setMinInstancesPerNode(value: Int): this.type = super.setMinInstancesPerNode(value)
-
+  @Since("1.4.0")
   override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)
-
+  @Since("1.4.0")
   override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)
-
+  @Since("1.4.0")
   override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)
-
+  @Since("1.4.0")
   override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)
-
+  @Since("1.4.0")
   override def setImpurity(value: String): this.type = super.setImpurity(value)

   // Parameters from TreeEnsembleParams:
-
+  @Since("1.4.0")
   override def setSubsamplingRate(value: Double): this.type = super.setSubsamplingRate(value)
-
+  @Since("1.4.0")
   override def setSeed(value: Long): this.type = super.setSeed(value)

   // Parameters from RandomForestParams:
-
+  @Since("1.4.0")
   override def setNumTrees(value: Int): this.type = super.setNumTrees(value)
-
+  @Since("1.4.0")
   override def setFeatureSubsetStrategy(value: String): this.type =
     super.setFeatureSubsetStrategy(value)

@@ -90,10 +91,11 @@ final class RandomForestRegressor(override val uid: String)
     val numFeatures = oldDataset.first().features.size
     new RandomForestRegressionModel(trees, numFeatures)
   }
-
+  @Since("1.4.0")
   override def copy(extra: ParamMap): RandomForestRegressor = defaultCopy(extra)
 }

+@Since("1.4.0")
 @Experimental
 object RandomForestRegressor {
   /** Accessor for supported impurity settings: variance */

From 40dc4f24d356ce246fbf65698578c3d688a790dc Mon Sep 17 00:00:00 2001
From: "Ehsan M.Kermani"
Date: Fri, 11 Sep 2015 17:57:03 -0700
Subject: [PATCH 2/5] Since annotation for ml.tuning

---
 .../spark/ml/tuning/CrossValidator.scala    | 20 ++++++++++++-------
 .../spark/ml/tuning/ParamGridBuilder.scala  | 12 ++++++++++-
 .../ml/tuning/TrainValidationSplit.scala    | 20 ++++++++++++-------
 3 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
index 0679bfd0f3ff..67e2a6683142 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
@@ -20,7 +20,7 @@ package org.apache.spark.ml.tuning
 import com.github.fommil.netlib.F2jBLAS

 import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml._
 import org.apache.spark.ml.evaluation.Evaluator
 import org.apache.spark.ml.param._
@@ -51,26 +51,32 @@ private[ml] trait CrossValidatorParams extends ValidatorParams {
 * :: Experimental ::
 * K-fold cross validation.
  */
+@Since("1.2.0")
 @Experimental
-class CrossValidator(override val uid: String) extends Estimator[CrossValidatorModel]
+class CrossValidator @Since("1.2.0") (@Since("1.2.0") override val uid: String)
+  extends Estimator[CrossValidatorModel]
   with CrossValidatorParams with Logging {
-
+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("cv"))

   private val f2jBLAS = new F2jBLAS

   /** @group setParam */
+  @Since("1.2.0")
   def setEstimator(value: Estimator[_]): this.type = set(estimator, value)

   /** @group setParam */
+  @Since("1.2.0")
   def setEstimatorParamMaps(value: Array[ParamMap]): this.type = set(estimatorParamMaps, value)

   /** @group setParam */
+  @Since("1.2.0")
   def setEvaluator(value: Evaluator): this.type = set(evaluator, value)

   /** @group setParam */
+  @Since("1.2.0")
   def setNumFolds(value: Int): this.type = set(numFolds, value)
-
+  @Since("1.2.0")
   override def fit(dataset: DataFrame): CrossValidatorModel = {
     val schema = dataset.schema
     transformSchema(schema, logging = true)
@@ -108,11 +114,11 @@ class CrossValidator(override val uid: String) extends Estimator[CrossValidatorM
     val bestModel = est.fit(dataset, epm(bestIndex)).asInstanceOf[Model[_]]
     copyValues(new CrossValidatorModel(uid, bestModel, metrics).setParent(this))
   }
-
+  @Since("1.2.0")
   override def transformSchema(schema: StructType): StructType = {
     $(estimator).transformSchema(schema)
   }
-
+  @Since("1.4.0")
   override def validateParams(): Unit = {
     super.validateParams()
     val est = $(estimator)
@@ -120,7 +126,7 @@ class CrossValidator(override val uid: String) extends Estimator[CrossValidatorM
       est.copy(paramMap).validateParams()
     }
   }
-
+  @Since("1.4.0")
   override def copy(extra: ParamMap): CrossValidator = {
     val copied = defaultCopy(extra).asInstanceOf[CrossValidator]
     if (copied.isDefined(estimator)) {

diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
index 98a8f0330ca4..6b6fcb093a87 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
@@ -20,13 +20,14 @@ package org.apache.spark.ml.tuning
 import scala.annotation.varargs
 import scala.collection.mutable

-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.param._

 /**
  * :: Experimental ::
  * Builder for a param grid used in grid search-based model selection.
  */
+@Since("1.2.0")
 @Experimental
 class ParamGridBuilder {

@@ -35,6 +36,7 @@ class ParamGridBuilder {
   /**
    * Sets the given parameters in this grid to fixed values.
    */
+  @Since("1.2.0")
   def baseOn(paramMap: ParamMap): this.type = {
     baseOn(paramMap.toSeq: _*)
     this
@@ -43,6 +45,7 @@ class ParamGridBuilder {
   /**
    * Sets the given parameters in this grid to fixed values.
    */
+  @Since("1.2.0")
   @varargs
   def baseOn(paramPairs: ParamPair[_]*): this.type = {
     paramPairs.foreach { p =>
@@ -54,6 +57,7 @@ class ParamGridBuilder {
   /**
    * Adds a param with multiple values (overwrites if the input param exists).
    */
+  @Since("1.2.0")
   def addGrid[T](param: Param[T], values: Iterable[T]): this.type = {
     paramGrid.put(param, values)
     this
@@ -64,6 +68,7 @@ class ParamGridBuilder {
   /**
    * Adds a double param with multiple values.
    */
+  @Since("1.2.0")
   def addGrid(param: DoubleParam, values: Array[Double]): this.type = {
     addGrid[Double](param, values)
   }
@@ -71,6 +76,7 @@ class ParamGridBuilder {
   /**
    * Adds a int param with multiple values.
    */
+  @Since("1.2.0")
   def addGrid(param: IntParam, values: Array[Int]): this.type = {
     addGrid[Int](param, values)
   }
@@ -78,6 +84,7 @@ class ParamGridBuilder {
   /**
    * Adds a float param with multiple values.
    */
+  @Since("1.2.0")
   def addGrid(param: FloatParam, values: Array[Float]): this.type = {
     addGrid[Float](param, values)
   }
@@ -85,6 +92,7 @@ class ParamGridBuilder {
   /**
    * Adds a long param with multiple values.
    */
+  @Since("1.2.0")
   def addGrid(param: LongParam, values: Array[Long]): this.type = {
     addGrid[Long](param, values)
   }
@@ -92,6 +100,7 @@ class ParamGridBuilder {
   /**
    * Adds a boolean param with true and false.
   */
+  @Since("1.2.0")
   def addGrid(param: BooleanParam): this.type = {
     addGrid[Boolean](param, Array(true, false))
   }
@@ -99,6 +108,7 @@ class ParamGridBuilder {
   /**
    * Builds and returns all combinations of parameters specified by the param grid.
    */
+  @Since("1.2.0")
   def build(): Array[ParamMap] = {
     var paramMaps = Array(new ParamMap)
     paramGrid.foreach { case (param, values) =>

diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala
index 73a14b831015..4a900c4fd432 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.ml.tuning

 import org.apache.spark.Logging
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.ml.evaluation.Evaluator
 import org.apache.spark.ml.{Estimator, Model}
 import org.apache.spark.ml.param.{DoubleParam, ParamMap, ParamValidators}
@@ -51,24 +51,30 @@ private[ml] trait TrainValidationSplitParams extends ValidatorParams {
 * and uses evaluation metric on the validation set to select the best model.
 * Similar to [[CrossValidator]], but only splits the set once.
  */
+@Since("1.5.0")
 @Experimental
-class TrainValidationSplit(override val uid: String) extends Estimator[TrainValidationSplitModel]
+class TrainValidationSplit @Since("1.5.0") (@Since("1.5.0") override val uid: String)
+  extends Estimator[TrainValidationSplitModel]
   with TrainValidationSplitParams with Logging {
-
+  @Since("1.5.0")
   def this() = this(Identifiable.randomUID("tvs"))

   /** @group setParam */
+  @Since("1.5.0")
   def setEstimator(value: Estimator[_]): this.type = set(estimator, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setEstimatorParamMaps(value: Array[ParamMap]): this.type = set(estimatorParamMaps, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setEvaluator(value: Evaluator): this.type = set(evaluator, value)

   /** @group setParam */
+  @Since("1.5.0")
   def setTrainRatio(value: Double): this.type = set(trainRatio, value)
-
+  @Since("1.5.0")
   override def fit(dataset: DataFrame): TrainValidationSplitModel = {
     val schema = dataset.schema
     transformSchema(schema, logging = true)
@@ -107,11 +113,11 @@ class TrainValidationSplit(override val uid: String) extends Estimator[TrainVali
     val bestModel = est.fit(dataset, epm(bestIndex)).asInstanceOf[Model[_]]
     copyValues(new TrainValidationSplitModel(uid, bestModel, metrics).setParent(this))
   }
-
+  @Since("1.5.0")
   override def transformSchema(schema: StructType): StructType = {
     $(estimator).transformSchema(schema)
   }
-
+  @Since("1.5.0")
   override def validateParams(): Unit = {
     super.validateParams()
     val est = $(estimator)
@@ -119,7 +125,7 @@ class TrainValidationSplit(override val uid: String) extends Estimator[TrainVali
       est.copy(paramMap).validateParams()
     }
   }
-
+  @Since("1.5.0")
   override def copy(extra: ParamMap): TrainValidationSplit = {
     val copied = defaultCopy(extra).asInstanceOf[TrainValidationSplit]
     if (copied.isDefined(estimator)) {

From eb4f58061d93ab9e4fff4500d1db5169d087d1ba Mon Sep 17 00:00:00 2001
From: "Ehsan M.Kermani"
Date: Tue, 20 Oct 2015 17:20:25 -0700
Subject: [PATCH 3/5] indentations fixed

---
 .../scala/org/apache/spark/ml/regression/GBTRegressor.scala | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
index 662ada9f3cde..d99f327b392f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
@@ -161,9 +161,9 @@ object GBTRegressor {
 @Since("1.4.0")
 @Experimental
 final class GBTRegressionModel @Since("1.4.0") (
-  @Since("1.4.0") override val uid: String,
-  private val _trees: Array[DecisionTreeRegressionModel],
-  private val _treeWeights: Array[Double])
+    @Since("1.4.0") override val uid: String,
+    private val _trees: Array[DecisionTreeRegressionModel],
+    private val _treeWeights: Array[Double])
   extends PredictionModel[Vector, GBTRegressionModel]
   with TreeEnsembleModel with Serializable {

From 338bbf8b00bacc588247b54de1e17800deb95fbb Mon Sep 17 00:00:00 2001
From: "Ehsan M.Kermani"
Date: Wed, 21 Oct 2015 12:53:25 -0700
Subject: [PATCH 4/5] coding style fixed

---
 .../org/apache/spark/ml/regression/DecisionTreeRegressor.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
index 77d47e5458e9..b61495286f07 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.DataFrame
  * for regression.
  * It supports both continuous and categorical features.
  */
-@Since("1.4.0") 
+@Since("1.4.0")
 @Experimental
 final class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends Predictor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel]

From 27c9cc83b571cbab11905849c8c3034844a9a232 Mon Sep 17 00:00:00 2001
From: "Ehsan M.Kermani"
Date: Tue, 27 Oct 2015 10:23:33 -0700
Subject: [PATCH 5/5] renewed commits for JIRA-10266

---
 .../ml/regression/DecisionTreeRegressor.scala   |  2 +-
 .../spark/ml/regression/GBTRegressor.scala      | 18 +++---------------
 .../spark/ml/regression/LinearRegression.scala  |  4 ----
 3 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
index 4d7ddba9bb89..705db5c445e1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -56,7 +56,7 @@ final class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val
   override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)
   @Since("1.4.0")
   override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)
-  @Since("1.4.0") 
+  @Since("1.4.0")
   override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)
   @Since("1.4.0")
   override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
index 1d90e6a4da39..9d648739b89a 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
@@ -159,29 +159,18 @@ object GBTRegressor {
  * @param _trees Decision trees in the ensemble.
  * @param _treeWeights Weights for the decision trees in the ensemble.
  */
-@Since("1.4.0")
 @Experimental
-<<<<<<< HEAD
-final class GBTRegressionModel @Since("1.4.0") (
-    @Since("1.4.0") override val uid: String,
-    private val _trees: Array[DecisionTreeRegressionModel],
-    private val _treeWeights: Array[Double])
-=======
 final class GBTRegressionModel private[ml](
     override val uid: String,
     private val _trees: Array[DecisionTreeRegressionModel],
     private val _treeWeights: Array[Double],
     override val numFeatures: Int)
->>>>>>> 360ed832f5213b805ac28cf1d2828be09480f2d6
   extends PredictionModel[Vector, GBTRegressionModel]
   with TreeEnsembleModel with Serializable {

   require(numTrees > 0, "GBTRegressionModel requires at least 1 tree.")
   require(_trees.length == _treeWeights.length, "GBTRegressionModel given trees, treeWeights of" +
     s" non-matching lengths (${_trees.length}, ${_treeWeights.length}, respectively).")
-<<<<<<< HEAD
-  @Since("1.4.0")
-=======

   /**
    * Construct a GBTRegressionModel
   */
   def this(uid: String, _trees: Array[DecisionTreeRegressionModel], _treeWeights: Array[Double]) =
     this(uid, _trees, _treeWeights, -1)

->>>>>>> 360ed832f5213b805ac28cf1d2828be09480f2d6
   override def trees: Array[DecisionTreeModel] = _trees.asInstanceOf[Array[DecisionTreeModel]]
-  @Since("1.4.0")
+
   override def treeWeights: Array[Double] = _treeWeights

   override protected def transformImpl(dataset: DataFrame): DataFrame = {
@@ -210,12 +198,12 @@ final class GBTRegressionModel private[ml](
     val treePredictions = _trees.map(_.rootNode.predictImpl(features).prediction)
     blas.ddot(numTrees, treePredictions, 1, _treeWeights, 1)
   }
-  @Since("1.4.0")
+
   override def copy(extra: ParamMap): GBTRegressionModel = {
     copyValues(new GBTRegressionModel(uid, _trees, _treeWeights, numFeatures),
       extra).setParent(parent)
   }
-  @Since("1.4.0")
+
   override def toString: String = {
     s"GBTRegressionModel (uid=$uid) with $numTrees trees"
   }

diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 27292399b11c..19f940c2b7e8 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -23,13 +23,9 @@ import breeze.linalg.{DenseVector => BDV}
 import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN}

 import org.apache.spark.{Logging, SparkException}
-<<<<<<< HEAD
 import org.apache.spark.annotation.{Experimental, Since}
-=======
-import org.apache.spark.annotation.Experimental
 import org.apache.spark.ml.feature.Instance
 import org.apache.spark.ml.optim.WeightedLeastSquares
->>>>>>> 360ed832f5213b805ac28cf1d2828be09480f2d6
 import org.apache.spark.ml.PredictorParams
 import org.apache.spark.ml.param.ParamMap
 import org.apache.spark.ml.param.shared._
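
The placement pattern these patches repeat is easiest to see outside diff context: `@Since` is attached separately to the class, its primary constructor, the `uid` constructor parameter, any auxiliary constructor, and each public method, since annotating only the class would not mark those members in the generated API docs. The sketch below is a minimal, self-contained illustration, not Spark code: `ExampleRegressor` and its members are hypothetical, and the local `Since` class is a stand-in for `org.apache.spark.annotation.Since` (a `StaticAnnotation` whose single argument records the version in which an API was added) so that the snippet compiles on its own.

```scala
import scala.annotation.StaticAnnotation

// Stand-in for org.apache.spark.annotation.Since; the real annotation also
// extends StaticAnnotation and takes the version string as its only argument.
class Since(version: String) extends StaticAnnotation

// Hypothetical estimator showing every placement used in the patches above:
// class, primary constructor, constructor parameter, auxiliary constructor,
// and a setter. As in the patches, the setter returns this.type so callers
// keep the concrete type when chaining.
@Since("1.4.0")
class ExampleRegressor @Since("1.4.0") (@Since("1.4.0") val uid: String) {

  @Since("1.4.0")
  def this() = this("exampleReg")

  @Since("1.4.0")
  def setMaxDepth(value: Int): this.type = this
}
```

Note where the primary-constructor annotation sits: between the class name and the parameter list (`class ExampleRegressor @Since("1.4.0") (...)`). Placing it before the `class` keyword instead would annotate the class itself, which is why the patches carry two `@Since` markers on each class declaration line.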