@@ -17,7 +17,7 @@

package org.apache.spark.ml.regression

import org.apache.spark.annotation.Experimental
import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.tree.{DecisionTreeModel, DecisionTreeParams, Node, TreeRegressorParams}
@@ -36,30 +36,31 @@ import org.apache.spark.sql.DataFrame
* for regression.
* It supports both continuous and categorical features.
*/
@Since("1.4.0")
@Experimental
final class DecisionTreeRegressor(override val uid: String)
final class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Predictor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel]
with DecisionTreeParams with TreeRegressorParams {

@Since("1.4.0")
def this() = this(Identifiable.randomUID("dtr"))

// Override parameter setters from parent trait for Java API compatibility.

@Since("1.4.0")
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)

@Since("1.4.0")
override def setMaxBins(value: Int): this.type = super.setMaxBins(value)

@Since("1.4.0")
override def setMinInstancesPerNode(value: Int): this.type =
super.setMinInstancesPerNode(value)

@Since("1.4.0")
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)

@Since("1.4.0")
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)

@Since("1.4.0")
override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)

@Since("1.4.0")
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)

@Since("1.4.0")
override def setImpurity(value: String): this.type = super.setImpurity(value)

override protected def train(dataset: DataFrame): DecisionTreeRegressionModel = {
@@ -77,10 +78,11 @@ final class DecisionTreeRegressor(override val uid: String)
super.getOldStrategy(categoricalFeatures, numClasses = 0, OldAlgo.Regression, getOldImpurity,
subsamplingRate = 1.0)
}

@Since("1.4.0")
override def copy(extra: ParamMap): DecisionTreeRegressor = defaultCopy(extra)
}

@Since("1.4.0")
@Experimental
object DecisionTreeRegressor {
/** Accessor for supported impurities: variance */
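
A minimal usage sketch of the annotated estimator (not part of this diff; assumes a Spark 1.5-era DataFrame named `training` with "label" and "features" columns):

    import org.apache.spark.ml.regression.DecisionTreeRegressor

    // Configure the tree via the Java-friendly setters annotated above.
    val dtr = new DecisionTreeRegressor()
      .setMaxDepth(5)
      .setMaxBins(32)
      .setMinInstancesPerNode(1)
      .setImpurity("variance")   // the only supported impurity for regression trees

    val dtModel = dtr.fit(training)            // DecisionTreeRegressionModel
    val dtPredictions = dtModel.transform(training)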
@@ -20,7 +20,7 @@ package org.apache.spark.ml.regression
import com.github.fommil.netlib.BLAS.{getInstance => blas}

import org.apache.spark.Logging
import org.apache.spark.annotation.Experimental
import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.{Param, ParamMap}
import org.apache.spark.ml.tree.{DecisionTreeModel, GBTParams, TreeEnsembleModel, TreeRegressorParams}
@@ -42,54 +42,56 @@ import org.apache.spark.sql.types.DoubleType
* learning algorithm for regression.
* It supports both continuous and categorical features.
*/
@Since("1.4.0")
@Experimental
final class GBTRegressor(override val uid: String)
final class GBTRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Predictor[Vector, GBTRegressor, GBTRegressionModel]
with GBTParams with TreeRegressorParams with Logging {

@Since("1.4.0")
def this() = this(Identifiable.randomUID("gbtr"))

// Override parameter setters from parent trait for Java API compatibility.

// Parameters from TreeRegressorParams:

@Since("1.4.0")
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)

@Since("1.4.0")
override def setMaxBins(value: Int): this.type = super.setMaxBins(value)

@Since("1.4.0")
override def setMinInstancesPerNode(value: Int): this.type =
super.setMinInstancesPerNode(value)

@Since("1.4.0")
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)

@Since("1.4.0")
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)

@Since("1.4.0")
override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)

@Since("1.4.0")
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)

/**
* The impurity setting is ignored for GBT models.
* Individual trees are built using impurity "Variance."
*/
@Since("1.4.0")
override def setImpurity(value: String): this.type = {
logWarning("GBTRegressor.setImpurity should NOT be used")
this
}

// Parameters from TreeEnsembleParams:

@Since("1.4.0")
override def setSubsamplingRate(value: Double): this.type = super.setSubsamplingRate(value)

@Since("1.4.0")
override def setSeed(value: Long): this.type = {
logWarning("The 'seed' parameter is currently ignored by Gradient Boosting.")
super.setSeed(value)
}

// Parameters from GBTParams:

@Since("1.4.0")
override def setMaxIter(value: Int): this.type = super.setMaxIter(value)

@Since("1.4.0")
override def setStepSize(value: Double): this.type = super.setStepSize(value)

// Parameters for GBTRegressor:
@@ -108,9 +110,11 @@ final class GBTRegressor(override val uid: String)
setDefault(lossType -> "squared")

/** @group setParam */
@Since("1.4.0")
def setLossType(value: String): this.type = set(lossType, value)

/** @group getParam */
@Since("1.4.0")
def getLossType: String = $(lossType).toLowerCase

/** (private[ml]) Convert new loss to old loss. */
@@ -134,10 +138,11 @@ final class GBTRegressor(override val uid: String)
val oldModel = oldGBT.run(oldDataset)
GBTRegressionModel.fromOld(oldModel, this, categoricalFeatures, numFeatures)
}

@Since("1.4.0")
override def copy(extra: ParamMap): GBTRegressor = defaultCopy(extra)
}

@Since("1.4.0")
@Experimental
object GBTRegressor {
// The losses below should be lowercase.
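
A minimal usage sketch (not part of this diff; `training` is an assumed DataFrame with "label" and "features" columns):

    import org.apache.spark.ml.regression.GBTRegressor

    val gbt = new GBTRegressor()
      .setMaxIter(20)           // number of boosting iterations, i.e. trees
      .setStepSize(0.1)         // learning rate
      .setLossType("squared")   // default loss per the setDefault above
      .setMaxDepth(5)

    val gbtModel = gbt.fit(training)
    val gbtPredictions = gbtModel.transform(training)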
@@ -18,7 +18,7 @@
package org.apache.spark.ml.regression

import org.apache.spark.Logging
import org.apache.spark.annotation.Experimental
import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.ml.param._
import org.apache.spark.ml.param.shared.{HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol}
@@ -124,32 +124,39 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
*
* Uses [[org.apache.spark.mllib.regression.IsotonicRegression]].
*/
@Since("1.5.0")
@Experimental
class IsotonicRegression(override val uid: String) extends Estimator[IsotonicRegressionModel]
with IsotonicRegressionBase {

class IsotonicRegression @Since("1.5.0") (@Since("1.5.0") override val uid: String)
extends Estimator[IsotonicRegressionModel] with IsotonicRegressionBase {
@Since("1.5.0")
def this() = this(Identifiable.randomUID("isoReg"))

/** @group setParam */
@Since("1.5.0")
def setLabelCol(value: String): this.type = set(labelCol, value)

/** @group setParam */
@Since("1.5.0")
def setFeaturesCol(value: String): this.type = set(featuresCol, value)

/** @group setParam */
@Since("1.5.0")
def setPredictionCol(value: String): this.type = set(predictionCol, value)

/** @group setParam */
@Since("1.5.0")
def setIsotonic(value: Boolean): this.type = set(isotonic, value)

/** @group setParam */
@Since("1.5.0")
def setWeightCol(value: String): this.type = set(weightCol, value)

/** @group setParam */
@Since("1.5.0")
def setFeatureIndex(value: Int): this.type = set(featureIndex, value)

@Since("1.5.0")
override def copy(extra: ParamMap): IsotonicRegression = defaultCopy(extra)

@Since("1.5.0")
override def fit(dataset: DataFrame): IsotonicRegressionModel = {
validateAndTransformSchema(dataset.schema, fitting = true)
// Extract columns from data. If dataset is persisted, do not persist oldDataset.
@@ -162,7 +169,7 @@ class IsotonicRegression(override val uid: String) extends Estimator[IsotonicReg

copyValues(new IsotonicRegressionModel(uid, oldModel).setParent(this))
}

@Since("1.5.0")
override def transformSchema(schema: StructType): StructType = {
validateAndTransformSchema(schema, fitting = true)
}
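
A minimal usage sketch (not part of this diff; `df` is an assumed DataFrame with "label" and "features" columns, where the features column is either a Double or a Vector with setFeatureIndex selecting the dimension to use):

    import org.apache.spark.ml.regression.IsotonicRegression

    val ir = new IsotonicRegression()
      .setIsotonic(true)     // fit a non-decreasing function; false fits a non-increasing one
      .setFeatureIndex(0)    // only relevant when the features column holds vectors

    val irModel = ir.fit(df)
    val calibrated = irModel.transform(df)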
@@ -23,7 +23,7 @@ import breeze.linalg.{DenseVector => BDV}
import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN}

import org.apache.spark.{Logging, SparkException}
import org.apache.spark.annotation.Experimental
import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.feature.Instance
import org.apache.spark.ml.optim.WeightedLeastSquares
import org.apache.spark.ml.PredictorParams
@@ -60,18 +60,20 @@ private[regression] trait LinearRegressionParams extends PredictorParams
* - L1 (Lasso)
* - L2 + L1 (elastic net)
*/
@Since("1.3.0")
@Experimental
class LinearRegression(override val uid: String)
class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String)
extends Regressor[Vector, LinearRegression, LinearRegressionModel]
with LinearRegressionParams with Logging {

@Since("1.4.0")
def this() = this(Identifiable.randomUID("linReg"))

/**
* Set the regularization parameter.
* Default is 0.0.
* @group setParam
*/
@Since("1.3.0")
def setRegParam(value: Double): this.type = set(regParam, value)
setDefault(regParam -> 0.0)

@@ -80,6 +82,7 @@ class LinearRegression(override val uid: String)
* Default is true.
* @group setParam
*/
@Since("1.5.0")
def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value)
setDefault(fitIntercept -> true)

@@ -92,6 +95,7 @@
* Default is true.
* @group setParam
*/
@Since("1.5.0")
def setStandardization(value: Boolean): this.type = set(standardization, value)
setDefault(standardization -> true)

@@ -102,6 +106,7 @@
* Default is 0.0 which is an L2 penalty.
* @group setParam
*/
@Since("1.4.0")
def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value)
setDefault(elasticNetParam -> 0.0)

@@ -110,6 +115,7 @@
* Default is 100.
* @group setParam
*/
@Since("1.3.0")
def setMaxIter(value: Int): this.type = set(maxIter, value)
setDefault(maxIter -> 100)

@@ -119,6 +125,7 @@
* Default is 1E-6.
* @group setParam
*/
@Since("1.4.0")
def setTol(value: Double): this.type = set(tol, value)
setDefault(tol -> 1E-6)

@@ -320,7 +327,7 @@ class LinearRegression(override val uid: String)
objectiveHistory)
model.setSummary(trainingSummary)
}

@Since("1.3.0")
override def copy(extra: ParamMap): LinearRegression = defaultCopy(extra)
}

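
A minimal elastic-net sketch (not part of this diff; `training` is an assumed DataFrame of label/features columns):

    import org.apache.spark.ml.regression.LinearRegression

    val lr = new LinearRegression()
      .setRegParam(0.3)           // overall regularization strength
      .setElasticNetParam(0.8)    // 0.0 = L2 only, 1.0 = L1 only, in between = elastic net
      .setMaxIter(100)
      .setTol(1e-6)
      .setFitIntercept(true)
      .setStandardization(true)

    val lrModel = lr.fit(training)
    val lrPredictions = lrModel.transform(training)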
@@ -17,7 +17,7 @@

package org.apache.spark.ml.regression

import org.apache.spark.annotation.Experimental
import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.tree.{DecisionTreeModel, RandomForestParams, TreeEnsembleModel, TreeRegressorParams}
@@ -37,44 +37,45 @@ import org.apache.spark.sql.functions._
* [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for regression.
* It supports both continuous and categorical features.
*/
@Since("1.4.0")
@Experimental
final class RandomForestRegressor(override val uid: String)
final class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: String)
extends Predictor[Vector, RandomForestRegressor, RandomForestRegressionModel]
with RandomForestParams with TreeRegressorParams {

@Since("1.4.0")
def this() = this(Identifiable.randomUID("rfr"))

// Override parameter setters from parent trait for Java API compatibility.

// Parameters from TreeRegressorParams:

@Since("1.4.0")
override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value)

@Since("1.4.0")
override def setMaxBins(value: Int): this.type = super.setMaxBins(value)

@Since("1.4.0")
override def setMinInstancesPerNode(value: Int): this.type =
super.setMinInstancesPerNode(value)

@Since("1.4.0")
override def setMinInfoGain(value: Double): this.type = super.setMinInfoGain(value)

@Since("1.4.0")
override def setMaxMemoryInMB(value: Int): this.type = super.setMaxMemoryInMB(value)

@Since("1.4.0")
override def setCacheNodeIds(value: Boolean): this.type = super.setCacheNodeIds(value)

@Since("1.4.0")
override def setCheckpointInterval(value: Int): this.type = super.setCheckpointInterval(value)

@Since("1.4.0")
override def setImpurity(value: String): this.type = super.setImpurity(value)

// Parameters from TreeEnsembleParams:

@Since("1.4.0")
override def setSubsamplingRate(value: Double): this.type = super.setSubsamplingRate(value)

@Since("1.4.0")
override def setSeed(value: Long): this.type = super.setSeed(value)

// Parameters from RandomForestParams:

@Since("1.4.0")
override def setNumTrees(value: Int): this.type = super.setNumTrees(value)

@Since("1.4.0")
override def setFeatureSubsetStrategy(value: String): this.type =
super.setFeatureSubsetStrategy(value)

@@ -90,10 +91,11 @@ final class RandomForestRegressor(override val uid: String)
val numFeatures = oldDataset.first().features.size
new RandomForestRegressionModel(trees, numFeatures)
}

@Since("1.4.0")
override def copy(extra: ParamMap): RandomForestRegressor = defaultCopy(extra)
}

@Since("1.4.0")
@Experimental
object RandomForestRegressor {
/** Accessor for supported impurity settings: variance */
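
A minimal usage sketch (not part of this diff; `training` is an assumed DataFrame of label/features columns):

    import org.apache.spark.ml.regression.RandomForestRegressor

    val rf = new RandomForestRegressor()
      .setNumTrees(50)
      .setFeatureSubsetStrategy("auto")   // strategy for how many features to consider at each split
      .setSubsamplingRate(0.8)
      .setMaxDepth(5)
      .setSeed(42L)

    val rfModel = rf.fit(training)
    val rfPredictions = rfModel.transform(training)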