2 changes: 1 addition & 1 deletion mllib/src/main/scala/org/apache/spark/ml/Predictor.scala
@@ -115,7 +115,7 @@ abstract class Predictor[
       case _ => labelCasted
     }
 
-    copyValues(train(casted).setParent(this))
+    copyValues(train(casted).setParent(this), extra = ParamMap.empty, copyDefault = false)
   }
 
   override def copy(extra: ParamMap): Learner
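
Predictor.fit() is the shared entry point for every ML predictor, so this one-line change applies across the board: explicitly set params are still copied onto the fitted model, while defaults are left to be resolved by the model's own shared params trait. A minimal sketch of the observable behavior, assuming a local SparkSession and a toy two-row dataset:

    import org.apache.spark.ml.classification.LogisticRegression
    import org.apache.spark.ml.linalg.Vectors
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder.master("local[2]").appName("copyDefault-sketch").getOrCreate()
    import spark.implicits._

    val train = Seq((0.0, Vectors.dense(0.0)), (1.0, Vectors.dense(1.0)))
      .toDF("label", "features")

    val model = new LogisticRegression().setMaxIter(5).fit(train)
    assert(model.getMaxIter == 5)     // explicitly set: copied from the estimator
    assert(model.getRegParam == 0.0)  // default: resolved from LogisticRegressionParams,
                                      // no longer copied into the model's default map
    spark.stop()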
mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.ml.classification
 
 import org.apache.hadoop.fs.Path
-import org.json4s.{DefaultFormats, JObject}
+import org.json4s.{DefaultFormats, JInt, JObject, JString}
 import org.json4s.JsonDSL._
 
 import org.apache.spark.annotation.Since
@@ -256,8 +256,9 @@ object DecisionTreeClassificationModel extends MLReadable[DecisionTreeClassifica
 
     override protected def saveImpl(path: String): Unit = {
       val extraMetadata: JObject = Map(
-        "numFeatures" -> instance.numFeatures,
-        "numClasses" -> instance.numClasses)
+        "numFeatures" -> JInt(instance.numFeatures),
+        "numClasses" -> JInt(instance.numClasses),
+        "impurity" -> JString(instance.getImpurity))
       DefaultParamsWriter.saveMetadata(instance, path, sc, Some(extraMetadata))
       val (nodeData, _) = NodeData.build(instance.rootNode, 0)
       val dataPath = new Path(path, "data").toString
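
Why the explicit JInt/JString wrappers: with org.json4s.JsonDSL, a homogeneous Map[String, Int] converts to a JObject implicitly, but adding the String-valued "impurity" entry widens the map's value type to Any, for which no implicit JValue conversion exists. Wrapping every value keeps the map's value type at JValue, so the ascription to JObject still compiles. A small self-contained sketch (the literals stand in for instance.numFeatures, instance.numClasses, and instance.getImpurity):

    import org.json4s.{JInt, JObject, JString}
    import org.json4s.JsonDSL._
    import org.json4s.jackson.JsonMethods.{compact, render}

    val extraMetadata: JObject = Map(
      "numFeatures" -> JInt(100),
      "numClasses" -> JInt(2),
      "impurity" -> JString("gini"))

    println(compact(render(extraMetadata)))
    // {"numFeatures":100,"numClasses":2,"impurity":"gini"}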
mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.ml.classification
 
 import com.github.fommil.netlib.BLAS.{getInstance => blas}
-import org.json4s.{DefaultFormats, JObject}
+import org.json4s.{DefaultFormats, JInt, JObject, JString}
 import org.json4s.JsonDSL._
 
 import org.apache.spark.annotation.Since
@@ -343,6 +343,7 @@ object GBTClassificationModel extends MLReadable[GBTClassificationModel] {
 
   private val numFeaturesKey: String = "numFeatures"
   private val numTreesKey: String = "numTrees"
+  private val impurityKey: String = "impurity"
 
   @Since("2.0.0")
   override def read: MLReader[GBTClassificationModel] = new GBTClassificationModelReader
@@ -356,8 +357,9 @@ object GBTClassificationModel extends MLReadable[GBTClassificationModel] {
     override protected def saveImpl(path: String): Unit = {
 
       val extraMetadata: JObject = Map(
-        numFeaturesKey -> instance.numFeatures,
-        numTreesKey -> instance.getNumTrees)
+        numFeaturesKey -> JInt(instance.numFeatures),
+        numTreesKey -> JInt(instance.getNumTrees),
+        impurityKey -> JString(instance.getImpurity))
       EnsembleModelReadWrite.saveImpl(instance, path, sparkSession, extraMetadata)
     }
   }
mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -244,6 +244,16 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas
   @Since("2.2.0")
   def getUpperBoundsOnIntercepts: Vector = $(upperBoundsOnIntercepts)
 
+  setDefault(regParam -> 0.0)
+  setDefault(elasticNetParam -> 0.0)
+  setDefault(maxIter -> 100)
+  setDefault(tol -> 1E-6)
+  setDefault(fitIntercept -> true)
+  setDefault(standardization -> true)
+  setDefault(threshold -> 0.5)
+  setDefault(aggregationDepth -> 2)
+  setDefault(family -> "auto")
+
   protected def usingBoundConstrainedOptimization: Boolean = {
     isSet(lowerBoundsOnCoefficients) || isSet(upperBoundsOnCoefficients) ||
       isSet(lowerBoundsOnIntercepts) || isSet(upperBoundsOnIntercepts)
@@ -291,7 +301,6 @@ class LogisticRegression @Since("1.2.0") (
    */
   @Since("1.2.0")
   def setRegParam(value: Double): this.type = set(regParam, value)
-  setDefault(regParam -> 0.0)
 
   /**
    * Set the ElasticNet mixing parameter.
@@ -307,7 +316,6 @@
    */
   @Since("1.4.0")
   def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value)
-  setDefault(elasticNetParam -> 0.0)
 
   /**
    * Set the maximum number of iterations.
@@ -317,7 +325,6 @@
    */
   @Since("1.2.0")
   def setMaxIter(value: Int): this.type = set(maxIter, value)
-  setDefault(maxIter -> 100)
 
   /**
    * Set the convergence tolerance of iterations.
@@ -328,7 +335,6 @@
    */
   @Since("1.4.0")
   def setTol(value: Double): this.type = set(tol, value)
-  setDefault(tol -> 1E-6)
 
   /**
    * Whether to fit an intercept term.
@@ -338,7 +344,6 @@
    */
   @Since("1.4.0")
   def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value)
-  setDefault(fitIntercept -> true)
 
   /**
    * Sets the value of param [[family]].
@@ -348,7 +353,6 @@
    */
   @Since("2.1.0")
   def setFamily(value: String): this.type = set(family, value)
-  setDefault(family -> "auto")
 
   /**
    * Whether to standardize the training features before fitting the model.
@@ -362,11 +366,9 @@
    */
   @Since("1.5.0")
   def setStandardization(value: Boolean): this.type = set(standardization, value)
-  setDefault(standardization -> true)
 
   @Since("1.5.0")
   override def setThreshold(value: Double): this.type = super.setThreshold(value)
-  setDefault(threshold -> 0.5)
 
   @Since("1.5.0")
   override def getThreshold: Double = super.getThreshold
@@ -397,7 +399,6 @@
    */
   @Since("2.1.0")
   def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value)
-  setDefault(aggregationDepth -> 2)
 
   /**
    * Set the lower bounds on coefficients if fitting under bound constrained optimization.
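
Moving these setDefault calls from the LogisticRegression constructor into LogisticRegressionParams means every class that mixes in the trait, the estimator and LogisticRegressionModel alike, declares the same defaults itself, rather than the model receiving them only through copyValues. A quick check of the trait-level defaults (values as in the diff above):

    import org.apache.spark.ml.classification.LogisticRegression

    val lr = new LogisticRegression()
    assert(lr.getRegParam == 0.0)
    assert(lr.getMaxIter == 100)
    assert(lr.getTol == 1e-6)
    assert(lr.getFamily == "auto")
    assert(lr.getThreshold == 0.5)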
mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
@@ -41,6 +41,7 @@ private[classification] trait NaiveBayesParams extends PredictorParams with HasW
    */
   final val smoothing: DoubleParam = new DoubleParam(this, "smoothing", "The smoothing parameter.",
     ParamValidators.gtEq(0))
+  setDefault(smoothing -> 1.0)
 
   /** @group getParam */
   final def getSmoothing: Double = $(smoothing)
@@ -54,6 +55,7 @@ private[classification] trait NaiveBayesParams extends PredictorParams with HasW
   final val modelType: Param[String] = new Param[String](this, "modelType", "The model type " +
     "which is a string (case-sensitive). Supported options: multinomial (default) and bernoulli.",
     ParamValidators.inArray[String](NaiveBayes.supportedModelTypes.toArray))
+  setDefault(modelType -> NaiveBayes.Multinomial)
 
   /** @group getParam */
   final def getModelType: String = $(modelType)
@@ -91,7 +93,6 @@ class NaiveBayes @Since("1.5.0") (
    */
   @Since("1.5.0")
   def setSmoothing(value: Double): this.type = set(smoothing, value)
-  setDefault(smoothing -> 1.0)
 
   /**
    * Set the model type using a string (case-sensitive).
@@ -101,7 +102,6 @@
    */
   @Since("1.5.0")
   def setModelType(value: String): this.type = set(modelType, value)
-  setDefault(modelType -> NaiveBayes.Multinomial)
 
   /**
    * Sets the value of param [[weightCol]].
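
For reference, $(param) resolves through getOrDefault: an explicitly set value wins, otherwise the default declared alongside the param in the trait is used. Attaching setDefault directly to the declaration therefore changes where the default lives, not how it is resolved. A tiny check:

    import org.apache.spark.ml.classification.NaiveBayes

    val nb = new NaiveBayes()
    assert(nb.getSmoothing == 1.0)                    // trait-level default
    assert(nb.setSmoothing(0.5).getSmoothing == 0.5)  // explicit value wins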
mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.ml.classification
 
-import org.json4s.{DefaultFormats, JObject}
+import org.json4s.{DefaultFormats, JInt, JObject, JString}
 import org.json4s.JsonDSL._
 
 import org.apache.spark.annotation.Since
@@ -293,9 +293,10 @@ object RandomForestClassificationModel extends MLReadable[RandomForestClassifica
     override protected def saveImpl(path: String): Unit = {
       // Note: numTrees is not currently used, but could be nice to store for fast querying.
       val extraMetadata: JObject = Map(
-        "numFeatures" -> instance.numFeatures,
-        "numClasses" -> instance.numClasses,
-        "numTrees" -> instance.getNumTrees)
+        "numFeatures" -> JInt(instance.numFeatures),
+        "numClasses" -> JInt(instance.numClasses),
+        "numTrees" -> JInt(instance.getNumTrees),
+        "impurity" -> JString(instance.getImpurity))
       EnsembleModelReadWrite.saveImpl(instance, path, sparkSession, extraMetadata)
     }
   }
mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
@@ -419,7 +419,8 @@ class GaussianMixture @Since("2.0.0") (
       new MultivariateGaussian(mean, cov)
     }
 
-    val model = copyValues(new GaussianMixtureModel(uid, weights, gaussianDists)).setParent(this)
+    val model = copyValues(new GaussianMixtureModel(uid, weights, gaussianDists),
+      extra = ParamMap.empty, copyDefault = false).setParent(this)
     val summary = new GaussianMixtureSummary(model.transform(dataset),
       $(predictionCol), $(probabilityCol), $(featuresCol), $(k), logLikelihood)
     model.setSummary(Some(summary))
19 changes: 10 additions & 9 deletions mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
@@ -93,6 +93,14 @@ private[clustering] trait KMeansParams extends Params with HasMaxIter with HasFe
   @Since("1.5.0")
   def getInitSteps: Int = $(initSteps)
 
+  setDefault(
+    k -> 2,
+    maxIter -> 20,
+    initMode -> MLlibKMeans.K_MEANS_PARALLEL,
+    initSteps -> 2,
+    tol -> 1e-4,
+    distanceMeasure -> DistanceMeasure.EUCLIDEAN)
+
   /**
    * Validates and transforms the input schema.
   * @param schema input schema
@@ -264,14 +272,6 @@ class KMeans @Since("1.5.0") (
     @Since("1.5.0") override val uid: String)
   extends Estimator[KMeansModel] with KMeansParams with DefaultParamsWritable {
 
-  setDefault(
-    k -> 2,
-    maxIter -> 20,
-    initMode -> MLlibKMeans.K_MEANS_PARALLEL,
-    initSteps -> 2,
-    tol -> 1e-4,
-    distanceMeasure -> DistanceMeasure.EUCLIDEAN)
-
   @Since("1.5.0")
   override def copy(extra: ParamMap): KMeans = defaultCopy(extra)
 
@@ -339,7 +339,8 @@ class KMeans @Since("1.5.0") (
       .setEpsilon($(tol))
       .setDistanceMeasure($(distanceMeasure))
     val parentModel = algo.run(instances, Option(instr))
-    val model = copyValues(new KMeansModel(uid, parentModel).setParent(this))
+    val model = copyValues(new KMeansModel(uid, parentModel).setParent(this),
+      extra = ParamMap.empty, copyDefault = false)
     val summary = new KMeansSummary(
       model.transform(dataset), $(predictionCol), $(featuresCol), $(k))
 
13 changes: 6 additions & 7 deletions mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
@@ -195,8 +195,6 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
     " with estimates of the topic mixture distribution for each document (often called \"theta\"" +
     " in the literature). Returns a vector of zeros for an empty document.")
 
-  setDefault(topicDistributionCol -> "topicDistribution")
-
   /** @group getParam */
   @Since("1.6.0")
   def getTopicDistributionCol: String = $(topicDistributionCol)
@@ -311,6 +309,11 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
   @Since("2.0.0")
   def getKeepLastCheckpoint: Boolean = $(keepLastCheckpoint)
 
+  setDefault(maxIter -> 20, k -> 10, optimizer -> "online", checkpointInterval -> 10,
+    learningOffset -> 1024, learningDecay -> 0.51, subsamplingRate -> 0.05,
+    optimizeDocConcentration -> true, keepLastCheckpoint -> true,
+    topicDistributionCol -> "topicDistribution")
+
   /**
    * Validates and transforms the input schema.
    *
@@ -818,10 +821,6 @@ class LDA @Since("1.6.0") (
   @Since("1.6.0")
   def this() = this(Identifiable.randomUID("lda"))
 
-  setDefault(maxIter -> 20, k -> 10, optimizer -> "online", checkpointInterval -> 10,
-    learningOffset -> 1024, learningDecay -> 0.51, subsamplingRate -> 0.05,
-    optimizeDocConcentration -> true, keepLastCheckpoint -> true)
-
   /**
    * The features for LDA should be a `Vector` representing the word counts in a document.
    * The vector should be of length vocabSize, with counts for each term (word).
@@ -918,7 +917,7 @@ }
     }
 
     instr.logNumFeatures(newModel.vocabSize)
-    val model = copyValues(newModel).setParent(this)
+    val model = copyValues(newModel, extra = ParamMap.empty, copyDefault = false).setParent(this)
     instr.logSuccess(model)
     model
   }
10 changes: 7 additions & 3 deletions mllib/src/main/scala/org/apache/spark/ml/param/params.scala
@@ -865,7 +865,7 @@ trait Params extends Identifiable with Serializable {
   }
 
   /** Internal param map for user-supplied values. */
-  private val paramMap: ParamMap = ParamMap.empty
+  private[ml] val paramMap: ParamMap = ParamMap.empty
 
   /** Internal param map for default values. */
   private val defaultParamMap: ParamMap = ParamMap.empty
@@ -886,14 +886,18 @@
    *
    * @param to the target instance, which should work with the same set of default Params as this
    *           source instance
+   * @param copyDefault whether to copy default Params. Default is `true`.
    * @param extra extra params to be copied to the target's `paramMap`
    * @return the target instance with param values copied
    */
-  protected def copyValues[T <: Params](to: T, extra: ParamMap = ParamMap.empty): T = {
+  protected def copyValues[T <: Params](
+      to: T,
+      extra: ParamMap = ParamMap.empty,
+      copyDefault: Boolean = true): T = {
     val map = paramMap ++ extra
     params.foreach { param =>
       // copy default Params
-      if (defaultParamMap.contains(param) && to.hasParam(param.name)) {
+      if (copyDefault && defaultParamMap.contains(param) && to.hasParam(param.name)) {
         to.defaultParamMap.put(to.getParam(param.name), defaultParamMap(param))
       }
       // copy explicitly set Params
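
The flag's effect in isolation, as a minimal sketch with two hypothetical Params classes sharing a param name (Source, Target, and copyTo are demo names; copyTo merely exposes the protected copyValues; run in a Spark shell or script):

    import org.apache.spark.ml.param.{IntParam, ParamMap, Params}

    class Source(override val uid: String) extends Params {
      final val maxIter = new IntParam(this, "maxIter", "max number of iterations")
      setDefault(maxIter -> 100)
      override def copy(extra: ParamMap): Source = defaultCopy(extra)
      def copyTo[T <: Params](to: T, copyDefault: Boolean): T =
        copyValues(to, ParamMap.empty, copyDefault)
    }

    class Target(override val uid: String) extends Params {
      final val maxIter = new IntParam(this, "maxIter", "max number of iterations")
      override def copy(extra: ParamMap): Target = defaultCopy(extra)
    }

    val src = new Source("src")
    val t1 = src.copyTo(new Target("t1"), copyDefault = true)
    val t2 = src.copyTo(new Target("t2"), copyDefault = false)
    assert(t1.isDefined(t1.maxIter))   // default copied into the target
    assert(!t2.isDefined(t2.maxIter))  // target is left to declare its own defaults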
mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -54,6 +54,7 @@ import org.apache.spark.util.random.XORShiftRandom
  * Common params for ALS and ALSModel.
  */
 private[recommendation] trait ALSModelParams extends Params with HasPredictionCol {
+
   /**
    * Param for the column name for user ids. Ids must be integers. Other
    * numeric types are supported for this column, but will be cast to integers as long as they
@@ -124,6 +125,8 @@ private[recommendation] trait ALSModelParams extends Params with HasPredictionCo
 
   /** @group expertGetParam */
   def getColdStartStrategy: String = $(coldStartStrategy).toLowerCase(Locale.ROOT)
+
+  setDefault(userCol -> "user", itemCol -> "item", coldStartStrategy -> "nan")
 }
 
 /**
@@ -235,10 +238,9 @@ private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter w
   def getFinalStorageLevel: String = $(finalStorageLevel)
 
   setDefault(rank -> 10, maxIter -> 10, regParam -> 0.1, numUserBlocks -> 10, numItemBlocks -> 10,
-    implicitPrefs -> false, alpha -> 1.0, userCol -> "user", itemCol -> "item",
+    implicitPrefs -> false, alpha -> 1.0,
     ratingCol -> "rating", nonnegative -> false, checkpointInterval -> 10,
-    intermediateStorageLevel -> "MEMORY_AND_DISK", finalStorageLevel -> "MEMORY_AND_DISK",
-    coldStartStrategy -> "nan")
+    intermediateStorageLevel -> "MEMORY_AND_DISK", finalStorageLevel -> "MEMORY_AND_DISK")
 
   /**
    * Validates and transforms the input schema.
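
The userCol, itemCol, and coldStartStrategy defaults move up from ALSParams into ALSModelParams, presumably because ALSModel mixes in only the latter: the model can then resolve those three defaults on its own, while training-only defaults stay in ALSParams. A quick check on the estimator (an ALSModel resolves the first three the same way):

    import org.apache.spark.ml.recommendation.ALS

    val als = new ALS()
    // declared in ALSModelParams (shared with ALSModel):
    assert(als.getUserCol == "user" && als.getItemCol == "item")
    assert(als.getColdStartStrategy == "nan")
    // still declared in ALSParams (estimator-only):
    assert(als.getRank == 10 && als.getMaxIter == 10)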
mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.ml.regression
 
 import org.apache.hadoop.fs.Path
-import org.json4s.{DefaultFormats, JObject}
+import org.json4s.{DefaultFormats, JInt, JObject, JString}
 import org.json4s.JsonDSL._
 
 import org.apache.spark.annotation.Since
@@ -261,7 +261,8 @@ object DecisionTreeRegressionModel extends MLReadable[DecisionTreeRegressionMode
 
     override protected def saveImpl(path: String): Unit = {
       val extraMetadata: JObject = Map(
-        "numFeatures" -> instance.numFeatures)
+        "numFeatures" -> JInt(instance.numFeatures),
+        "impurity" -> JString(instance.getImpurity))
       DefaultParamsWriter.saveMetadata(instance, path, sc, Some(extraMetadata))
       val (nodeData, _) = NodeData.build(instance.rootNode, 0)
       val dataPath = new Path(path, "data").toString
mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.ml.regression
 
 import com.github.fommil.netlib.BLAS.{getInstance => blas}
-import org.json4s.{DefaultFormats, JObject}
+import org.json4s.{DefaultFormats, JInt, JObject, JString}
 import org.json4s.JsonDSL._
 
 import org.apache.spark.annotation.Since
@@ -287,8 +287,9 @@ object GBTRegressionModel extends MLReadable[GBTRegressionModel] {
 
     override protected def saveImpl(path: String): Unit = {
       val extraMetadata: JObject = Map(
-        "numFeatures" -> instance.numFeatures,
-        "numTrees" -> instance.getNumTrees)
+        "numFeatures" -> JInt(instance.numFeatures),
+        "numTrees" -> JInt(instance.getNumTrees),
+        "impurity" -> JString(instance.getImpurity))
       EnsembleModelReadWrite.saveImpl(instance, path, sparkSession, extraMetadata)
     }
   }
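
On the read side, the extra fields written above land in the model's metadata JSON, where loaders can extract them with json4s as usual. A hedged sketch of pulling "impurity" back out (the JSON literal is illustrative, not the full metadata schema):

    import org.json4s.{DefaultFormats, Formats, JValue}
    import org.json4s.jackson.JsonMethods.parse

    implicit val formats: Formats = DefaultFormats

    val metadataJson: JValue = parse("""{"numFeatures": 10, "numTrees": 3, "impurity": "variance"}""")
    val impurity = (metadataJson \ "impurity").extract[String]
    assert(impurity == "variance")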