Skip to content

Commit dbe8163

Browse files
yanboliang authored and jkbradley committed
[SPARK-20501][ML] ML 2.2 QA: New Scala APIs, docs
## What changes were proposed in this pull request?

Review new Scala APIs introduced in 2.2.

## How was this patch tested?

Existing tests.

Author: Yanbo Liang <[email protected]>

Closes #17934 from yanboliang/spark-20501.
1 parent d4022d4 commit dbe8163

File tree

6 files changed

+20
-12
lines changed

6 files changed

+20
-12
lines changed

mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR
5151
* Linear SVM Classifier</a>
5252
*
5353
* This binary classifier optimizes the Hinge Loss using the OWLQN optimizer.
54+
* Only supports L2 regularization currently.
5455
*
5556
*/
5657
@Since("2.2.0")
@@ -148,7 +149,7 @@ class LinearSVC @Since("2.2.0") (
148149
@Since("2.2.0")
149150
override def copy(extra: ParamMap): LinearSVC = defaultCopy(extra)
150151

151-
override protected[classification] def train(dataset: Dataset[_]): LinearSVCModel = {
152+
override protected def train(dataset: Dataset[_]): LinearSVCModel = {
152153
val w = if (!isDefined(weightCol) || $(weightCol).isEmpty) lit(1.0) else col($(weightCol))
153154
val instances: RDD[Instance] =
154155
dataset.select(col($(labelCol)), w, col($(featuresCol))).rdd.map {
@@ -264,7 +265,7 @@ object LinearSVC extends DefaultParamsReadable[LinearSVC] {
264265

265266
/**
266267
* :: Experimental ::
267-
* SVM Model trained by [[LinearSVC]]
268+
* Linear SVM Model trained by [[LinearSVC]]
268269
*/
269270
@Since("2.2.0")
270271
@Experimental

mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,8 +267,12 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas
267267
}
268268

269269
/**
270-
* Logistic regression. Supports multinomial logistic (softmax) regression and binomial logistic
271-
* regression.
270+
* Logistic regression. Supports:
271+
* - Multinomial logistic (softmax) regression.
272+
* - Binomial logistic regression.
273+
*
274+
* This class supports fitting traditional logistic regression model by LBFGS/OWLQN and
275+
* bound (box) constrained logistic regression model by LBFGSB.
272276
*/
273277
@Since("1.2.0")
274278
class LogisticRegression @Since("1.2.0") (

mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,8 @@ private[feature] trait ImputerParams extends Params with HasInputCols {
102102
* computing median, DataFrameStatFunctions.approxQuantile is used with a relative error of 0.001.
103103
*/
104104
@Experimental
105-
class Imputer @Since("2.2.0")(override val uid: String)
105+
@Since("2.2.0")
106+
class Imputer @Since("2.2.0") (@Since("2.2.0") override val uid: String)
106107
extends Estimator[ImputerModel] with ImputerParams with DefaultParamsWritable {
107108

108109
@Since("2.2.0")
@@ -165,8 +166,8 @@ class Imputer @Since("2.2.0")(override val uid: String)
165166
object Imputer extends DefaultParamsReadable[Imputer] {
166167

167168
/** strategy names that Imputer currently supports. */
168-
private[ml] val mean = "mean"
169-
private[ml] val median = "median"
169+
private[feature] val mean = "mean"
170+
private[feature] val median = "median"
170171

171172
@Since("2.2.0")
172173
override def load(path: String): Imputer = super.load(path)
@@ -180,9 +181,10 @@ object Imputer extends DefaultParamsReadable[Imputer] {
180181
* which are used to replace the missing values in the input DataFrame.
181182
*/
182183
@Experimental
183-
class ImputerModel private[ml](
184-
override val uid: String,
185-
val surrogateDF: DataFrame)
184+
@Since("2.2.0")
185+
class ImputerModel private[ml] (
186+
@Since("2.2.0") override val uid: String,
187+
@Since("2.2.0") val surrogateDF: DataFrame)
186188
extends Model[ImputerModel] with ImputerParams with MLWritable {
187189

188190
import ImputerModel._

mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ object FPGrowth extends DefaultParamsReadable[FPGrowth] {
200200
@Experimental
201201
class FPGrowthModel private[ml] (
202202
@Since("2.2.0") override val uid: String,
203-
@transient val freqItemsets: DataFrame)
203+
@Since("2.2.0") @transient val freqItemsets: DataFrame)
204204
extends Model[FPGrowthModel] with FPGrowthParams with MLWritable {
205205

206206
/** @group setParam */

mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import org.apache.spark.sql.{DataFrame, Dataset, Row}
2727
import org.apache.spark.sql.types.{StructField, StructType}
2828

2929
/**
30-
* API for correlation functions in MLlib, compatible with Dataframes and Datasets.
30+
* API for correlation functions in MLlib, compatible with DataFrames and Datasets.
3131
*
3232
* The functions in this package generalize the functions in [[org.apache.spark.sql.Dataset#stat]]
3333
* to spark.ml's Vector types.

python/pyspark/ml/classification.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ class LinearSVC(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, Ha
7070
`Linear SVM Classifier <https://en.wikipedia.org/wiki/Support_vector_machine#Linear_SVM>`_
7171
7272
This binary classifier optimizes the Hinge Loss using the OWLQN optimizer.
73+
Only supports L2 regularization currently.
7374
7475
>>> from pyspark.sql import Row
7576
>>> from pyspark.ml.linalg import Vectors

0 commit comments

Comments
 (0)