Skip to content

Commit 601e792

Browse files
committed
Modified ParamMap to sort parameters in toString. Cleaned up classes in class hierarchy, before implementing tests and examples.
1 parent d705e87 commit 601e792

File tree

7 files changed

+146
-25
lines changed

7 files changed

+146
-25
lines changed

mllib/src/main/scala/org/apache/spark/ml/LabeledPoint.scala

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,48 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
118
package org.apache.spark.ml
219

20+
import scala.beans.BeanInfo
21+
22+
import org.apache.spark.annotation.AlphaComponent
323
import org.apache.spark.mllib.linalg.Vector
424

525
/**
26+
* :: AlphaComponent ::
627
* Class that represents an instance (data point) for prediction tasks.
728
*
829
* @param label Label to predict
930
* @param features Vector of features describing this instance
1031
* @param weight Instance weight
1132
*/
33+
@AlphaComponent
34+
@BeanInfo
1235
case class LabeledPoint(label: Double, features: Vector, weight: Double) {
1336

1437
override def toString: String = {
1538
"(%s,%s,%s)".format(label, features, weight)
1639
}
1740
}
1841

42+
/**
43+
* :: AlphaComponent ::
44+
*/
45+
@AlphaComponent
1946
object LabeledPoint {
2047
/** Factory method which creates a [[LabeledPoint]] with instance weight set to 1.0 */
2148
def apply(label: Double, features: Vector) = new LabeledPoint(label, features, 1.0)

mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,27 +21,40 @@ import org.apache.spark.annotation.AlphaComponent
2121
import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor, PredictorParams}
2222
import org.apache.spark.mllib.linalg.Vector
2323

24-
@AlphaComponent
24+
/**
25+
* Params for classification.
26+
* Currently empty, but may add functionality later.
27+
*/
2528
private[classification] trait ClassifierParams extends PredictorParams
2629

2730
/**
2831
* Single-label binary or multiclass classification
32+
* Classes are indexed {0, 1, ..., numClasses - 1}.
2933
*/
34+
@AlphaComponent
3035
abstract class Classifier[Learner <: Classifier[Learner, M], M <: ClassificationModel[M]]
3136
extends Predictor[Learner, M]
3237
with ClassifierParams {
3338

3439
// TODO: defaultEvaluator (follow-up PR)
3540
}
3641

37-
38-
private[ml] abstract class ClassificationModel[M <: ClassificationModel[M]]
42+
/**
43+
* :: AlphaComponent ::
44+
* Model produced by a [[Classifier]].
45+
* Classes are indexed {0, 1, ..., numClasses - 1}.
46+
*
47+
* @tparam M Model type.
48+
*/
49+
@AlphaComponent
50+
abstract class ClassificationModel[M <: ClassificationModel[M]]
3951
extends PredictionModel[M] with ClassifierParams {
4052

53+
/** Number of classes (values which the label can take). */
4154
def numClasses: Int
4255

4356
/**
44-
* Predict label for the given features. Labels are indexed {0, 1, ..., numClasses - 1}.
57+
* Predict label for the given features.
4558
* This default implementation for classification predicts the index of the maximum value
4659
* from [[predictRaw()]].
4760
*/
@@ -50,8 +63,12 @@ private[ml] abstract class ClassificationModel[M <: ClassificationModel[M]]
5063
}
5164

5265
/**
53-
* Raw prediction for each possible label
54-
* @return vector where element i is the raw score for label i
66+
* Raw prediction for each possible label.
67+
* The meaning of a "raw" prediction may vary between algorithms, but it intuitively gives
68+
* a magnitude of confidence in each possible label.
69+
* @return vector where element i is the raw prediction for label i.
70+
* This raw prediction may be any real number, where a larger value indicates greater
71+
* confidence for that label.
5572
*/
5673
def predictRaw(features: Vector): Vector
5774

mllib/src/main/scala/org/apache/spark/ml/impl/estimator/Predictor.scala

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,29 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
118
package org.apache.spark.ml.impl.estimator
219

3-
import org.apache.spark.annotation.AlphaComponent
420
import org.apache.spark.ml.{Estimator, LabeledPoint, Model}
521
import org.apache.spark.ml.param._
622
import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
723
import org.apache.spark.rdd.RDD
824
import org.apache.spark.sql._
925
import org.apache.spark.sql.catalyst.analysis.Star
1026

11-
@AlphaComponent
1227
private[ml] trait PredictorParams extends Params
1328
with HasLabelCol with HasFeaturesCol with HasPredictionCol {
1429

mllib/src/main/scala/org/apache/spark/ml/impl/estimator/ProbabilisticClassificationModel.scala

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,36 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
118
package org.apache.spark.ml.impl.estimator
219

320
import org.apache.spark.mllib.linalg.Vector
421

22+
/**
23+
* Trait for a [[org.apache.spark.ml.classification.ClassificationModel]] which can output
24+
* class conditional probabilities.
25+
*/
526
private[ml] trait ProbabilisticClassificationModel {
627

728
/**
8-
* Predict the probability of each label.
29+
* Predict the probability of each class given the features.
30+
* These predictions are also called class conditional probabilities.
31+
*
32+
* WARNING: Not all models output well-calibrated probability estimates! These probabilities
33+
* should be treated as confidences, not precise probabilities.
934
*/
1035
def predictProbabilities(features: Vector): Vector
1136

mllib/src/main/scala/org/apache/spark/ml/param/params.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) exten
279279
def copy: ParamMap = new ParamMap(map.clone())
280280

281281
override def toString: String = {
282-
map.map { case (param, value) =>
282+
map.toSeq.sorted.map { case (param, value) =>
283283
s"\t${param.parent.uid}-${param.name}: $value"
284284
}.mkString("{\n", ",\n", "\n}")
285285
}

mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
118
package org.apache.spark.ml.regression
219

320
import org.apache.spark.annotation.AlphaComponent
@@ -9,22 +26,20 @@ import org.apache.spark.rdd.RDD
926
import org.apache.spark.storage.StorageLevel
1027

1128
/**
12-
* :: AlphaComponent ::
1329
* Params for linear regression.
1430
*/
1531
@AlphaComponent
1632
private[regression] trait LinearRegressionParams extends RegressorParams
1733
with HasRegParam with HasMaxIter
1834

19-
2035
/**
36+
* :: AlphaComponent ::
2137
* Linear regression.
2238
*/
39+
@AlphaComponent
2340
class LinearRegression extends Regressor[LinearRegression, LinearRegressionModel]
2441
with LinearRegressionParams {
2542

26-
// TODO: Extend IterativeEstimator
27-
2843
setRegParam(0.1)
2944
setMaxIter(100)
3045

@@ -52,7 +67,6 @@ class LinearRegression extends Regressor[LinearRegression, LinearRegressionModel
5267
}
5368
}
5469

55-
5670
/**
5771
* :: AlphaComponent ::
5872
* Model produced by [[LinearRegression]].

mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,51 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
118
package org.apache.spark.ml.regression
219

320
import org.apache.spark.annotation.AlphaComponent
4-
import org.apache.spark.ml.Evaluator
5-
import org.apache.spark.ml.evaluation.RegressionEvaluator
6-
import org.apache.spark.ml.impl.estimator.{PredictionModel, HasDefaultEvaluator, Predictor,
7-
PredictorParams}
21+
import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor, PredictorParams}
822
import org.apache.spark.mllib.linalg.Vector
923

10-
@AlphaComponent
24+
/**
25+
* Params for regression.
26+
* Currently empty, but may add functionality later.
27+
*/
1128
private[regression] trait RegressorParams extends PredictorParams
1229

1330
/**
31+
* :: AlphaComponent ::
1432
* Single-label regression
1533
*/
34+
@AlphaComponent
1635
abstract class Regressor[Learner <: Regressor[Learner, M], M <: RegressionModel[M]]
1736
extends Predictor[Learner, M]
18-
with RegressorParams
19-
with HasDefaultEvaluator {
37+
with RegressorParams {
2038

21-
override def defaultEvaluator: Evaluator = new RegressionEvaluator
39+
// TODO: defaultEvaluator (follow-up PR)
2240
}
2341

24-
25-
private[ml] abstract class RegressionModel[M <: RegressionModel[M]]
42+
/**
43+
* :: AlphaComponent ::
44+
* Model produced by a [[Regressor]].
45+
* @tparam M Model type.
46+
*/
47+
@AlphaComponent
48+
abstract class RegressionModel[M <: RegressionModel[M]]
2649
extends PredictionModel[M] with RegressorParams {
2750

2851
/**

0 commit comments

Comments
 (0)