Skip to content

Commit c0ada5b

Browse files
yinxusen authored and mengxr committed
[SPARK-11729] Replace example code in ml-linear-methods.md using include_example
JIRA link: https://issues.apache.org/jira/browse/SPARK-11729 Author: Xusen Yin <[email protected]> Closes #9713 from yinxusen/SPARK-11729. (cherry picked from commit 328eb49) Signed-off-by: Xiangrui Meng <[email protected]>
1 parent b468f81 commit c0ada5b

9 files changed

+491
-210
lines changed

docs/ml-linear-methods.md

Lines changed: 8 additions & 210 deletions
Original file line numberDiff line numberDiff line change
@@ -57,77 +57,15 @@ $\alpha$ and `regParam` corresponds to $\lambda$.
5757
<div class="codetabs">
5858

5959
<div data-lang="scala" markdown="1">
60-
{% highlight scala %}
61-
import org.apache.spark.ml.classification.LogisticRegression
62-
63-
// Load training data
64-
val training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
65-
66-
val lr = new LogisticRegression()
67-
.setMaxIter(10)
68-
.setRegParam(0.3)
69-
.setElasticNetParam(0.8)
70-
71-
// Fit the model
72-
val lrModel = lr.fit(training)
73-
74-
// Print the coefficients and intercept for logistic regression
75-
println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
76-
{% endhighlight %}
60+
{% include_example scala/org/apache/spark/examples/ml/LogisticRegressionWithElasticNetExample.scala %}
7761
</div>
7862

7963
<div data-lang="java" markdown="1">
80-
{% highlight java %}
81-
import org.apache.spark.ml.classification.LogisticRegression;
82-
import org.apache.spark.ml.classification.LogisticRegressionModel;
83-
import org.apache.spark.SparkConf;
84-
import org.apache.spark.SparkContext;
85-
import org.apache.spark.sql.DataFrame;
86-
import org.apache.spark.sql.SQLContext;
87-
88-
public class LogisticRegressionWithElasticNetExample {
89-
public static void main(String[] args) {
90-
SparkConf conf = new SparkConf()
91-
.setAppName("Logistic Regression with Elastic Net Example");
92-
93-
SparkContext sc = new SparkContext(conf);
94-
SQLContext sql = new SQLContext(sc);
95-
String path = "data/mllib/sample_libsvm_data.txt";
96-
97-
// Load training data
98-
DataFrame training = sqlContext.read().format("libsvm").load(path);
99-
100-
LogisticRegression lr = new LogisticRegression()
101-
.setMaxIter(10)
102-
.setRegParam(0.3)
103-
.setElasticNetParam(0.8);
104-
105-
// Fit the model
106-
LogisticRegressionModel lrModel = lr.fit(training);
107-
108-
// Print the coefficients and intercept for logistic regression
109-
System.out.println("Coefficients: " + lrModel.coefficients() + " Intercept: " + lrModel.intercept());
110-
}
111-
}
112-
{% endhighlight %}
64+
{% include_example java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java %}
11365
</div>
11466

11567
<div data-lang="python" markdown="1">
116-
{% highlight python %}
117-
from pyspark.ml.classification import LogisticRegression
118-
119-
# Load training data
120-
training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
121-
122-
lr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
123-
124-
# Fit the model
125-
lrModel = lr.fit(training)
126-
127-
# Print the coefficients and intercept for logistic regression
128-
print("Coefficients: " + str(lrModel.coefficients))
129-
print("Intercept: " + str(lrModel.intercept))
130-
{% endhighlight %}
68+
{% include_example python/ml/logistic_regression_with_elastic_net.py %}
13169
</div>
13270

13371
</div>
@@ -152,33 +90,7 @@ This will likely change when multiclass classification is supported.
15290

15391
Continuing the earlier example:
15492

155-
{% highlight scala %}
156-
import org.apache.spark.ml.classification.BinaryLogisticRegressionSummary
157-
158-
// Extract the summary from the returned LogisticRegressionModel instance trained in the earlier example
159-
val trainingSummary = lrModel.summary
160-
161-
// Obtain the objective per iteration.
162-
val objectiveHistory = trainingSummary.objectiveHistory
163-
objectiveHistory.foreach(loss => println(loss))
164-
165-
// Obtain the metrics useful to judge performance on test data.
166-
// We cast the summary to a BinaryLogisticRegressionSummary since the problem is a
167-
// binary classification problem.
168-
val binarySummary = trainingSummary.asInstanceOf[BinaryLogisticRegressionSummary]
169-
170-
// Obtain the receiver-operating characteristic as a dataframe and areaUnderROC.
171-
val roc = binarySummary.roc
172-
roc.show()
173-
println(binarySummary.areaUnderROC)
174-
175-
// Set the model threshold to maximize F-Measure
176-
val fMeasure = binarySummary.fMeasureByThreshold
177-
val maxFMeasure = fMeasure.select(max("F-Measure")).head().getDouble(0)
178-
val bestThreshold = fMeasure.where($"F-Measure" === maxFMeasure).
179-
select("threshold").head().getDouble(0)
180-
lrModel.setThreshold(bestThreshold)
181-
{% endhighlight %}
93+
{% include_example scala/org/apache/spark/examples/ml/LogisticRegressionSummaryExample.scala %}
18294
</div>
18395

18496
<div data-lang="java" markdown="1">
@@ -192,39 +104,7 @@ This will likely change when multiclass classification is supported.
192104

193105
Continuing the earlier example:
194106

195-
{% highlight java %}
196-
import org.apache.spark.ml.classification.LogisticRegressionTrainingSummary;
197-
import org.apache.spark.ml.classification.BinaryLogisticRegressionSummary;
198-
import org.apache.spark.sql.functions;
199-
200-
// Extract the summary from the returned LogisticRegressionModel instance trained in the earlier example
201-
LogisticRegressionTrainingSummary trainingSummary = lrModel.summary();
202-
203-
// Obtain the loss per iteration.
204-
double[] objectiveHistory = trainingSummary.objectiveHistory();
205-
for (double lossPerIteration : objectiveHistory) {
206-
System.out.println(lossPerIteration);
207-
}
208-
209-
// Obtain the metrics useful to judge performance on test data.
210-
// We cast the summary to a BinaryLogisticRegressionSummary since the problem is a
211-
// binary classification problem.
212-
BinaryLogisticRegressionSummary binarySummary = (BinaryLogisticRegressionSummary) trainingSummary;
213-
214-
// Obtain the receiver-operating characteristic as a dataframe and areaUnderROC.
215-
DataFrame roc = binarySummary.roc();
216-
roc.show();
217-
roc.select("FPR").show();
218-
System.out.println(binarySummary.areaUnderROC());
219-
220-
// Get the threshold corresponding to the maximum F-Measure and rerun LogisticRegression with
221-
// this selected threshold.
222-
DataFrame fMeasure = binarySummary.fMeasureByThreshold();
223-
double maxFMeasure = fMeasure.select(functions.max("F-Measure")).head().getDouble(0);
224-
double bestThreshold = fMeasure.where(fMeasure.col("F-Measure").equalTo(maxFMeasure)).
225-
select("threshold").head().getDouble(0);
226-
lrModel.setThreshold(bestThreshold);
227-
{% endhighlight %}
107+
{% include_example java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java %}
228108
</div>
229109

230110
<!--- TODO: Add python model summaries once implemented -->
@@ -244,98 +124,16 @@ regression model and extracting model summary statistics.
244124
<div class="codetabs">
245125

246126
<div data-lang="scala" markdown="1">
247-
{% highlight scala %}
248-
import org.apache.spark.ml.regression.LinearRegression
249-
250-
// Load training data
251-
val training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
252-
253-
val lr = new LinearRegression()
254-
.setMaxIter(10)
255-
.setRegParam(0.3)
256-
.setElasticNetParam(0.8)
257-
258-
// Fit the model
259-
val lrModel = lr.fit(training)
260-
261-
// Print the coefficients and intercept for linear regression
262-
println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
263-
264-
// Summarize the model over the training set and print out some metrics
265-
val trainingSummary = lrModel.summary
266-
println(s"numIterations: ${trainingSummary.totalIterations}")
267-
println(s"objectiveHistory: ${trainingSummary.objectiveHistory.toList}")
268-
trainingSummary.residuals.show()
269-
println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
270-
println(s"r2: ${trainingSummary.r2}")
271-
{% endhighlight %}
127+
{% include_example scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala %}
272128
</div>
273129

274130
<div data-lang="java" markdown="1">
275-
{% highlight java %}
276-
import org.apache.spark.ml.regression.LinearRegression;
277-
import org.apache.spark.ml.regression.LinearRegressionModel;
278-
import org.apache.spark.ml.regression.LinearRegressionTrainingSummary;
279-
import org.apache.spark.mllib.linalg.Vectors;
280-
import org.apache.spark.SparkConf;
281-
import org.apache.spark.SparkContext;
282-
import org.apache.spark.sql.DataFrame;
283-
import org.apache.spark.sql.SQLContext;
284-
285-
public class LinearRegressionWithElasticNetExample {
286-
public static void main(String[] args) {
287-
SparkConf conf = new SparkConf()
288-
.setAppName("Linear Regression with Elastic Net Example");
289-
290-
SparkContext sc = new SparkContext(conf);
291-
SQLContext sql = new SQLContext(sc);
292-
String path = "data/mllib/sample_libsvm_data.txt";
293-
294-
// Load training data
295-
DataFrame training = sqlContext.read().format("libsvm").load(path);
296-
297-
LinearRegression lr = new LinearRegression()
298-
.setMaxIter(10)
299-
.setRegParam(0.3)
300-
.setElasticNetParam(0.8);
301-
302-
// Fit the model
303-
LinearRegressionModel lrModel = lr.fit(training);
304-
305-
// Print the coefficients and intercept for linear regression
306-
System.out.println("Coefficients: " + lrModel.coefficients() + " Intercept: " + lrModel.intercept());
307-
308-
// Summarize the model over the training set and print out some metrics
309-
LinearRegressionTrainingSummary trainingSummary = lrModel.summary();
310-
System.out.println("numIterations: " + trainingSummary.totalIterations());
311-
System.out.println("objectiveHistory: " + Vectors.dense(trainingSummary.objectiveHistory()));
312-
trainingSummary.residuals().show();
313-
System.out.println("RMSE: " + trainingSummary.rootMeanSquaredError());
314-
System.out.println("r2: " + trainingSummary.r2());
315-
}
316-
}
317-
{% endhighlight %}
131+
{% include_example java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java %}
318132
</div>
319133

320134
<div data-lang="python" markdown="1">
321135
<!--- TODO: Add python model summaries once implemented -->
322-
{% highlight python %}
323-
from pyspark.ml.regression import LinearRegression
324-
325-
# Load training data
326-
training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
327-
328-
lr = LinearRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
329-
330-
# Fit the model
331-
lrModel = lr.fit(training)
332-
333-
# Print the coefficients and intercept for linear regression
334-
print("Coefficients: " + str(lrModel.coefficients))
335-
print("Intercept: " + str(lrModel.intercept))
336-
337-
# Linear regression model summary is not yet supported in Python.
338-
{% endhighlight %}
136+
{% include_example python/ml/linear_regression_with_elastic_net.py %}
339137
</div>
340138

341139
</div>
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.examples.ml;
19+
20+
import org.apache.spark.SparkConf;
21+
import org.apache.spark.api.java.JavaSparkContext;
22+
// $example on$
23+
import org.apache.spark.ml.regression.LinearRegression;
24+
import org.apache.spark.ml.regression.LinearRegressionModel;
25+
import org.apache.spark.ml.regression.LinearRegressionTrainingSummary;
26+
import org.apache.spark.mllib.linalg.Vectors;
27+
import org.apache.spark.sql.DataFrame;
28+
import org.apache.spark.sql.SQLContext;
29+
// $example off$
30+
31+
public class JavaLinearRegressionWithElasticNetExample {
32+
public static void main(String[] args) {
33+
SparkConf conf = new SparkConf().setAppName("JavaLinearRegressionWithElasticNetExample");
34+
JavaSparkContext jsc = new JavaSparkContext(conf);
35+
SQLContext sqlContext = new SQLContext(jsc);
36+
37+
// $example on$
38+
// Load training data
39+
DataFrame training = sqlContext.read().format("libsvm")
40+
.load("data/mllib/sample_libsvm_data.txt");
41+
42+
LinearRegression lr = new LinearRegression()
43+
.setMaxIter(10)
44+
.setRegParam(0.3)
45+
.setElasticNetParam(0.8);
46+
47+
// Fit the model
48+
LinearRegressionModel lrModel = lr.fit(training);
49+
50+
// Print the coefficients and intercept for linear regression
51+
System.out.println("Coefficients: "
52+
+ lrModel.coefficients() + " Intercept: " + lrModel.intercept());
53+
54+
// Summarize the model over the training set and print out some metrics
55+
LinearRegressionTrainingSummary trainingSummary = lrModel.summary();
56+
System.out.println("numIterations: " + trainingSummary.totalIterations());
57+
System.out.println("objectiveHistory: " + Vectors.dense(trainingSummary.objectiveHistory()));
58+
trainingSummary.residuals().show();
59+
System.out.println("RMSE: " + trainingSummary.rootMeanSquaredError());
60+
System.out.println("r2: " + trainingSummary.r2());
61+
// $example off$
62+
63+
jsc.stop();
64+
}
65+
}

0 commit comments

Comments
 (0)