@@ -57,77 +57,15 @@ $\alpha$ and `regParam` corresponds to $\lambda$.
 <div class="codetabs">
 
 <div data-lang="scala" markdown="1">
-{% highlight scala %}
-import org.apache.spark.ml.classification.LogisticRegression
-
-// Load training data
-val training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
-
-val lr = new LogisticRegression()
-  .setMaxIter(10)
-  .setRegParam(0.3)
-  .setElasticNetParam(0.8)
-
-// Fit the model
-val lrModel = lr.fit(training)
-
-// Print the coefficients and intercept for logistic regression
-println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/LogisticRegressionWithElasticNetExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
-{% highlight java %}
-import org.apache.spark.ml.classification.LogisticRegression;
-import org.apache.spark.ml.classification.LogisticRegressionModel;
-import org.apache.spark.SparkConf;
-import org.apache.spark.SparkContext;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.SQLContext;
-
-public class LogisticRegressionWithElasticNetExample {
-  public static void main(String[] args) {
-    SparkConf conf = new SparkConf()
-      .setAppName("Logistic Regression with Elastic Net Example");
-
-    SparkContext sc = new SparkContext(conf);
-    SQLContext sqlContext = new SQLContext(sc);
-    String path = "data/mllib/sample_libsvm_data.txt";
-
-    // Load training data
-    DataFrame training = sqlContext.read().format("libsvm").load(path);
-
-    LogisticRegression lr = new LogisticRegression()
-      .setMaxIter(10)
-      .setRegParam(0.3)
-      .setElasticNetParam(0.8);
-
-    // Fit the model
-    LogisticRegressionModel lrModel = lr.fit(training);
-
-    // Print the coefficients and intercept for logistic regression
-    System.out.println("Coefficients: " + lrModel.coefficients() + " Intercept: " + lrModel.intercept());
-  }
-}
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java %}
 </div>
 
 <div data-lang="python" markdown="1">
-{% highlight python %}
-from pyspark.ml.classification import LogisticRegression
-
-# Load training data
-training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
-
-lr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
-
-# Fit the model
-lrModel = lr.fit(training)
-
-# Print the coefficients and intercept for logistic regression
-print("Coefficients: " + str(lrModel.coefficients))
-print("Intercept: " + str(lrModel.intercept))
-{% endhighlight %}
+{% include_example python/ml/logistic_regression_with_elastic_net.py %}
 </div>
 
 </div>
@@ -152,33 +90,7 @@ This will likely change when multiclass classification is supported.
 
 Continuing the earlier example:
 
-{% highlight scala %}
-import org.apache.spark.ml.classification.BinaryLogisticRegressionSummary
-import org.apache.spark.sql.functions.max
-
-// Extract the summary from the returned LogisticRegressionModel instance trained in the earlier example
-val trainingSummary = lrModel.summary
-
-// Obtain the objective per iteration.
-val objectiveHistory = trainingSummary.objectiveHistory
-objectiveHistory.foreach(loss => println(loss))
-
-// Obtain the metrics useful to judge performance on test data.
-// We cast the summary to a BinaryLogisticRegressionSummary since the problem is a
-// binary classification problem.
-val binarySummary = trainingSummary.asInstanceOf[BinaryLogisticRegressionSummary]
-
-// Obtain the receiver-operating characteristic as a dataframe and areaUnderROC.
-val roc = binarySummary.roc
-roc.show()
-println(binarySummary.areaUnderROC)
-
-// Set the model threshold to maximize F-Measure
-val fMeasure = binarySummary.fMeasureByThreshold
-val maxFMeasure = fMeasure.select(max("F-Measure")).head().getDouble(0)
-val bestThreshold = fMeasure.where($"F-Measure" === maxFMeasure)
-  .select("threshold").head().getDouble(0)
-lrModel.setThreshold(bestThreshold)
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/LogisticRegressionSummaryExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
@@ -192,39 +104,7 @@ This will likely change when multiclass classification is supported.
 
 Continuing the earlier example:
 
-{% highlight java %}
-import org.apache.spark.ml.classification.LogisticRegressionTrainingSummary;
-import org.apache.spark.ml.classification.BinaryLogisticRegressionSummary;
-import org.apache.spark.sql.functions;
-
-// Extract the summary from the returned LogisticRegressionModel instance trained in the earlier example
-LogisticRegressionTrainingSummary trainingSummary = lrModel.summary();
-
-// Obtain the loss per iteration.
-double[] objectiveHistory = trainingSummary.objectiveHistory();
-for (double lossPerIteration : objectiveHistory) {
-  System.out.println(lossPerIteration);
-}
-
-// Obtain the metrics useful to judge performance on test data.
-// We cast the summary to a BinaryLogisticRegressionSummary since the problem is a
-// binary classification problem.
-BinaryLogisticRegressionSummary binarySummary = (BinaryLogisticRegressionSummary) trainingSummary;
-
-// Obtain the receiver-operating characteristic as a dataframe and areaUnderROC.
-DataFrame roc = binarySummary.roc();
-roc.show();
-roc.select("FPR").show();
-System.out.println(binarySummary.areaUnderROC());
-
-// Get the threshold corresponding to the maximum F-Measure and rerun LogisticRegression with
-// this selected threshold.
-DataFrame fMeasure = binarySummary.fMeasureByThreshold();
-double maxFMeasure = fMeasure.select(functions.max("F-Measure")).head().getDouble(0);
-double bestThreshold = fMeasure.where(fMeasure.col("F-Measure").equalTo(maxFMeasure))
-  .select("threshold").head().getDouble(0);
-lrModel.setThreshold(bestThreshold);
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java %}
 </div>
 
 <!--- TODO: Add python model summaries once implemented -->
@@ -244,98 +124,16 @@ regression model and extracting model summary statistics.
 <div class="codetabs">
 
 <div data-lang="scala" markdown="1">
-{% highlight scala %}
-import org.apache.spark.ml.regression.LinearRegression
-
-// Load training data
-val training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
-
-val lr = new LinearRegression()
-  .setMaxIter(10)
-  .setRegParam(0.3)
-  .setElasticNetParam(0.8)
-
-// Fit the model
-val lrModel = lr.fit(training)
-
-// Print the coefficients and intercept for linear regression
-println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
-
-// Summarize the model over the training set and print out some metrics
-val trainingSummary = lrModel.summary
-println(s"numIterations: ${trainingSummary.totalIterations}")
-println(s"objectiveHistory: ${trainingSummary.objectiveHistory.toList}")
-trainingSummary.residuals.show()
-println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
-println(s"r2: ${trainingSummary.r2}")
-{% endhighlight %}
+{% include_example scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
-{% highlight java %}
-import org.apache.spark.ml.regression.LinearRegression;
-import org.apache.spark.ml.regression.LinearRegressionModel;
-import org.apache.spark.ml.regression.LinearRegressionTrainingSummary;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.SparkConf;
-import org.apache.spark.SparkContext;
-import org.apache.spark.sql.DataFrame;
-import org.apache.spark.sql.SQLContext;
-
-public class LinearRegressionWithElasticNetExample {
-  public static void main(String[] args) {
-    SparkConf conf = new SparkConf()
-      .setAppName("Linear Regression with Elastic Net Example");
-
-    SparkContext sc = new SparkContext(conf);
-    SQLContext sqlContext = new SQLContext(sc);
-    String path = "data/mllib/sample_libsvm_data.txt";
-
-    // Load training data
-    DataFrame training = sqlContext.read().format("libsvm").load(path);
-
-    LinearRegression lr = new LinearRegression()
-      .setMaxIter(10)
-      .setRegParam(0.3)
-      .setElasticNetParam(0.8);
-
-    // Fit the model
-    LinearRegressionModel lrModel = lr.fit(training);
-
-    // Print the coefficients and intercept for linear regression
-    System.out.println("Coefficients: " + lrModel.coefficients() + " Intercept: " + lrModel.intercept());
-
-    // Summarize the model over the training set and print out some metrics
-    LinearRegressionTrainingSummary trainingSummary = lrModel.summary();
-    System.out.println("numIterations: " + trainingSummary.totalIterations());
-    System.out.println("objectiveHistory: " + Vectors.dense(trainingSummary.objectiveHistory()));
-    trainingSummary.residuals().show();
-    System.out.println("RMSE: " + trainingSummary.rootMeanSquaredError());
-    System.out.println("r2: " + trainingSummary.r2());
-  }
-}
-{% endhighlight %}
+{% include_example java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java %}
 </div>
 
 <div data-lang="python" markdown="1">
 <!--- TODO: Add python model summaries once implemented -->
-{% highlight python %}
-from pyspark.ml.regression import LinearRegression
-
-# Load training data
-training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
-
-lr = LinearRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
-
-# Fit the model
-lrModel = lr.fit(training)
-
-# Print the coefficients and intercept for linear regression
-print("Coefficients: " + str(lrModel.coefficients))
-print("Intercept: " + str(lrModel.intercept))
-
-# Linear regression model summary is not yet supported in Python.
-{% endhighlight %}
+{% include_example python/ml/linear_regression_with_elastic_net.py %}
 </div>
 
 </div>
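
For reference on the first hunk's context line: `elasticNetParam` corresponds to $\alpha$ and `regParam` corresponds to $\lambda$ in the elastic net penalty. A sketch of the combined regularizer, assuming the standard parameterization used elsewhere in the MLlib docs:

\[
R(w) = \alpha \lambda \|w\|_1 + (1 - \alpha) \frac{\lambda}{2} \|w\|_2^2, \quad \alpha \in [0, 1],\ \lambda \ge 0
\]

With $\alpha = 0$ the penalty reduces to pure L2 (ridge) regularization, and with $\alpha = 1$ to pure L1 (lasso); the examples in this diff set `elasticNetParam` to 0.8 and `regParam` to 0.3.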