@@ -163,11 +163,8 @@ object, and make predictions with the resulting model to compute the training
163163error.
164164
165165{% highlight scala %}
166- import org.apache.spark.SparkContext
167166import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
168167import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
169- import org.apache.spark.mllib.regression.LabeledPoint
170- import org.apache.spark.mllib.linalg.Vectors
171168import org.apache.spark.mllib.util.MLUtils
172169
173170// Load training data in LIBSVM format.
@@ -231,15 +228,13 @@ calling `.rdd()` on your `JavaRDD` object. A self-contained application example
231228that is equivalent to the provided example in Scala is given below:
232229
233230{% highlight java %}
234- import java.util.Random;
235-
236231import scala.Tuple2;
237232
238233import org.apache.spark.api.java.*;
239234import org.apache.spark.api.java.function.Function;
240235import org.apache.spark.mllib.classification.*;
241236import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics;
242- import org.apache.spark.mllib.linalg.Vector;
237+
243238import org.apache.spark.mllib.regression.LabeledPoint;
244239import org.apache.spark.mllib.util.MLUtils;
245240import org.apache.spark.SparkConf;
@@ -282,8 +277,8 @@ public class SVMClassifier {
282277 System.out.println("Area under ROC = " + auROC);
283278
284279 // Save and load model
285- model.save(sc.sc(), "myModelPath");
286- SVMModel sameModel = SVMModel.load(sc.sc(), "myModelPath");
280+ model.save(sc, "myModelPath");
281+ SVMModel sameModel = SVMModel.load(sc, "myModelPath");
287282 }
288283}
289284{% endhighlight %}
@@ -315,15 +310,12 @@ a dependency.
315310</div>
316311
317312<div data-lang="python" markdown="1">
318- The following example shows how to load a sample dataset, build Logistic Regression model,
313+ The following example shows how to load a sample dataset, build an SVM model,
319314and make predictions with the resulting model to compute the training error.
320315
321- Note that the Python API does not yet support model save/load but will in the future.
322-
323316{% highlight python %}
324- from pyspark.mllib.classification import LogisticRegressionWithSGD
317+ from pyspark.mllib.classification import SVMWithSGD, SVMModel
325318from pyspark.mllib.regression import LabeledPoint
326- from numpy import array
327319
328320# Load and parse the data
329321def parsePoint(line):
@@ -334,12 +326,16 @@ data = sc.textFile("data/mllib/sample_svm_data.txt")
334326parsedData = data.map(parsePoint)
335327
336328# Build the model
337- model = LogisticRegressionWithSGD.train(parsedData)
329+ model = SVMWithSGD.train(parsedData, iterations=100)
338330
339331# Evaluating the model on training data
340332labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
341333trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())
342334print("Training Error = " + str(trainErr))
335+
336+ # Save and load model
337+ model.save(sc, "myModelPath")
338+ sameModel = SVMModel.load(sc, "myModelPath")
343339{% endhighlight %}
344340</div>
345341</div>