From 28411701ed24b3f15e4787af2203c2806559cd92 Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" Date: Fri, 27 Feb 2015 10:39:14 -0800 Subject: [PATCH 1/2] Fixed save,load calls in ML guide examples --- docs/mllib-collaborative-filtering.md | 8 +++---- docs/mllib-decision-tree.md | 16 +++++++------- docs/mllib-ensembles.md | 32 +++++++++++++-------------- docs/mllib-linear-methods.md | 16 +++++++------- docs/mllib-naive-bayes.md | 8 +++---- 5 files changed, 40 insertions(+), 40 deletions(-) diff --git a/docs/mllib-collaborative-filtering.md b/docs/mllib-collaborative-filtering.md index 935cd8dad3b25..e702f19a19518 100644 --- a/docs/mllib-collaborative-filtering.md +++ b/docs/mllib-collaborative-filtering.md @@ -97,8 +97,8 @@ val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) => }.mean() println("Mean Squared Error = " + MSE) -model.save("myModelPath") -val sameModel = MatrixFactorizationModel.load("myModelPath") +model.save(sc, "myModelPath") +val sameModel = MatrixFactorizationModel.load(sc, "myModelPath") {% endhighlight %} If the rating matrix is derived from another source of information (e.g., it is inferred from @@ -186,8 +186,8 @@ public class CollaborativeFiltering { ).rdd()).mean(); System.out.println("Mean Squared Error = " + MSE); - model.save("myModelPath"); - MatrixFactorizationModel sameModel = MatrixFactorizationModel.load("myModelPath"); + model.save(sc.sc(), "myModelPath"); + MatrixFactorizationModel sameModel = MatrixFactorizationModel.load(sc.sc(), "myModelPath"); } } {% endhighlight %} diff --git a/docs/mllib-decision-tree.md b/docs/mllib-decision-tree.md index 4695d1cde4901..b1f33465343b7 100644 --- a/docs/mllib-decision-tree.md +++ b/docs/mllib-decision-tree.md @@ -223,8 +223,8 @@ val testErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / testData. println("Test Error = " + testErr) println("Learned classification tree model:\n" + model.toDebugString) -model.save("myModelPath") -val sameModel = DecisionTreeModel.load("myModelPath") +model.save(sc, "myModelPath") +val sameModel = DecisionTreeModel.load(sc, "myModelPath") {% endhighlight %} @@ -284,8 +284,8 @@ Double testErr = System.out.println("Test Error: " + testErr); System.out.println("Learned classification tree model:\n" + model.toDebugString()); -model.save("myModelPath"); -DecisionTreeModel sameModel = DecisionTreeModel.load("myModelPath"); +model.save(sc.sc(), "myModelPath"); +DecisionTreeModel sameModel = DecisionTreeModel.load(sc.sc(), "myModelPath"); {% endhighlight %} @@ -362,8 +362,8 @@ val testMSE = labelsAndPredictions.map{ case(v, p) => math.pow((v - p), 2)}.mean println("Test Mean Squared Error = " + testMSE) println("Learned regression tree model:\n" + model.toDebugString) -model.save("myModelPath") -val sameModel = DecisionTreeModel.load("myModelPath") +model.save(sc, "myModelPath") +val sameModel = DecisionTreeModel.load(sc, "myModelPath") {% endhighlight %} @@ -429,8 +429,8 @@ Double testMSE = System.out.println("Test Mean Squared Error: " + testMSE); System.out.println("Learned regression tree model:\n" + model.toDebugString()); -model.save("myModelPath"); -DecisionTreeModel sameModel = DecisionTreeModel.load("myModelPath"); +model.save(sc.sc(), "myModelPath"); +DecisionTreeModel sameModel = DecisionTreeModel.load(sc.sc(), "myModelPath"); {% endhighlight %} diff --git a/docs/mllib-ensembles.md b/docs/mllib-ensembles.md index ddae84165f8a9..579b2426f5a42 100644 --- a/docs/mllib-ensembles.md +++ b/docs/mllib-ensembles.md @@ -129,8 +129,8 @@ val testErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / testData. println("Test Error = " + testErr) println("Learned classification forest model:\n" + model.toDebugString) -model.save("myModelPath") -val sameModel = RandomForestModel.load("myModelPath") +model.save(sc, "myModelPath") +val sameModel = RandomForestModel.load(sc, "myModelPath") {% endhighlight %} @@ -193,8 +193,8 @@ Double testErr = System.out.println("Test Error: " + testErr); System.out.println("Learned classification forest model:\n" + model.toDebugString()); -model.save("myModelPath"); -RandomForestModel sameModel = RandomForestModel.load("myModelPath"); +model.save(sc.sc(), "myModelPath"); +RandomForestModel sameModel = RandomForestModel.load(sc.sc(), "myModelPath"); {% endhighlight %} @@ -276,8 +276,8 @@ val testMSE = labelsAndPredictions.map{ case(v, p) => math.pow((v - p), 2)}.mean println("Test Mean Squared Error = " + testMSE) println("Learned regression forest model:\n" + model.toDebugString) -model.save("myModelPath") -val sameModel = RandomForestModel.load("myModelPath") +model.save(sc, "myModelPath") +val sameModel = RandomForestModel.load(sc, "myModelPath") {% endhighlight %} @@ -343,8 +343,8 @@ Double testMSE = System.out.println("Test Mean Squared Error: " + testMSE); System.out.println("Learned regression forest model:\n" + model.toDebugString()); -model.save("myModelPath"); -RandomForestModel sameModel = RandomForestModel.load("myModelPath"); +model.save(sc.sc(), "myModelPath"); +RandomForestModel sameModel = RandomForestModel.load(sc.sc(), "myModelPath"); {% endhighlight %} @@ -504,8 +504,8 @@ val testErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / testData. println("Test Error = " + testErr) println("Learned classification GBT model:\n" + model.toDebugString) -model.save("myModelPath") -val sameModel = GradientBoostedTreesModel.load("myModelPath") +model.save(sc, "myModelPath") +val sameModel = GradientBoostedTreesModel.load(sc, "myModelPath") {% endhighlight %} @@ -568,8 +568,8 @@ Double testErr = System.out.println("Test Error: " + testErr); System.out.println("Learned classification GBT model:\n" + model.toDebugString()); -model.save("myModelPath"); -GradientBoostedTreesModel sameModel = GradientBoostedTreesModel.load("myModelPath"); +model.save(sc.sc(), "myModelPath"); +GradientBoostedTreesModel sameModel = GradientBoostedTreesModel.load(sc.sc(), "myModelPath"); {% endhighlight %} @@ -647,8 +647,8 @@ val testMSE = labelsAndPredictions.map{ case(v, p) => math.pow((v - p), 2)}.mean println("Test Mean Squared Error = " + testMSE) println("Learned regression GBT model:\n" + model.toDebugString) -model.save("myModelPath") -val sameModel = GradientBoostedTreesModel.load("myModelPath") +model.save(sc, "myModelPath") +val sameModel = GradientBoostedTreesModel.load(sc, "myModelPath") {% endhighlight %} @@ -717,8 +717,8 @@ Double testMSE = System.out.println("Test Mean Squared Error: " + testMSE); System.out.println("Learned regression GBT model:\n" + model.toDebugString()); -model.save("myModelPath"); -GradientBoostedTreesModel sameModel = GradientBoostedTreesModel.load("myModelPath"); +model.save(sc.sc(), "myModelPath"); +GradientBoostedTreesModel sameModel = GradientBoostedTreesModel.load(sc.sc(), "myModelPath"); {% endhighlight %} diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md index d9fc63b37d116..7c44a47723d29 100644 --- a/docs/mllib-linear-methods.md +++ b/docs/mllib-linear-methods.md @@ -223,8 +223,8 @@ val auROC = metrics.areaUnderROC() println("Area under ROC = " + auROC) -model.save("myModelPath") -val sameModel = SVMModel.load("myModelPath") +model.save(sc, "myModelPath") +val sameModel = SVMModel.load(sc, "myModelPath") {% endhighlight %} The `SVMWithSGD.train()` method by default performs L2 regularization with the @@ -308,8 +308,8 @@ public class SVMClassifier { System.out.println("Area under ROC = " + auROC); - model.save("myModelPath"); - SVMModel sameModel = SVMModel.load("myModelPath"); + model.save(sc.sc(), "myModelPath"); + SVMModel sameModel = SVMModel.load(sc.sc(), "myModelPath"); } } {% endhighlight %} @@ -423,8 +423,8 @@ val valuesAndPreds = parsedData.map { point => val MSE = valuesAndPreds.map{case(v, p) => math.pow((v - p), 2)}.mean() println("training Mean Squared Error = " + MSE) -model.save("myModelPath") -val sameModel = LinearRegressionModel.load("myModelPath") +model.save(sc, "myModelPath") +val sameModel = LinearRegressionModel.load(sc, "myModelPath") {% endhighlight %} [`RidgeRegressionWithSGD`](api/scala/index.html#org.apache.spark.mllib.regression.RidgeRegressionWithSGD) @@ -496,8 +496,8 @@ public class LinearRegression { ).rdd()).mean(); System.out.println("training Mean Squared Error = " + MSE); - model.save("myModelPath"); - LinearRegressionModel sameModel = LinearRegressionModel.load("myModelPath"); + model.save(sc.sc(), "myModelPath"); + LinearRegressionModel sameModel = LinearRegressionModel.load(sc.sc(), "myModelPath"); } } {% endhighlight %} diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md index 81173255b590d..93072f7be9a49 100644 --- a/docs/mllib-naive-bayes.md +++ b/docs/mllib-naive-bayes.md @@ -56,8 +56,8 @@ val model = NaiveBayes.train(training, lambda = 1.0) val predictionAndLabel = test.map(p => (model.predict(p.features), p.label)) val accuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / test.count() -model.save("myModelPath") -val sameModel = NaiveBayesModel.load("myModelPath") +model.save(sc, "myModelPath") +val sameModel = NaiveBayesModel.load(sc, "myModelPath") {% endhighlight %} @@ -97,8 +97,8 @@ double accuracy = predictionAndLabel.filter(new Function, } }).count() / (double) test.count(); -model.save("myModelPath"); -NaiveBayesModel sameModel = NaiveBayesModel.load("myModelPath"); +model.save(sc.sc(), "myModelPath"); +NaiveBayesModel sameModel = NaiveBayesModel.load(sc.sc(), "myModelPath"); {% endhighlight %} From 83d369dba2cb1b89144b36ccd3917eda851d2f11 Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" Date: Fri, 27 Feb 2015 10:47:05 -0800 Subject: [PATCH 2/2] added comment to save,load parts of ML guide examples --- docs/mllib-collaborative-filtering.md | 2 ++ docs/mllib-decision-tree.md | 4 ++++ docs/mllib-ensembles.md | 8 ++++++++ docs/mllib-linear-methods.md | 4 ++++ docs/mllib-naive-bayes.md | 2 ++ 5 files changed, 20 insertions(+) diff --git a/docs/mllib-collaborative-filtering.md b/docs/mllib-collaborative-filtering.md index e702f19a19518..27aa4d38b7617 100644 --- a/docs/mllib-collaborative-filtering.md +++ b/docs/mllib-collaborative-filtering.md @@ -97,6 +97,7 @@ val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) => }.mean() println("Mean Squared Error = " + MSE) +// Save and load model model.save(sc, "myModelPath") val sameModel = MatrixFactorizationModel.load(sc, "myModelPath") {% endhighlight %} @@ -186,6 +187,7 @@ public class CollaborativeFiltering { ).rdd()).mean(); System.out.println("Mean Squared Error = " + MSE); + // Save and load model model.save(sc.sc(), "myModelPath"); MatrixFactorizationModel sameModel = MatrixFactorizationModel.load(sc.sc(), "myModelPath"); } diff --git a/docs/mllib-decision-tree.md b/docs/mllib-decision-tree.md index b1f33465343b7..8e478ab035582 100644 --- a/docs/mllib-decision-tree.md +++ b/docs/mllib-decision-tree.md @@ -223,6 +223,7 @@ val testErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / testData. println("Test Error = " + testErr) println("Learned classification tree model:\n" + model.toDebugString) +// Save and load model model.save(sc, "myModelPath") val sameModel = DecisionTreeModel.load(sc, "myModelPath") {% endhighlight %} @@ -284,6 +285,7 @@ Double testErr = System.out.println("Test Error: " + testErr); System.out.println("Learned classification tree model:\n" + model.toDebugString()); +// Save and load model model.save(sc.sc(), "myModelPath"); DecisionTreeModel sameModel = DecisionTreeModel.load(sc.sc(), "myModelPath"); {% endhighlight %} @@ -362,6 +364,7 @@ val testMSE = labelsAndPredictions.map{ case(v, p) => math.pow((v - p), 2)}.mean println("Test Mean Squared Error = " + testMSE) println("Learned regression tree model:\n" + model.toDebugString) +// Save and load model model.save(sc, "myModelPath") val sameModel = DecisionTreeModel.load(sc, "myModelPath") {% endhighlight %} @@ -429,6 +432,7 @@ Double testMSE = System.out.println("Test Mean Squared Error: " + testMSE); System.out.println("Learned regression tree model:\n" + model.toDebugString()); +// Save and load model model.save(sc.sc(), "myModelPath"); DecisionTreeModel sameModel = DecisionTreeModel.load(sc.sc(), "myModelPath"); {% endhighlight %} diff --git a/docs/mllib-ensembles.md b/docs/mllib-ensembles.md index 579b2426f5a42..ec1ef38b453d3 100644 --- a/docs/mllib-ensembles.md +++ b/docs/mllib-ensembles.md @@ -129,6 +129,7 @@ val testErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / testData. println("Test Error = " + testErr) println("Learned classification forest model:\n" + model.toDebugString) +// Save and load model model.save(sc, "myModelPath") val sameModel = RandomForestModel.load(sc, "myModelPath") {% endhighlight %} @@ -193,6 +194,7 @@ Double testErr = System.out.println("Test Error: " + testErr); System.out.println("Learned classification forest model:\n" + model.toDebugString()); +// Save and load model model.save(sc.sc(), "myModelPath"); RandomForestModel sameModel = RandomForestModel.load(sc.sc(), "myModelPath"); {% endhighlight %} @@ -276,6 +278,7 @@ val testMSE = labelsAndPredictions.map{ case(v, p) => math.pow((v - p), 2)}.mean println("Test Mean Squared Error = " + testMSE) println("Learned regression forest model:\n" + model.toDebugString) +// Save and load model model.save(sc, "myModelPath") val sameModel = RandomForestModel.load(sc, "myModelPath") {% endhighlight %} @@ -343,6 +346,7 @@ Double testMSE = System.out.println("Test Mean Squared Error: " + testMSE); System.out.println("Learned regression forest model:\n" + model.toDebugString()); +// Save and load model model.save(sc.sc(), "myModelPath"); RandomForestModel sameModel = RandomForestModel.load(sc.sc(), "myModelPath"); {% endhighlight %} @@ -504,6 +508,7 @@ val testErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / testData. println("Test Error = " + testErr) println("Learned classification GBT model:\n" + model.toDebugString) +// Save and load model model.save(sc, "myModelPath") val sameModel = GradientBoostedTreesModel.load(sc, "myModelPath") {% endhighlight %} @@ -568,6 +573,7 @@ Double testErr = System.out.println("Test Error: " + testErr); System.out.println("Learned classification GBT model:\n" + model.toDebugString()); +// Save and load model model.save(sc.sc(), "myModelPath"); GradientBoostedTreesModel sameModel = GradientBoostedTreesModel.load(sc.sc(), "myModelPath"); {% endhighlight %} @@ -647,6 +653,7 @@ val testMSE = labelsAndPredictions.map{ case(v, p) => math.pow((v - p), 2)}.mean println("Test Mean Squared Error = " + testMSE) println("Learned regression GBT model:\n" + model.toDebugString) +// Save and load model model.save(sc, "myModelPath") val sameModel = GradientBoostedTreesModel.load(sc, "myModelPath") {% endhighlight %} @@ -717,6 +724,7 @@ Double testMSE = System.out.println("Test Mean Squared Error: " + testMSE); System.out.println("Learned regression GBT model:\n" + model.toDebugString()); +// Save and load model model.save(sc.sc(), "myModelPath"); GradientBoostedTreesModel sameModel = GradientBoostedTreesModel.load(sc.sc(), "myModelPath"); {% endhighlight %} diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md index 7c44a47723d29..ffbd7ef1bff51 100644 --- a/docs/mllib-linear-methods.md +++ b/docs/mllib-linear-methods.md @@ -223,6 +223,7 @@ val auROC = metrics.areaUnderROC() println("Area under ROC = " + auROC) +// Save and load model model.save(sc, "myModelPath") val sameModel = SVMModel.load(sc, "myModelPath") {% endhighlight %} @@ -308,6 +309,7 @@ public class SVMClassifier { System.out.println("Area under ROC = " + auROC); + // Save and load model model.save(sc.sc(), "myModelPath"); SVMModel sameModel = SVMModel.load(sc.sc(), "myModelPath"); } @@ -423,6 +425,7 @@ val valuesAndPreds = parsedData.map { point => val MSE = valuesAndPreds.map{case(v, p) => math.pow((v - p), 2)}.mean() println("training Mean Squared Error = " + MSE) +// Save and load model model.save(sc, "myModelPath") val sameModel = LinearRegressionModel.load(sc, "myModelPath") {% endhighlight %} @@ -496,6 +499,7 @@ public class LinearRegression { ).rdd()).mean(); System.out.println("training Mean Squared Error = " + MSE); + // Save and load model model.save(sc.sc(), "myModelPath"); LinearRegressionModel sameModel = LinearRegressionModel.load(sc.sc(), "myModelPath"); } diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md index 93072f7be9a49..5224a0b49a991 100644 --- a/docs/mllib-naive-bayes.md +++ b/docs/mllib-naive-bayes.md @@ -56,6 +56,7 @@ val model = NaiveBayes.train(training, lambda = 1.0) val predictionAndLabel = test.map(p => (model.predict(p.features), p.label)) val accuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / test.count() +// Save and load model model.save(sc, "myModelPath") val sameModel = NaiveBayesModel.load(sc, "myModelPath") {% endhighlight %} @@ -97,6 +98,7 @@ double accuracy = predictionAndLabel.filter(new Function, } }).count() / (double) test.count(); +// Save and load model model.save(sc.sc(), "myModelPath"); NaiveBayesModel sameModel = NaiveBayesModel.load(sc.sc(), "myModelPath"); {% endhighlight %}