From 9d7a0e16ae99ef2d4288a48f0fa13b60719390e1 Mon Sep 17 00:00:00 2001
From: GayathriMurali <gayathri.m@intel.com>
Date: Tue, 31 May 2016 15:48:48 -0700
Subject: [PATCH 1/4] Removing unnecessary commits

---
 docs/sparkr.md | 73 +++++++++++---------------------------------------
 1 file changed, 15 insertions(+), 58 deletions(-)
diff --git a/docs/sparkr.md b/docs/sparkr.md
index 59e486d1929f0..29a2df886686d 100644
--- a/docs/sparkr.md
+++ b/docs/sparkr.md
@@ -285,71 +285,28 @@ head(teenagers)
 
 # Machine Learning
 
-SparkR allows the fitting of generalized linear models over DataFrames using the [glm()](api/R/glm.html) function. Under the hood, SparkR uses MLlib to train a model of the specified family. Currently the gaussian and binomial families are supported. We support a subset of the available R formula operators for model fitting, including '~', '.', ':', '+', and '-'.
+SparkR supports the following Machine Learning algorithms.
 
-The [summary()](api/R/summary.html) function gives the summary of a model produced by [glm()](api/R/glm.html).
+* Generalized Linear Regression Model [spark.glm()](api/R/glm.html)
+* Naive Bayes [spark.naiveBayes()](api/R/naiveBayes.html)
+* KMeans [spark.kmeans()](api/R/kmeans.html)
+* AFT Survival Regression [spark.survreg()](api/R/survreg.html)
 
-* For gaussian GLM model, it returns a list with 'devianceResiduals' and 'coefficients' components. The 'devianceResiduals' gives the min/max deviance residuals of the estimation; the 'coefficients' gives the estimated coefficients and their estimated standard errors, t values and p-values. (It only available when model fitted by normal solver.)
-* For binomial GLM model, it returns a list with 'coefficients' component which gives the estimated coefficients.
+Generalized Linear Regression can be used to train a model from a specified family. Currently the Gaussian, Binomial, Poisson and Gamma families are supported. We support a subset of the available R formula operators for model fitting, including '~', '.', ':', '+', and '-'.
 
-The examples below show the use of building gaussian GLM model and binomial GLM model using SparkR.
+The [summary()](api/R/summary.html) function gives the summary of a model produced by different algorithms listed above.
+This summary is same as the result of summary() function in R.
 
-## Gaussian GLM model
+## Model persistence
 
-<div data-lang="r"  markdown="1">
-{% highlight r %}
-# Create the DataFrame
-df <- createDataFrame(sqlContext, iris)
-
-# Fit a gaussian GLM model over the dataset.
-model <- glm(Sepal_Length ~ Sepal_Width + Species, data = df, family = "gaussian")
-
-# Model summary are returned in a similar format to R's native glm().
-summary(model)
-##$devianceResiduals
-## Min       Max     
-## -1.307112 1.412532
-##
-##$coefficients
-##                   Estimate  Std. Error t value  Pr(>|t|)    
-##(Intercept)        2.251393  0.3697543  6.08889  9.568102e-09
-##Sepal_Width        0.8035609 0.106339   7.556598 4.187317e-12
-##Species_versicolor 1.458743  0.1121079  13.01195 0           
-##Species_virginica  1.946817  0.100015   19.46525 0           
-
-# Make predictions based on the model.
-predictions <- predict(model, newData = df)
-head(select(predictions, "Sepal_Length", "prediction"))
-##  Sepal_Length prediction
-##1          5.1   5.063856
-##2          4.9   4.662076
-##3          4.7   4.822788
-##4          4.6   4.742432
-##5          5.0   5.144212
-##6          5.4   5.385281
-{% endhighlight %}
-</div>
+* write.ml allows users to save a fitted model in a given input path
+* read.ml allows users to read/load the model which was saved using write.ml in a given path
 
-## Binomial GLM model
+Model persistence is supported for all Machine Learning algorithms for all families.
 
-<div data-lang="r"  markdown="1">
-{% highlight r %}
-# Create the DataFrame
-df <- createDataFrame(sqlContext, iris)
-training <- filter(df, df$Species != "setosa")
-
-# Fit a binomial GLM model over the dataset.
-model <- glm(Species ~ Sepal_Length + Sepal_Width, data = training, family = "binomial")
-
-# Model coefficients are returned in a similar format to R's native glm().
-summary(model)
-##$coefficients
-##               Estimate
-##(Intercept)  -13.046005
-##Sepal_Length   1.902373
-##Sepal_Width    0.404655
-{% endhighlight %}
-</div>
+The examples below show the use of building Gaussian GLM, NaiveBayes, kMeans and AFTSurvivalReg using SparkR
+
+{% include_example r/ml.r %}
 
 # R Function Name Conflicts
 

From 13530efdf23951b0725a5284a433992e8a3b10a4 Mon Sep 17 00:00:00 2001
From: GayathriMurali <gayathri.m@intel.com>
Date: Wed, 1 Jun 2016 15:42:14 -0700
Subject: [PATCH 2/4] Fixing example R file

---
 docs/sparkr.md           | 4 ++--
 examples/src/main/r/ml.R | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/sparkr.md b/docs/sparkr.md
index 29a2df886686d..966c41f41ca23 100644
--- a/docs/sparkr.md
+++ b/docs/sparkr.md
@@ -304,9 +304,9 @@ This summary is same as the result of summary() function in R.
 
 Model persistence is supported for all Machine Learning algorithms for all families.
 
-The examples below show the use of building Gaussian GLM, NaiveBayes, kMeans and AFTSurvivalReg using SparkR
+The examples below show the use of building Gaussian GLM, NaiveBayes, kMeans and AFTSurvivalReg models using SparkR
 
-{% include_example r/ml.r %}
+{% include_example r/ml.R %}
 
 # R Function Name Conflicts
 
diff --git a/examples/src/main/r/ml.R b/examples/src/main/r/ml.R
index fd35936635334..7cb747c1fb20b 100644
--- a/examples/src/main/r/ml.R
+++ b/examples/src/main/r/ml.R
@@ -25,8 +25,9 @@ library(SparkR)
 sc <- sparkR.init(appName="SparkR-ML-example")
 sqlContext <- sparkRSQL.init(sc)
 
-############################ spark.glm and glm ##############################################
 
+############################ spark.glm and glm ##############################################
+# $example on$
 irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))
 # Fit a generalized linear model of family "gaussian" with spark.glm
 gaussianDF <- irisDF
@@ -57,7 +58,6 @@ binomialPredictions <- predict(binomialGLM, binomialTestDF)
 showDF(binomialPredictions)
 
 ############################ spark.survreg ##############################################
-
 # Use the ovarian dataset available in R survival package
 library(survival)
 
@@ -121,7 +121,7 @@ gaussianGLM <- spark.glm(gaussianDF, Sepal_Length ~ Sepal_Width + Species, famil
 modelPath <- tempfile(pattern = "ml", fileext = ".tmp")
 write.ml(gaussianGLM, modelPath)
 gaussianGLM2 <- read.ml(modelPath)
-
+# $example off$
 # Check model summary
 summary(gaussianGLM2)
 

From f4516c73cdb7da5578ee611d0ddac42ae6873c4f Mon Sep 17 00:00:00 2001
From: GayathriMurali <gayathri.m@intel.com>
Date: Sun, 5 Jun 2016 23:16:08 -0700
Subject: [PATCH 3/4] Review comments

---
 docs/sparkr.md           | 18 +++++++++---------
 examples/src/main/r/ml.R |  4 ++--
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/docs/sparkr.md b/docs/sparkr.md
index 966c41f41ca23..0e4b9c416528f 100644
--- a/docs/sparkr.md
+++ b/docs/sparkr.md
@@ -287,24 +287,24 @@ head(teenagers)
 
 SparkR supports the following Machine Learning algorithms.
 
-* Generalized Linear Regression Model [spark.glm()](api/R/glm.html)
-* Naive Bayes [spark.naiveBayes()](api/R/naiveBayes.html)
-* KMeans [spark.kmeans()](api/R/kmeans.html)
-* AFT Survival Regression [spark.survreg()](api/R/survreg.html)
+* Generalized Linear Regression Model [spark.glm()](api/R/spark.glm.html)
+* Naive Bayes [spark.naiveBayes()](api/R/spark.naiveBayes.html)
+* K-Means [spark.kmeans()](api/R/spark.kmeans.html)
+* AFT Survival Regression [spark.survreg()](api/R/spark.survreg.html)
 
-Generalized Linear Regression can be used to train a model from a specified family. Currently the Gaussian, Binomial, Poisson and Gamma families are supported. We support a subset of the available R formula operators for model fitting, including '~', '.', ':', '+', and '-'.
+[Generalized Linear Regression](api/R/spark.glm.html) can be used to train a model from a specified family. Currently the Gaussian, Binomial, Poisson and Gamma families are supported. We support a subset of the available R formula operators for model fitting, including '~', '.', ':', '+', and '-'.
 
 The [summary()](api/R/summary.html) function gives the summary of a model produced by different algorithms listed above.
-This summary is same as the result of summary() function in R.
+It produces a result similar to that of R's summary() function.
 
 ## Model persistence
 
-* write.ml allows users to save a fitted model in a given input path
-* read.ml allows users to read/load the model which was saved using write.ml in a given path
+* [write.ml](api/R/write.ml.html) allows users to save a fitted model to a given path
+* [read.ml](api/R/read.ml.html) allows users to load a model that was saved with write.ml from a given path
 
 Model persistence is supported for all Machine Learning algorithms for all families.
 
-The examples below show the use of building Gaussian GLM, NaiveBayes, kMeans and AFTSurvivalReg models using SparkR
+The examples below show the use of building glm with Gaussian family,glm with Binomial family, survreg, naiveBayes, kmeans models using SparkR
 
 {% include_example r/ml.R %}
 
diff --git a/examples/src/main/r/ml.R b/examples/src/main/r/ml.R
index 7cb747c1fb20b..495f392c26542 100644
--- a/examples/src/main/r/ml.R
+++ b/examples/src/main/r/ml.R
@@ -25,9 +25,9 @@ library(SparkR)
 sc <- sparkR.init(appName="SparkR-ML-example")
 sqlContext <- sparkRSQL.init(sc)
 
-
-############################ spark.glm and glm ##############################################
 # $example on$
+############################ spark.glm and glm ##############################################
+
 irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))
 # Fit a generalized linear model of family "gaussian" with spark.glm
 gaussianDF <- irisDF

From e773d038460e566394cefa9c183c636bbaba4a4e Mon Sep 17 00:00:00 2001
From: GayathriMurali <gayathri.m@intel.com>
Date: Wed, 15 Jun 2016 15:53:24 -0700
Subject: [PATCH 4/4] Review comment

---
 docs/sparkr.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/docs/sparkr.md b/docs/sparkr.md
index 0e4b9c416528f..961bd323fabcb 100644
--- a/docs/sparkr.md
+++ b/docs/sparkr.md
@@ -304,7 +304,12 @@ It produces the similar result compared with R summary function.
 
 Model persistence is supported for all Machine Learning algorithms for all families.
 
-The examples below show the use of building glm with Gaussian family,glm with Binomial family, survreg, naiveBayes, kmeans models using SparkR
+The examples below show how to build several models:
+
+* GLM using the Gaussian and Binomial model families
+* AFT survival regression model
+* Naive Bayes model
+* K-Means model
 
 {% include_example r/ml.R %}