Skip to content

Commit ea3a12b

Browse files
junyangq authored and mengxr committed
[SPARK-16107][R] group glm methods in documentation
## What changes were proposed in this pull request?

This groups GLM methods (spark.glm, summary, print, predict and write.ml) in the documentation. The example code was updated.

## How was this patch tested?

N/A

(If this patch involves UI changes, please attach a screenshot; otherwise, remove this)

![screen shot 2016-06-21 at 2 31 37 pm](https://cloud.githubusercontent.com/assets/15318264/16247077/f6eafc04-37bc-11e6-89a8-7898ff3e4078.png)
![screen shot 2016-06-21 at 2 31 45 pm](https://cloud.githubusercontent.com/assets/15318264/16247078/f6eb1c16-37bc-11e6-940a-2b595b10617c.png)

Author: Junyang Qian <[email protected]>
Author: Junyang Qian <[email protected]>

Closes #13820 from junyangq/SPARK-16107.
1 parent cf1995a commit ea3a12b

File tree

1 file changed

+36
-44
lines changed

1 file changed

+36
-44
lines changed

R/pkg/R/mllib.R

Lines changed: 36 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -53,9 +53,10 @@ setClass("AFTSurvivalRegressionModel", representation(jobj = "jobj"))
5353
#' @note KMeansModel since 2.0.0
5454
setClass("KMeansModel", representation(jobj = "jobj"))
5555

56-
#' Fits a generalized linear model
56+
#' Generalized Linear Models
5757
#'
58-
#' Fits a generalized linear model against a Spark DataFrame.
58+
#' Fits a generalized linear model against a Spark DataFrame. Users can print, make predictions on the
59+
#' produced model and save the model to the input path.
5960
#'
6061
#' @param data SparkDataFrame for training.
6162
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
@@ -66,8 +67,9 @@ setClass("KMeansModel", representation(jobj = "jobj"))
6667
#' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
6768
#' @param tol Positive convergence tolerance of iterations.
6869
#' @param maxIter Integer giving the maximal number of IRLS iterations.
69-
#' @return a fitted generalized linear model
70+
#' @return \code{spark.glm} returns a fitted generalized linear model
7071
#' @rdname spark.glm
72+
#' @name spark.glm
7173
#' @export
7274
#' @examples
7375
#' \dontrun{
@@ -76,8 +78,21 @@ setClass("KMeansModel", representation(jobj = "jobj"))
7678
#' df <- createDataFrame(iris)
7779
#' model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family = "gaussian")
7880
#' summary(model)
81+
#'
82+
#' # fitted values on training data
83+
#' fitted <- predict(model, df)
84+
#' head(select(fitted, "Sepal_Length", "prediction"))
85+
#'
86+
#' # save fitted model to input path
87+
#' path <- "path/to/model"
88+
#' write.ml(model, path)
89+
#'
90+
#' # can also read back the saved model and print
91+
#' savedModel <- read.ml(path)
92+
#' summary(savedModel)
7993
#' }
8094
#' @note spark.glm since 2.0.0
95+
#' @seealso \link{glm}, \link{read.ml}
8196
setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
8297
function(data, formula, family = gaussian, tol = 1e-6, maxIter = 25) {
8398
if (is.character(family)) {
@@ -99,10 +114,9 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
99114
return(new("GeneralizedLinearRegressionModel", jobj = jobj))
100115
})
101116

102-
#' Fits a generalized linear model (R-compliant).
117+
#' Generalized Linear Models (R-compliant)
103118
#'
104119
#' Fits a generalized linear model, similarly to R's glm().
105-
#'
106120
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
107121
#' operators are supported, including '~', '.', ':', '+', and '-'.
108122
#' @param data SparkDataFrame for training.
@@ -112,7 +126,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
112126
#' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
113127
#' @param epsilon Positive convergence tolerance of iterations.
114128
#' @param maxit Integer giving the maximal number of IRLS iterations.
115-
#' @return a fitted generalized linear model
129+
#' @return \code{glm} returns a fitted generalized linear model.
116130
#' @rdname glm
117131
#' @export
118132
#' @examples
@@ -124,24 +138,21 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
124138
#' summary(model)
125139
#' }
126140
#' @note glm since 1.5.0
141+
#' @seealso \link{spark.glm}
127142
setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDataFrame"),
128143
function(formula, family = gaussian, data, epsilon = 1e-6, maxit = 25) {
129144
spark.glm(data, formula, family, tol = epsilon, maxIter = maxit)
130145
})
131146

132-
#' Get the summary of a generalized linear model
133-
#'
134-
#' Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary().
147+
# Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary().
135148
#'
136149
#' @param object A fitted generalized linear model
137-
#' @return coefficients the model's coefficients, intercept
138-
#' @rdname summary
150+
#' @return \code{summary} returns a summary object of the fitted model, a list of components
151+
#' including at least the coefficients, null/residual deviance, null/residual degrees
152+
#' of freedom, AIC and number of iterations IRLS takes.
153+
#'
154+
#' @rdname spark.glm
139155
#' @export
140-
#' @examples
141-
#' \dontrun{
142-
#' model <- glm(y ~ x, trainingData)
143-
#' summary(model)
144-
#' }
145156
#' @note summary(GeneralizedLinearRegressionModel) since 2.0.0
146157
setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
147158
function(object, ...) {
@@ -173,10 +184,10 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
173184
return(ans)
174185
})
175186

176-
#' Print the summary of GeneralizedLinearRegressionModel
187+
# Prints the summary of GeneralizedLinearRegressionModel
177188
#'
178-
#' @rdname print
179-
#' @name print.summary.GeneralizedLinearRegressionModel
189+
#' @rdname spark.glm
190+
#' @param x Summary object of a fitted generalized linear model returned by \code{summary}
180191
#' @export
181192
#' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0
182193
print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
@@ -205,22 +216,13 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
205216
invisible(x)
206217
}
207218

208-
#' Predicted values based on model
219+
# Makes predictions from a generalized linear model produced by glm() or spark.glm(),
220+
# similarly to R's predict().
209221
#'
210-
#' Makes predictions from a generalized linear model produced by glm() or spark.glm(),
211-
#' similarly to R's predict().
212-
#'
213-
#' @param object A fitted generalized linear model
214222
#' @param newData SparkDataFrame for testing
215-
#' @return SparkDataFrame containing predicted labels in a column named "prediction"
216-
#' @rdname predict
223+
#' @return \code{predict} returns a SparkDataFrame containing predicted labels in a column named "prediction"
224+
#' @rdname spark.glm
217225
#' @export
218-
#' @examples
219-
#' \dontrun{
220-
#' model <- glm(y ~ x, trainingData)
221-
#' predicted <- predict(model, testData)
222-
#' showDF(predicted)
223-
#' }
224226
#' @note predict(GeneralizedLinearRegressionModel) since 1.5.0
225227
setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
226228
function(object, newData) {
@@ -471,24 +473,14 @@ setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "c
471473
invisible(callJMethod(writer, "save", path))
472474
})
473475

474-
#' Save fitted MLlib model to the input path
475-
#'
476-
#' Save the generalized linear model to the input path.
476+
# Saves the generalized linear model to the input path.
477477
#'
478-
#' @param object A fitted generalized linear model
479478
#' @param path The directory where the model is saved
480479
#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
481480
#' which means throw exception if the output path exists.
482481
#'
483-
#' @rdname write.ml
484-
#' @name write.ml
482+
#' @rdname spark.glm
485483
#' @export
486-
#' @examples
487-
#' \dontrun{
488-
#' model <- glm(y ~ x, trainingData)
489-
#' path <- "path/to/model"
490-
#' write.ml(model, path)
491-
#' }
492484
#' @note write.ml(GeneralizedLinearRegressionModel, character) since 2.0.0
493485
setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", path = "character"),
494486
function(object, path, overwrite = FALSE) {

0 commit comments

Comments
 (0)