@@ -53,9 +53,10 @@ setClass("AFTSurvivalRegressionModel", representation(jobj = "jobj"))
5353# ' @note KMeansModel since 2.0.0
5454setClass ("KMeansModel ", representation(jobj = "jobj"))
5555
56- # ' Fits a generalized linear model
56+ # ' Generalized Linear Models
5757# '
58- # ' Fits a generalized linear model against a Spark DataFrame.
58+ # ' Fits a generalized linear model against a SparkDataFrame. Users can print a summary of the
59+ # ' produced model, make predictions with it, and save it to the input path.
5960# '
6061# ' @param data SparkDataFrame for training.
6162# ' @param formula A symbolic description of the model to be fitted. Currently only a few formula
@@ -66,8 +67,9 @@ setClass("KMeansModel", representation(jobj = "jobj"))
6667# ' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
6768# ' @param tol Positive convergence tolerance of iterations.
6869# ' @param maxIter Integer giving the maximal number of IRLS iterations.
69- # ' @return a fitted generalized linear model
70+ # ' @return \code{spark.glm} returns a fitted generalized linear model.
7071# ' @rdname spark.glm
72+ # ' @name spark.glm
7173# ' @export
7274# ' @examples
7375# ' \dontrun{
@@ -76,8 +78,21 @@ setClass("KMeansModel", representation(jobj = "jobj"))
7678# ' df <- createDataFrame(iris)
7779# ' model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family = "gaussian")
7880# ' summary(model)
81+ # '
82+ # ' # fitted values on training data
83+ # ' fitted <- predict(model, df)
84+ # ' head(select(fitted, "Sepal_Length", "prediction"))
85+ # '
86+ # ' # save fitted model to input path
87+ # ' path <- "path/to/model"
88+ # ' write.ml(model, path)
89+ # '
90+ # ' # read back the saved model and print its summary
91+ # ' savedModel <- read.ml(path)
92+ # ' summary(savedModel)
7993# ' }
8094# ' @note spark.glm since 2.0.0
95+ # ' @seealso \link{glm}, \link{read.ml}
8196setMethod ("spark.glm ", signature(data = "SparkDataFrame", formula = "formula"),
8297 function (data , formula , family = gaussian , tol = 1e-6 , maxIter = 25 ) {
8398 if (is.character(family )) {
@@ -99,10 +114,9 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
99114 return (new(" GeneralizedLinearRegressionModel" , jobj = jobj ))
100115 })
101116
102- # ' Fits a generalized linear model (R-compliant).
117+ # ' Generalized Linear Models (R-compliant)
103118# '
104119# ' Fits a generalized linear model, similarly to R's glm().
105- # '
106120# ' @param formula A symbolic description of the model to be fitted. Currently only a few formula
107121# ' operators are supported, including '~', '.', ':', '+', and '-'.
108122# ' @param data SparkDataFrame for training.
@@ -112,7 +126,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
112126# ' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
113127# ' @param epsilon Positive convergence tolerance of iterations.
114128# ' @param maxit Integer giving the maximal number of IRLS iterations.
115- # ' @return a fitted generalized linear model
129+ # ' @return \code{glm} returns a fitted generalized linear model.
116130# ' @rdname glm
117131# ' @export
118132# ' @examples
@@ -124,24 +138,21 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
124138# ' summary(model)
125139# ' }
126140# ' @note glm since 1.5.0
141+ # ' @seealso \link{spark.glm}
127142setMethod ("glm ", signature(formula = "formula", family = "ANY", data = "SparkDataFrame"),
128143 function (formula , family = gaussian , data , epsilon = 1e-6 , maxit = 25 ) {
129144 spark.glm(data , formula , family , tol = epsilon , maxIter = maxit )
130145 })
131146
132- # ' Get the summary of a generalized linear model
133- # '
134- # ' Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary().
147+ # Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary().
135148# '
136149# ' @param object A fitted generalized linear model
137- # ' @return coefficients the model's coefficients, intercept
138- # ' @rdname summary
150+ # ' @return \code{summary} returns a summary object of the fitted model, a list of components
151+ # ' including at least the coefficients, null/residual deviance, null/residual degrees
152+ # ' of freedom, AIC and number of iterations IRLS takes.
153+ # '
154+ # ' @rdname spark.glm
139155# ' @export
140- # ' @examples
141- # ' \dontrun{
142- # ' model <- glm(y ~ x, trainingData)
143- # ' summary(model)
144- # ' }
145156# ' @note summary(GeneralizedLinearRegressionModel) since 2.0.0
146157setMethod ("summary ", signature(object = "GeneralizedLinearRegressionModel"),
147158 function (object , ... ) {
@@ -173,10 +184,10 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
173184 return (ans )
174185 })
175186
176- # ' Print the summary of GeneralizedLinearRegressionModel
187+ # Prints the summary of GeneralizedLinearRegressionModel
177188# '
178- # ' @rdname print
179- # ' @name print.summary.GeneralizedLinearRegressionModel
189+ # ' @rdname spark.glm
190+ # ' @param x Summary object of a fitted generalized linear model returned by the \code{summary} function
180191# ' @export
181192# ' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0
182193print.summary.GeneralizedLinearRegressionModel <- function (x , ... ) {
@@ -205,22 +216,13 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
205216 invisible (x )
206217 }
207218
208- # ' Predicted values based on model
219+ # Makes predictions from a generalized linear model produced by glm() or spark.glm(),
220+ # similarly to R's predict().
209221# '
210- # ' Makes predictions from a generalized linear model produced by glm() or spark.glm(),
211- # ' similarly to R's predict().
212- # '
213- # ' @param object A fitted generalized linear model
214222# ' @param newData SparkDataFrame for testing
215- # ' @return SparkDataFrame containing predicted labels in a column named "prediction"
216- # ' @rdname predict
223+ # ' @return \code{predict} returns a SparkDataFrame containing predicted labels in a column named "prediction"
224+ # ' @rdname spark.glm
217225# ' @export
218- # ' @examples
219- # ' \dontrun{
220- # ' model <- glm(y ~ x, trainingData)
221- # ' predicted <- predict(model, testData)
222- # ' showDF(predicted)
223- # ' }
224226# ' @note predict(GeneralizedLinearRegressionModel) since 1.5.0
225227setMethod ("predict ", signature(object = "GeneralizedLinearRegressionModel"),
226228 function (object , newData ) {
@@ -471,24 +473,14 @@ setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "c
471473 invisible (callJMethod(writer , " save" , path ))
472474 })
473475
474- # ' Save fitted MLlib model to the input path
475- # '
476- # ' Save the generalized linear model to the input path.
476+ # Saves the generalized linear model to the input path.
477477# '
478- # ' @param object A fitted generalized linear model
479478# ' @param path The directory where the model is saved
480479# ' @param overwrite Whether to overwrite if the output path already exists. Default is FALSE,
481480# ' which means an exception is thrown if the output path exists.
482481# '
483- # ' @rdname write.ml
484- # ' @name write.ml
482+ # ' @rdname spark.glm
485483# ' @export
486- # ' @examples
487- # ' \dontrun{
488- # ' model <- glm(y ~ x, trainingData)
489- # ' path <- "path/to/model"
490- # ' write.ml(model, path)
491- # ' }
492484# ' @note write.ml(GeneralizedLinearRegressionModel, character) since 2.0.0
493485setMethod ("write.ml ", signature(object = "GeneralizedLinearRegressionModel", path = "character"),
494486 function (object , path , overwrite = FALSE ) {
0 commit comments