2222# ' @export
setClass("PipelineModel", representation(model = "jobj"))
2424
#' @title S4 class that represents a NaiveBayesModel
#' @param jobj a Java object reference to the backing Scala NaiveBayesWrapper
#' @export
setClass("NaiveBayesModel", representation(jobj = "jobj"))
29+
2530# ' Fits a generalized linear model
2631# '
2732# ' Fits a generalized linear model, similarly to R's glm(). Also see the glmnet package.
@@ -42,7 +47,7 @@ setClass("PipelineModel", representation(model = "jobj"))
4247# ' @rdname glm
4348# ' @export
4449# ' @examples
45- # '\dontrun{
50+ # ' \dontrun{
4651# ' sc <- sparkR.init()
4752# ' sqlContext <- sparkRSQL.init(sc)
4853# ' data(iris)
@@ -71,7 +76,7 @@ setMethod("glm", signature(formula = "formula", family = "ANY", data = "DataFram
7176# ' @rdname predict
7277# ' @export
7378# ' @examples
74- # '\dontrun{
79+ # ' \dontrun{
7580# ' model <- glm(y ~ x, trainingData)
7681# ' predicted <- predict(model, testData)
7782# ' showDF(predicted)
@@ -81,6 +86,26 @@ setMethod("predict", signature(object = "PipelineModel"),
8186 return (dataFrame(callJMethod(object @ model , " transform" , newData @ sdf )))
8287 })
8388
#' Make predictions from a naive Bayes model
#'
#' Makes predictions from a model produced by naiveBayes(), similarly to R package e1071's predict.
#'
#' @param object A fitted naive Bayes model
#' @param newData DataFrame for testing
#' @return DataFrame containing predicted labels in a column named "prediction"
#' @rdname predict
#' @export
#' @examples
#' \dontrun{
#' model <- naiveBayes(y ~ x, trainingData)
#' predicted <- predict(model, testData)
#' showDF(predicted)
#'}
setMethod("predict", signature(object = "NaiveBayesModel"),
          function(object, newData) {
            # Call transform() on the backing Java object with the Java-side
            # reference of newData, then wrap the result as a SparkR DataFrame.
            return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf)))
          })
108+
84109# ' Get the summary of a model
85110# '
86111# ' Returns the summary of a model produced by glm(), similarly to R's summary().
@@ -97,7 +122,7 @@ setMethod("predict", signature(object = "PipelineModel"),
97122# ' @rdname summary
98123# ' @export
99124# ' @examples
100- # '\dontrun{
125+ # ' \dontrun{
101126# ' model <- glm(y ~ x, trainingData)
102127# ' summary(model)
103128# '}
@@ -140,6 +165,35 @@ setMethod("summary", signature(object = "PipelineModel"),
140165 }
141166 })
142167
#' Get the summary of a naive Bayes model
#'
#' Returns the summary of a naive Bayes model produced by naiveBayes(), similarly to R's summary().
#'
#' @param object A fitted MLlib model
#' @return a list containing 'apriori', the label distribution, and 'tables', conditional
#'         probabilities given the target label
#' @rdname summary
#' @export
#' @examples
#' \dontrun{
#' model <- naiveBayes(y ~ x, trainingData)
#' summary(model)
#'}
setMethod("summary", signature(object = "NaiveBayesModel"),
          function(object, ...) {
            jobj <- object@jobj
            features <- callJMethod(jobj, "features")
            labels <- callJMethod(jobj, "labels")

            # Flatten the values returned by apriori() into a one-row matrix
            # whose columns are named after the labels.
            apriori <- callJMethod(jobj, "apriori")
            apriori <- t(as.matrix(unlist(apriori)))
            colnames(apriori) <- unlist(labels)

            # Arrange the values returned by tables() into a matrix with one row
            # per label; columns are named after the features.
            tables <- callJMethod(jobj, "tables")
            tables <- matrix(tables, nrow = length(labels))
            rownames(tables) <- unlist(labels)
            colnames(tables) <- unlist(features)

            return(list(apriori = apriori, tables = tables))
          })
196+
143197# ' Fit a k-means model
144198# '
145199# ' Fit a k-means model, similarly to R's kmeans().
@@ -152,7 +206,7 @@ setMethod("summary", signature(object = "PipelineModel"),
152206# ' @rdname kmeans
153207# ' @export
154208# ' @examples
155- # '\dontrun{
209+ # ' \dontrun{
156210# ' model <- kmeans(x, centers = 2, algorithm="random")
157211# '}
158212setMethod ("kmeans ", signature(x = "DataFrame"),
@@ -173,7 +227,7 @@ setMethod("kmeans", signature(x = "DataFrame"),
173227# ' @rdname fitted
174228# ' @export
175229# ' @examples
176- # '\dontrun{
230+ # ' \dontrun{
177231# ' model <- kmeans(trainingData, 2)
178232# ' fitted.model <- fitted(model)
179233# ' showDF(fitted.model)
@@ -192,3 +246,30 @@ setMethod("fitted", signature(object = "PipelineModel"),
192246 stop(paste(" Unsupported model" , modelName , sep = " " ))
193247 }
194248 })
249+
#' Fit a Bernoulli naive Bayes model
#'
#' Fit a Bernoulli naive Bayes model, similarly to R package e1071's naiveBayes() while only
#' categorical features are supported. The input should be a DataFrame of observations instead of a
#' contingency table.
#'
#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
#'               operators are supported, including '~', '.', ':', '+', and '-'.
#' @param data DataFrame for training
#' @param laplace Smoothing parameter
#' @param ... Additional arguments; not used by this method.
#' @return a fitted naive Bayes model
#' @rdname naiveBayes
#' @seealso e1071: \url{https://cran.r-project.org/web/packages/e1071/}
#' @export
#' @examples
#' \dontrun{
#' df <- createDataFrame(sqlContext, infert)
#' model <- naiveBayes(education ~ ., df, laplace = 0)
#'}
setMethod("naiveBayes", signature(formula = "formula", data = "DataFrame"),
          function(formula, data, laplace = 0, ...) {
            # deparse() may split a long formula across several strings; join
            # them so a single string is passed to the JVM side.
            formula <- paste(deparse(formula), collapse = "")
            jobj <- callJStatic("org.apache.spark.ml.r.NaiveBayesWrapper", "fit",
                                formula, data@sdf, laplace)
            return(new("NaiveBayesModel", jobj = jobj))
          })
0 commit comments