From d73a2c04ef3ba0cc033f79d4c83e8dd659f71b47 Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Sat, 20 Aug 2016 09:14:19 -0700 Subject: [PATCH 1/6] refactor, cleanup, fix deprecation in test --- R/pkg/R/mllib.R | 200 +++++++++++-------------- R/pkg/inst/tests/testthat/test_mllib.R | 2 +- 2 files changed, 90 insertions(+), 112 deletions(-) diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 9a53c80aecded..17c242d8ee0a8 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -88,9 +88,9 @@ setClass("ALSModel", representation(jobj = "jobj")) #' @rdname write.ml #' @name write.ml #' @export -#' @seealso \link{spark.glm}, \link{glm}, \link{spark.gaussianMixture} -#' @seealso \link{spark.als}, \link{spark.kmeans}, \link{spark.lda}, \link{spark.naiveBayes} -#' @seealso \link{spark.survreg}, \link{spark.isoreg} +#' @seealso \link{spark.glm}, \link{glm}, +#' @seealso \link{spark.als}, \link{spark.gaussianMixture}, \link{spark.isoreg}, \link{spark.kmeans}, +#' @seealso \link{spark.lda}, \link{spark.naiveBayes}, \link{spark.survreg}, #' @seealso \link{read.ml} NULL @@ -101,11 +101,22 @@ NULL #' @rdname predict #' @name predict #' @export -#' @seealso \link{spark.glm}, \link{glm}, \link{spark.gaussianMixture} -#' @seealso \link{spark.als}, \link{spark.kmeans}, \link{spark.naiveBayes}, \link{spark.survreg} -#' @seealso \link{spark.isoreg} +#' @seealso \link{spark.glm}, \link{glm}, +#' @seealso \link{spark.als}, \link{spark.gaussianMixture}, \link{spark.isoreg}, \link{spark.kmeans}, +#' @seealso \link{spark.naiveBayes}, \link{spark.survreg}, NULL +write_internal <- function(object, path, overwrite = FALSE) { + writer <- callJMethod(object@jobj, "write") + if (overwrite) { + writer <- callJMethod(writer, "overwrite") + } + invisible(callJMethod(writer, "save", path)) +} + +predict_internal <- function(object, newData) { + dataFrame(callJMethod(object@jobj, "transform", newData@sdf)) +} #' Generalized Linear Models #' @@ -173,7 +184,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), jobj <- callJStatic("org.apache.spark.ml.r.GeneralizedLinearRegressionWrapper", "fit", formula, data@sdf, family$family, family$link, tol, as.integer(maxIter), as.character(weightCol)) - return(new("GeneralizedLinearRegressionModel", jobj = jobj)) + new("GeneralizedLinearRegressionModel", jobj = jobj) }) #' Generalized Linear Models (R-compliant) @@ -219,7 +230,7 @@ setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDat #' @export #' @note summary(GeneralizedLinearRegressionModel) since 2.0.0 setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), - function(object, ...) { + function(object) { jobj <- object@jobj is.loaded <- callJMethod(jobj, "isLoaded") features <- callJMethod(jobj, "rFeatures") @@ -245,7 +256,7 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), deviance = deviance, df.null = df.null, df.residual = df.residual, aic = aic, iter = iter, family = family, is.loaded = is.loaded) class(ans) <- "summary.GeneralizedLinearRegressionModel" - return(ans) + ans }) # Prints the summary of GeneralizedLinearRegressionModel @@ -254,7 +265,7 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), #' @param x summary object of fitted generalized linear model returned by \code{summary} function #' @export #' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0 -print.summary.GeneralizedLinearRegressionModel <- function(x, ...) 
{ +print.summary.GeneralizedLinearRegressionModel <- function(x) { if (x$is.loaded) { cat("\nSaved-loaded model does not support output 'Deviance Residuals'.\n") } else { @@ -291,7 +302,7 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) { #' @note predict(GeneralizedLinearRegressionModel) since 1.5.0 setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"), function(object, newData) { - return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf))) + predict_internal(object, newData) }) # Makes predictions from a naive Bayes model or a model produced by spark.naiveBayes(), @@ -305,7 +316,7 @@ setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"), #' @note predict(NaiveBayesModel) since 2.0.0 setMethod("predict", signature(object = "NaiveBayesModel"), function(object, newData) { - return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf))) + predict_internal(object, newData) }) # Returns the summary of a naive Bayes model produced by \code{spark.naiveBayes} @@ -317,7 +328,7 @@ setMethod("predict", signature(object = "NaiveBayesModel"), #' @export #' @note summary(NaiveBayesModel) since 2.0.0 setMethod("summary", signature(object = "NaiveBayesModel"), - function(object, ...) { + function(object) { jobj <- object@jobj features <- callJMethod(jobj, "features") labels <- callJMethod(jobj, "labels") @@ -328,7 +339,7 @@ setMethod("summary", signature(object = "NaiveBayesModel"), tables <- matrix(tables, nrow = length(labels)) rownames(tables) <- unlist(labels) colnames(tables) <- unlist(features) - return(list(apriori = apriori, tables = tables)) + list(apriori = apriori, tables = tables) }) # Returns posterior probabilities from a Latent Dirichlet Allocation model produced by spark.lda() @@ -342,7 +353,7 @@ setMethod("summary", signature(object = "NaiveBayesModel"), #' @note spark.posterior(LDAModel) since 2.1.0 setMethod("spark.posterior", signature(object = "LDAModel", newData = "SparkDataFrame"), function(object, newData) { - return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf))) + predict_internal(object, newData) }) # Returns the summary of a Latent Dirichlet Allocation model produced by \code{spark.lda} @@ -377,12 +388,11 @@ setMethod("summary", signature(object = "LDAModel"), vocabSize <- callJMethod(jobj, "vocabSize") topics <- dataFrame(callJMethod(jobj, "topics", maxTermsPerTopic)) vocabulary <- callJMethod(jobj, "vocabulary") - return(list(docConcentration = unlist(docConcentration), - topicConcentration = topicConcentration, - logLikelihood = logLikelihood, logPerplexity = logPerplexity, - isDistributed = isDistributed, vocabSize = vocabSize, - topics = topics, - vocabulary = unlist(vocabulary))) + list(docConcentration = unlist(docConcentration), + topicConcentration = topicConcentration, + logLikelihood = logLikelihood, logPerplexity = logPerplexity, + isDistributed = isDistributed, vocabSize = vocabSize, + topics = topics, vocabulary = unlist(vocabulary)) }) # Returns the log perplexity of a Latent Dirichlet Allocation model produced by \code{spark.lda} @@ -395,8 +405,8 @@ setMethod("summary", signature(object = "LDAModel"), #' @note spark.perplexity(LDAModel) since 2.1.0 setMethod("spark.perplexity", signature(object = "LDAModel", data = "SparkDataFrame"), function(object, data) { - return(ifelse(missing(data), callJMethod(object@jobj, "logPerplexity"), - callJMethod(object@jobj, "computeLogPerplexity", data@sdf))) + ifelse(missing(data), callJMethod(object@jobj, "logPerplexity"), + 
callJMethod(object@jobj, "computeLogPerplexity", data@sdf)) }) # Saves the Latent Dirichlet Allocation model to the input path. @@ -412,11 +422,7 @@ setMethod("spark.perplexity", signature(object = "LDAModel", data = "SparkDataFr #' @note write.ml(LDAModel, character) since 2.1.0 setMethod("write.ml", signature(object = "LDAModel", path = "character"), function(object, path, overwrite = FALSE) { - writer <- callJMethod(object@jobj, "write") - if (overwrite) { - writer <- callJMethod(writer, "overwrite") - } - invisible(callJMethod(writer, "save", path)) + write_internal(object, path, overwrite) }) #' Isotonic Regression Model @@ -473,7 +479,7 @@ setMethod("spark.isoreg", signature(data = "SparkDataFrame", formula = "formula" jobj <- callJStatic("org.apache.spark.ml.r.IsotonicRegressionWrapper", "fit", data@sdf, formula, as.logical(isotonic), as.integer(featureIndex), as.character(weightCol)) - return(new("IsotonicRegressionModel", jobj = jobj)) + new("IsotonicRegressionModel", jobj = jobj) }) # Predicted values based on an isotonicRegression model @@ -487,7 +493,7 @@ setMethod("spark.isoreg", signature(data = "SparkDataFrame", formula = "formula" #' @note predict(IsotonicRegressionModel) since 2.1.0 setMethod("predict", signature(object = "IsotonicRegressionModel"), function(object, newData) { - return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf))) + predict_internal(object, newData) }) # Get the summary of an IsotonicRegressionModel model @@ -499,11 +505,11 @@ setMethod("predict", signature(object = "IsotonicRegressionModel"), #' @export #' @note summary(IsotonicRegressionModel) since 2.1.0 setMethod("summary", signature(object = "IsotonicRegressionModel"), - function(object, ...) { + function(object) { jobj <- object@jobj boundaries <- callJMethod(jobj, "boundaries") predictions <- callJMethod(jobj, "predictions") - return(list(boundaries = boundaries, predictions = predictions)) + list(boundaries = boundaries, predictions = predictions) }) #' K-Means Clustering Model @@ -553,7 +559,7 @@ setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula" initMode <- match.arg(initMode) jobj <- callJStatic("org.apache.spark.ml.r.KMeansWrapper", "fit", data@sdf, formula, as.integer(k), as.integer(maxIter), initMode) - return(new("KMeansModel", jobj = jobj)) + new("KMeansModel", jobj = jobj) }) #' Get fitted result from a k-means model @@ -576,14 +582,14 @@ setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula" #'} #' @note fitted since 2.0.0 setMethod("fitted", signature(object = "KMeansModel"), - function(object, method = c("centers", "classes"), ...) { + function(object, method = c("centers", "classes")) { method <- match.arg(method) jobj <- object@jobj is.loaded <- callJMethod(jobj, "isLoaded") if (is.loaded) { - stop(paste("Saved-loaded k-means model does not support 'fitted' method")) + stop("Saved-loaded k-means model does not support 'fitted' method") } else { - return(dataFrame(callJMethod(jobj, "fitted", method))) + dataFrame(callJMethod(jobj, "fitted", method)) } }) @@ -595,7 +601,7 @@ setMethod("fitted", signature(object = "KMeansModel"), #' @export #' @note summary(KMeansModel) since 2.0.0 setMethod("summary", signature(object = "KMeansModel"), - function(object, ...) 
{ + function(object) { jobj <- object@jobj is.loaded <- callJMethod(jobj, "isLoaded") features <- callJMethod(jobj, "features") @@ -610,8 +616,8 @@ setMethod("summary", signature(object = "KMeansModel"), } else { dataFrame(callJMethod(jobj, "cluster")) } - return(list(coefficients = coefficients, size = size, - cluster = cluster, is.loaded = is.loaded)) + list(coefficients = coefficients, size = size, + cluster = cluster, is.loaded = is.loaded) }) # Predicted values based on a k-means model @@ -623,7 +629,7 @@ setMethod("summary", signature(object = "KMeansModel"), #' @note predict(KMeansModel) since 2.0.0 setMethod("predict", signature(object = "KMeansModel"), function(object, newData) { - return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf))) + predict_internal(object, newData) }) #' Naive Bayes Models @@ -665,11 +671,11 @@ setMethod("predict", signature(object = "KMeansModel"), #' } #' @note spark.naiveBayes since 2.0.0 setMethod("spark.naiveBayes", signature(data = "SparkDataFrame", formula = "formula"), - function(data, formula, smoothing = 1.0, ...) { + function(data, formula, smoothing = 1.0) { formula <- paste(deparse(formula), collapse = "") jobj <- callJStatic("org.apache.spark.ml.r.NaiveBayesWrapper", "fit", formula, data@sdf, smoothing) - return(new("NaiveBayesModel", jobj = jobj)) + new("NaiveBayesModel", jobj = jobj) }) # Saves the Bernoulli naive Bayes model to the input path. @@ -684,11 +690,7 @@ setMethod("spark.naiveBayes", signature(data = "SparkDataFrame", formula = "form #' @note write.ml(NaiveBayesModel, character) since 2.0.0 setMethod("write.ml", signature(object = "NaiveBayesModel", path = "character"), function(object, path, overwrite = FALSE) { - writer <- callJMethod(object@jobj, "write") - if (overwrite) { - writer <- callJMethod(writer, "overwrite") - } - invisible(callJMethod(writer, "save", path)) + write_internal(object, path, overwrite) }) # Saves the AFT survival regression model to the input path. @@ -702,11 +704,7 @@ setMethod("write.ml", signature(object = "NaiveBayesModel", path = "character"), #' @seealso \link{read.ml} setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "character"), function(object, path, overwrite = FALSE) { - writer <- callJMethod(object@jobj, "write") - if (overwrite) { - writer <- callJMethod(writer, "overwrite") - } - invisible(callJMethod(writer, "save", path)) + write_internal(object, path, overwrite) }) # Saves the generalized linear model to the input path. 
@@ -720,11 +718,7 @@ setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "c #' @note write.ml(GeneralizedLinearRegressionModel, character) since 2.0.0 setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", path = "character"), function(object, path, overwrite = FALSE) { - writer <- callJMethod(object@jobj, "write") - if (overwrite) { - writer <- callJMethod(writer, "overwrite") - } - invisible(callJMethod(writer, "save", path)) + write_internal(object, path, overwrite) }) # Save fitted MLlib model to the input path @@ -738,11 +732,7 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat #' @note write.ml(KMeansModel, character) since 2.0.0 setMethod("write.ml", signature(object = "KMeansModel", path = "character"), function(object, path, overwrite = FALSE) { - writer <- callJMethod(object@jobj, "write") - if (overwrite) { - writer <- callJMethod(writer, "overwrite") - } - invisible(callJMethod(writer, "save", path)) + write_internal(object, path, overwrite) }) # Save fitted IsotonicRegressionModel to the input path @@ -757,11 +747,7 @@ setMethod("write.ml", signature(object = "KMeansModel", path = "character"), #' @note write.ml(IsotonicRegression, character) since 2.1.0 setMethod("write.ml", signature(object = "IsotonicRegressionModel", path = "character"), function(object, path, overwrite = FALSE) { - writer <- callJMethod(object@jobj, "write") - if (overwrite) { - writer <- callJMethod(writer, "overwrite") - } - invisible(callJMethod(writer, "save", path)) + write_internal(object, path, overwrite) }) # Save fitted MLlib model to the input path @@ -776,11 +762,7 @@ setMethod("write.ml", signature(object = "IsotonicRegressionModel", path = "char #' @note write.ml(GaussianMixtureModel, character) since 2.1.0 setMethod("write.ml", signature(object = "GaussianMixtureModel", path = "character"), function(object, path, overwrite = FALSE) { - writer <- callJMethod(object@jobj, "write") - if (overwrite) { - writer <- callJMethod(writer, "overwrite") - } - invisible(callJMethod(writer, "save", path)) + write_internal(object, path, overwrite) }) #' Load a fitted MLlib model from the input path. 
@@ -801,21 +783,21 @@ read.ml <- function(path) { path <- suppressWarnings(normalizePath(path)) jobj <- callJStatic("org.apache.spark.ml.r.RWrappers", "load", path) if (isInstanceOf(jobj, "org.apache.spark.ml.r.NaiveBayesWrapper")) { - return(new("NaiveBayesModel", jobj = jobj)) + new("NaiveBayesModel", jobj = jobj) } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.AFTSurvivalRegressionWrapper")) { - return(new("AFTSurvivalRegressionModel", jobj = jobj)) + new("AFTSurvivalRegressionModel", jobj = jobj) } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.GeneralizedLinearRegressionWrapper")) { - return(new("GeneralizedLinearRegressionModel", jobj = jobj)) + new("GeneralizedLinearRegressionModel", jobj = jobj) } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.KMeansWrapper")) { - return(new("KMeansModel", jobj = jobj)) + new("KMeansModel", jobj = jobj) } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.LDAWrapper")) { - return(new("LDAModel", jobj = jobj)) + new("LDAModel", jobj = jobj) } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.IsotonicRegressionWrapper")) { - return(new("IsotonicRegressionModel", jobj = jobj)) + new("IsotonicRegressionModel", jobj = jobj) } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.GaussianMixtureWrapper")) { - return(new("GaussianMixtureModel", jobj = jobj)) + new("GaussianMixtureModel", jobj = jobj) } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.ALSWrapper")) { - return(new("ALSModel", jobj = jobj)) + new("ALSModel", jobj = jobj) } else { stop(paste("Unsupported model: ", jobj)) } @@ -860,7 +842,7 @@ setMethod("spark.survreg", signature(data = "SparkDataFrame", formula = "formula formula <- paste(deparse(formula), collapse = "") jobj <- callJStatic("org.apache.spark.ml.r.AFTSurvivalRegressionWrapper", "fit", formula, data@sdf) - return(new("AFTSurvivalRegressionModel", jobj = jobj)) + new("AFTSurvivalRegressionModel", jobj = jobj) }) #' Latent Dirichlet Allocation @@ -926,7 +908,7 @@ setMethod("spark.lda", signature(data = "SparkDataFrame"), as.numeric(subsamplingRate), topicConcentration, as.array(docConcentration), as.array(customizedStopWords), maxVocabSize) - return(new("LDAModel", jobj = jobj)) + new("LDAModel", jobj = jobj) }) # Returns a summary of the AFT survival regression model produced by spark.survreg, @@ -946,7 +928,7 @@ setMethod("summary", signature(object = "AFTSurvivalRegressionModel"), coefficients <- as.matrix(unlist(coefficients)) colnames(coefficients) <- c("Value") rownames(coefficients) <- unlist(features) - return(list(coefficients = coefficients)) + list(coefficients = coefficients) }) # Makes predictions from an AFT survival regression model or a model produced by @@ -960,7 +942,7 @@ setMethod("summary", signature(object = "AFTSurvivalRegressionModel"), #' @note predict(AFTSurvivalRegressionModel) since 2.0.0 setMethod("predict", signature(object = "AFTSurvivalRegressionModel"), function(object, newData) { - return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf))) + predict_internal(object, newData) }) #' Multivariate Gaussian Mixture Model (GMM) @@ -1014,7 +996,7 @@ setMethod("spark.gaussianMixture", signature(data = "SparkDataFrame", formula = formula <- paste(deparse(formula), collapse = "") jobj <- callJStatic("org.apache.spark.ml.r.GaussianMixtureWrapper", "fit", data@sdf, formula, as.integer(k), as.integer(maxIter), as.numeric(tol)) - return(new("GaussianMixtureModel", jobj = jobj)) + new("GaussianMixtureModel", jobj = jobj) }) # Get the summary of a multivariate gaussian mixture model @@ 
-1027,7 +1009,7 @@ setMethod("spark.gaussianMixture", signature(data = "SparkDataFrame", formula = #' @export #' @note summary(GaussianMixtureModel) since 2.1.0 setMethod("summary", signature(object = "GaussianMixtureModel"), - function(object, ...) { + function(object) { jobj <- object@jobj is.loaded <- callJMethod(jobj, "isLoaded") lambda <- unlist(callJMethod(jobj, "lambda")) @@ -1052,8 +1034,8 @@ setMethod("summary", signature(object = "GaussianMixtureModel"), } else { dataFrame(callJMethod(jobj, "posterior")) } - return(list(lambda = lambda, mu = mu, sigma = sigma, - posterior = posterior, is.loaded = is.loaded)) + list(lambda = lambda, mu = mu, sigma = sigma, + posterior = posterior, is.loaded = is.loaded) }) # Predicted values based on a gaussian mixture model @@ -1067,7 +1049,7 @@ setMethod("summary", signature(object = "GaussianMixtureModel"), #' @note predict(GaussianMixtureModel) since 2.1.0 setMethod("predict", signature(object = "GaussianMixtureModel"), function(object, newData) { - return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf))) + predict_internal(object, newData) }) #' Alternating Least Squares (ALS) for Collaborative Filtering @@ -1149,7 +1131,7 @@ setMethod("spark.als", signature(data = "SparkDataFrame"), reg, as.integer(maxIter), implicitPrefs, alpha, nonnegative, as.integer(numUserBlocks), as.integer(numItemBlocks), as.integer(checkpointInterval), as.integer(seed)) - return(new("ALSModel", jobj = jobj)) + new("ALSModel", jobj = jobj) }) # Returns a summary of the ALS model produced by spark.als. @@ -1163,17 +1145,17 @@ setMethod("spark.als", signature(data = "SparkDataFrame"), #' @export #' @note summary(ALSModel) since 2.1.0 setMethod("summary", signature(object = "ALSModel"), -function(object, ...) { - jobj <- object@jobj - user <- callJMethod(jobj, "userCol") - item <- callJMethod(jobj, "itemCol") - rating <- callJMethod(jobj, "ratingCol") - userFactors <- dataFrame(callJMethod(jobj, "userFactors")) - itemFactors <- dataFrame(callJMethod(jobj, "itemFactors")) - rank <- callJMethod(jobj, "rank") - return(list(user = user, item = item, rating = rating, userFactors = userFactors, - itemFactors = itemFactors, rank = rank)) -}) + function(object) { + jobj <- object@jobj + user <- callJMethod(jobj, "userCol") + item <- callJMethod(jobj, "itemCol") + rating <- callJMethod(jobj, "ratingCol") + userFactors <- dataFrame(callJMethod(jobj, "userFactors")) + itemFactors <- dataFrame(callJMethod(jobj, "itemFactors")) + rank <- callJMethod(jobj, "rank") + list(user = user, item = item, rating = rating, userFactors = userFactors, + itemFactors = itemFactors, rank = rank) + }) # Makes predictions from an ALS model or a model produced by spark.als. @@ -1185,9 +1167,9 @@ function(object, ...) { #' @export #' @note predict(ALSModel) since 2.1.0 setMethod("predict", signature(object = "ALSModel"), -function(object, newData) { - return(dataFrame(callJMethod(object@jobj, "transform", newData@sdf))) -}) + function(object, newData) { + predict_internal(object, newData) + }) # Saves the ALS model to the input path. 
@@ -1203,10 +1185,6 @@ function(object, newData) { #' @seealso \link{read.ml} #' @note write.ml(ALSModel, character) since 2.1.0 setMethod("write.ml", signature(object = "ALSModel", path = "character"), -function(object, path, overwrite = FALSE) { - writer <- callJMethod(object@jobj, "write") - if (overwrite) { - writer <- callJMethod(writer, "overwrite") - } - invisible(callJMethod(writer, "save", path)) -}) + function(object, path, overwrite = FALSE) { + write_internal(object, path, overwrite) + }) diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R index 67a3099101cf1..d6ea495e00ae2 100644 --- a/R/pkg/inst/tests/testthat/test_mllib.R +++ b/R/pkg/inst/tests/testthat/test_mllib.R @@ -409,7 +409,7 @@ test_that("spark.naiveBayes", { # Test e1071::naiveBayes if (requireNamespace("e1071", quietly = TRUE)) { - expect_that(m <- e1071::naiveBayes(Survived ~ ., data = t1), not(throws_error())) + expect_error(m <- e1071::naiveBayes(Survived ~ ., data = t1), NA) expect_equal(as.character(predict(m, t1[1, ])), "Yes") } }) From 3ea30bb3b5d22626f6de6e0699504180f267dfdc Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Sat, 20 Aug 2016 09:39:17 -0700 Subject: [PATCH 2/6] add print.summary.GeneralizedLinearRegressionModel test --- R/pkg/inst/tests/testthat/test_mllib.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R index d6ea495e00ae2..56f3b541ebb52 100644 --- a/R/pkg/inst/tests/testthat/test_mllib.R +++ b/R/pkg/inst/tests/testthat/test_mllib.R @@ -95,6 +95,10 @@ test_that("spark.glm summary", { expect_equal(stats$df.residual, rStats$df.residual) expect_equal(stats$aic, rStats$aic) + out <- capture.output(print(stats)) + expect_equal(out[1], "Deviance Residuals:") + expect_true(any(grepl("AIC: 59.22", out))) + # binomial family df <- suppressWarnings(createDataFrame(iris)) training <- df[df$Species %in% c("versicolor", "virginica"), ] From 1ef18d6abfe854c95e0323a406065d9ee4f11c15 Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Sat, 20 Aug 2016 09:58:08 -0700 Subject: [PATCH 3/6] fix bugs in test --- R/pkg/inst/tests/testthat/test_mllib.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R index 56f3b541ebb52..c33c0c2104c56 100644 --- a/R/pkg/inst/tests/testthat/test_mllib.R +++ b/R/pkg/inst/tests/testthat/test_mllib.R @@ -96,7 +96,7 @@ test_that("spark.glm summary", { expect_equal(stats$aic, rStats$aic) out <- capture.output(print(stats)) - expect_equal(out[1], "Deviance Residuals:") + expect_equal(out[2], "Deviance Residuals:") expect_true(any(grepl("AIC: 59.22", out))) # binomial family @@ -491,7 +491,7 @@ test_that("spark.isotonicRegression", { weightCol = "weight") # only allow one variable on the right hand side of the formula expect_error(model2 <- spark.isoreg(df, ~., isotonic = FALSE)) - result <- summary(model, df) + result <- summary(model) expect_equal(result$predictions, list(7, 5, 4, 4, 1)) # Test model prediction From 30815e067a37175e0f5d4539c80db6b0ec6cc159 Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Sat, 20 Aug 2016 14:13:17 -0700 Subject: [PATCH 4/6] fix test --- R/pkg/inst/tests/testthat/test_mllib.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R index c33c0c2104c56..9602eb59de9ef 100644 --- a/R/pkg/inst/tests/testthat/test_mllib.R +++ 
b/R/pkg/inst/tests/testthat/test_mllib.R @@ -96,7 +96,7 @@ test_that("spark.glm summary", { expect_equal(stats$aic, rStats$aic) out <- capture.output(print(stats)) - expect_equal(out[2], "Deviance Residuals:") + expect_match(out[2], "Deviance Residuals:") expect_true(any(grepl("AIC: 59.22", out))) # binomial family @@ -507,7 +507,7 @@ test_that("spark.isotonicRegression", { expect_error(write.ml(model, modelPath)) write.ml(model, modelPath, overwrite = TRUE) model2 <- read.ml(modelPath) - expect_equal(result, summary(model2, df)) + expect_equal(result, summary(model2)) unlink(modelPath) }) From b42e10c84f122774411e6d9bdc08b27951161272 Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Sun, 21 Aug 2016 00:05:19 -0700 Subject: [PATCH 5/6] revert signature change causing check cran warning * checking S3 generic/method consistency ... WARNING print: function(x, ...) print.summary.GeneralizedLinearRegressionModel: function(x) print: function(x, ...) print.summary.GeneralizedLinearRegressionModel: function(x) --- R/pkg/R/mllib.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 17c242d8ee0a8..8c77fb8110085 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -265,7 +265,7 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), #' @param x summary object of fitted generalized linear model returned by \code{summary} function #' @export #' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0 -print.summary.GeneralizedLinearRegressionModel <- function(x) { +print.summary.GeneralizedLinearRegressionModel <- function(x, ...) { if (x$is.loaded) { cat("\nSaved-loaded model does not support output 'Deviance Residuals'.\n") } else { From d727093092514f4e49f0a2f2e0814ef5df4c71ed Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Mon, 22 Aug 2016 05:13:16 -0700 Subject: [PATCH 6/6] fix alignment --- R/pkg/R/mllib.R | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 8c77fb8110085..b36fbcee17671 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -286,8 +286,7 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) 
{ " on", format(unlist(x[c("df.null", "df.residual")])), " degrees of freedom\n"), 1L, paste, collapse = " "), sep = "") cat("AIC: ", format(x$aic, digits = 4L), "\n\n", - "Number of Fisher Scoring iterations: ", x$iter, "\n", sep = "") - cat("\n") + "Number of Fisher Scoring iterations: ", x$iter, "\n\n", sep = "") invisible(x) } @@ -477,8 +476,8 @@ setMethod("spark.isoreg", signature(data = "SparkDataFrame", formula = "formula" } jobj <- callJStatic("org.apache.spark.ml.r.IsotonicRegressionWrapper", "fit", - data@sdf, formula, as.logical(isotonic), as.integer(featureIndex), - as.character(weightCol)) + data@sdf, formula, as.logical(isotonic), as.integer(featureIndex), + as.character(weightCol)) new("IsotonicRegressionModel", jobj = jobj) }) @@ -617,7 +616,7 @@ setMethod("summary", signature(object = "KMeansModel"), dataFrame(callJMethod(jobj, "cluster")) } list(coefficients = coefficients, size = size, - cluster = cluster, is.loaded = is.loaded) + cluster = cluster, is.loaded = is.loaded) }) # Predicted values based on a k-means model @@ -787,17 +786,17 @@ read.ml <- function(path) { } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.AFTSurvivalRegressionWrapper")) { new("AFTSurvivalRegressionModel", jobj = jobj) } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.GeneralizedLinearRegressionWrapper")) { - new("GeneralizedLinearRegressionModel", jobj = jobj) + new("GeneralizedLinearRegressionModel", jobj = jobj) } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.KMeansWrapper")) { - new("KMeansModel", jobj = jobj) + new("KMeansModel", jobj = jobj) } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.LDAWrapper")) { - new("LDAModel", jobj = jobj) + new("LDAModel", jobj = jobj) } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.IsotonicRegressionWrapper")) { - new("IsotonicRegressionModel", jobj = jobj) + new("IsotonicRegressionModel", jobj = jobj) } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.GaussianMixtureWrapper")) { - new("GaussianMixtureModel", jobj = jobj) + new("GaussianMixtureModel", jobj = jobj) } else if (isInstanceOf(jobj, "org.apache.spark.ml.r.ALSWrapper")) { - new("ALSModel", jobj = jobj) + new("ALSModel", jobj = jobj) } else { stop(paste("Unsupported model: ", jobj)) } @@ -1035,7 +1034,7 @@ setMethod("summary", signature(object = "GaussianMixtureModel"), dataFrame(callJMethod(jobj, "posterior")) } list(lambda = lambda, mu = mu, sigma = sigma, - posterior = posterior, is.loaded = is.loaded) + posterior = posterior, is.loaded = is.loaded) }) # Predicted values based on a gaussian mixture model @@ -1154,7 +1153,7 @@ setMethod("summary", signature(object = "ALSModel"), itemFactors <- dataFrame(callJMethod(jobj, "itemFactors")) rank <- callJMethod(jobj, "rank") list(user = user, item = item, rating = rating, userFactors = userFactors, - itemFactors = itemFactors, rank = rank) + itemFactors = itemFactors, rank = rank) })