Commit be6d153

Merge remote-tracking branch 'upstream/master' into repartition-batch-size

2 parents: 60e2abd + 5b77e66

File tree

263 files changed: +3998, -1951 lines


R/pkg/R/DataFrame.R

Lines changed: 31 additions & 12 deletions
@@ -55,6 +55,19 @@ setMethod("initialize", "SparkDataFrame", function(.Object, sdf, isCached) {
   .Object
 })
 
+#' Set options/mode and then return the write object
+#' @noRd
+setWriteOptions <- function(write, path = NULL, mode = "error", ...) {
+  options <- varargsToStrEnv(...)
+  if (!is.null(path)) {
+    options[["path"]] <- path
+  }
+  jmode <- convertToJSaveMode(mode)
+  write <- callJMethod(write, "mode", jmode)
+  write <- callJMethod(write, "options", options)
+  write
+}
+
 #' @export
 #' @param sdf A Java object reference to the backing Scala DataFrame
 #' @param isCached TRUE if the SparkDataFrame is cached
@@ -727,6 +740,8 @@ setMethod("toJSON",
 #'
 #' @param x A SparkDataFrame
 #' @param path The directory where the file is saved
+#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
+#' @param ... additional argument(s) passed to the method.
 #'
 #' @family SparkDataFrame functions
 #' @rdname write.json
@@ -743,8 +758,9 @@ setMethod("toJSON",
 #' @note write.json since 1.6.0
 setMethod("write.json",
           signature(x = "SparkDataFrame", path = "character"),
-          function(x, path) {
+          function(x, path, mode = "error", ...) {
             write <- callJMethod(x@sdf, "write")
+            write <- setWriteOptions(write, mode = mode, ...)
             invisible(callJMethod(write, "json", path))
           })
 
@@ -755,6 +771,8 @@ setMethod("write.json",
 #'
 #' @param x A SparkDataFrame
 #' @param path The directory where the file is saved
+#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
+#' @param ... additional argument(s) passed to the method.
 #'
 #' @family SparkDataFrame functions
 #' @aliases write.orc,SparkDataFrame,character-method
@@ -771,8 +789,9 @@ setMethod("write.json",
 #' @note write.orc since 2.0.0
 setMethod("write.orc",
           signature(x = "SparkDataFrame", path = "character"),
-          function(x, path) {
+          function(x, path, mode = "error", ...) {
             write <- callJMethod(x@sdf, "write")
+            write <- setWriteOptions(write, mode = mode, ...)
             invisible(callJMethod(write, "orc", path))
           })
 
@@ -783,6 +802,8 @@ setMethod("write.orc",
 #'
 #' @param x A SparkDataFrame
 #' @param path The directory where the file is saved
+#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
+#' @param ... additional argument(s) passed to the method.
 #'
 #' @family SparkDataFrame functions
 #' @rdname write.parquet
@@ -800,8 +821,9 @@ setMethod("write.orc",
 #' @note write.parquet since 1.6.0
 setMethod("write.parquet",
           signature(x = "SparkDataFrame", path = "character"),
-          function(x, path) {
+          function(x, path, mode = "error", ...) {
             write <- callJMethod(x@sdf, "write")
+            write <- setWriteOptions(write, mode = mode, ...)
             invisible(callJMethod(write, "parquet", path))
           })
 
@@ -825,6 +847,8 @@ setMethod("saveAsParquetFile",
 #'
 #' @param x A SparkDataFrame
 #' @param path The directory where the file is saved
+#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
+#' @param ... additional argument(s) passed to the method.
 #'
 #' @family SparkDataFrame functions
 #' @aliases write.text,SparkDataFrame,character-method
@@ -841,8 +865,9 @@ setMethod("saveAsParquetFile",
 #' @note write.text since 2.0.0
 setMethod("write.text",
           signature(x = "SparkDataFrame", path = "character"),
-          function(x, path) {
+          function(x, path, mode = "error", ...) {
             write <- callJMethod(x@sdf, "write")
+            write <- setWriteOptions(write, mode = mode, ...)
             invisible(callJMethod(write, "text", path))
           })
 
@@ -2637,15 +2662,9 @@ setMethod("write.df",
             if (is.null(source)) {
               source <- getDefaultSqlSource()
             }
-            jmode <- convertToJSaveMode(mode)
-            options <- varargsToEnv(...)
-            if (!is.null(path)) {
-              options[["path"]] <- path
-            }
             write <- callJMethod(df@sdf, "write")
             write <- callJMethod(write, "format", source)
-            write <- callJMethod(write, "mode", jmode)
-            write <- callJMethod(write, "options", options)
+            write <- setWriteOptions(write, path = path, mode = mode, ...)
             write <- handledCallJMethod(write, "save")
           })
 
@@ -2701,7 +2720,7 @@ setMethod("saveAsTable",
               source <- getDefaultSqlSource()
             }
             jmode <- convertToJSaveMode(mode)
-            options <- varargsToEnv(...)
+            options <- varargsToStrEnv(...)
 
             write <- callJMethod(df@sdf, "write")
             write <- callJMethod(write, "format", source)
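
Taken together, these hunks route all four typed writers (write.json, write.orc, write.parquet, write.text) through the new setWriteOptions helper, so each now accepts a save mode plus pass-through data source options. A minimal sketch of the resulting call pattern (the output paths and the compression option are illustrative, not taken from this diff):

df <- createDataFrame(mtcars)

# 'overwrite' replaces any existing output; the default mode is 'error',
# which fails if the path already exists
write.json(df, "/tmp/cars_json", mode = "overwrite")

# Extra named arguments flow through setWriteOptions() into the
# JVM-side DataFrameWriter.options() map
write.parquet(df, "/tmp/cars_parquet", mode = "append", compression = "snappy")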

R/pkg/R/SQLContext.R

Lines changed: 17 additions & 6 deletions
@@ -328,6 +328,7 @@ setMethod("toDF", signature(x = "RDD"),
 #' It goes through the entire dataset once to determine the schema.
 #'
 #' @param path Path of file to read. A vector of multiple paths is allowed.
+#' @param ... additional external data source specific named properties.
 #' @return SparkDataFrame
 #' @rdname read.json
 #' @export
@@ -341,11 +342,13 @@ setMethod("toDF", signature(x = "RDD"),
 #' @name read.json
 #' @method read.json default
 #' @note read.json since 1.6.0
-read.json.default <- function(path) {
+read.json.default <- function(path, ...) {
   sparkSession <- getSparkSession()
+  options <- varargsToStrEnv(...)
   # Allow the user to have a more flexible definition of the text file path
   paths <- as.list(suppressWarnings(normalizePath(path)))
   read <- callJMethod(sparkSession, "read")
+  read <- callJMethod(read, "options", options)
   sdf <- callJMethod(read, "json", paths)
   dataFrame(sdf)
 }
@@ -405,16 +408,19 @@ jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
 #' Loads an ORC file, returning the result as a SparkDataFrame.
 #'
 #' @param path Path of file to read.
+#' @param ... additional external data source specific named properties.
 #' @return SparkDataFrame
 #' @rdname read.orc
 #' @export
 #' @name read.orc
 #' @note read.orc since 2.0.0
-read.orc <- function(path) {
+read.orc <- function(path, ...) {
   sparkSession <- getSparkSession()
+  options <- varargsToStrEnv(...)
   # Allow the user to have a more flexible definition of the ORC file path
   path <- suppressWarnings(normalizePath(path))
   read <- callJMethod(sparkSession, "read")
+  read <- callJMethod(read, "options", options)
   sdf <- callJMethod(read, "orc", path)
   dataFrame(sdf)
 }
@@ -430,11 +436,13 @@ read.orc <- function(path) {
 #' @name read.parquet
 #' @method read.parquet default
 #' @note read.parquet since 1.6.0
-read.parquet.default <- function(path) {
+read.parquet.default <- function(path, ...) {
   sparkSession <- getSparkSession()
+  options <- varargsToStrEnv(...)
   # Allow the user to have a more flexible definition of the Parquet file path
   paths <- as.list(suppressWarnings(normalizePath(path)))
   read <- callJMethod(sparkSession, "read")
+  read <- callJMethod(read, "options", options)
   sdf <- callJMethod(read, "parquet", paths)
   dataFrame(sdf)
 }
@@ -467,6 +475,7 @@ parquetFile <- function(x, ...) {
 #' Each line in the text file is a new row in the resulting SparkDataFrame.
 #'
 #' @param path Path of file to read. A vector of multiple paths is allowed.
+#' @param ... additional external data source specific named properties.
 #' @return SparkDataFrame
 #' @rdname read.text
 #' @export
@@ -479,11 +488,13 @@ parquetFile <- function(x, ...) {
 #' @name read.text
 #' @method read.text default
 #' @note read.text since 1.6.1
-read.text.default <- function(path) {
+read.text.default <- function(path, ...) {
   sparkSession <- getSparkSession()
+  options <- varargsToStrEnv(...)
   # Allow the user to have a more flexible definition of the text file path
   paths <- as.list(suppressWarnings(normalizePath(path)))
   read <- callJMethod(sparkSession, "read")
+  read <- callJMethod(read, "options", options)
   sdf <- callJMethod(read, "text", paths)
   dataFrame(sdf)
 }
@@ -779,7 +790,7 @@ read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.string
                "in 'spark.sql.sources.default' configuration by default.")
   }
   sparkSession <- getSparkSession()
-  options <- varargsToEnv(...)
+  options <- varargsToStrEnv(...)
   if (!is.null(path)) {
     options[["path"]] <- path
   }
@@ -842,7 +853,7 @@ loadDF <- function(x = NULL, ...) {
 #' @note createExternalTable since 1.4.0
 createExternalTable.default <- function(tableName, path = NULL, source = NULL, ...) {
   sparkSession <- getSparkSession()
-  options <- varargsToEnv(...)
+  options <- varargsToStrEnv(...)
   if (!is.null(path)) {
     options[["path"]] <- path
   }
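
The read side mirrors the write side: each reader now captures its named varargs with varargsToStrEnv and applies them through DataFrameReader.options() before loading. A sketch of what this enables (the file path is hypothetical; primitivesAsString is a standard Spark JSON option used here for illustration):

# Logical TRUE is lowercased to the string "true" by varargsToStrEnv,
# so the JVM-side options map receives the form it expects
df <- read.json("/tmp/people.json", primitivesAsString = TRUE)
printSchema(df)  # with this option, all JSON primitives arrive as string columns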

R/pkg/R/generics.R

Lines changed: 6 additions & 4 deletions
@@ -651,23 +651,25 @@ setGeneric("write.jdbc", function(x, url, tableName, mode = "error", ...) {
 
 #' @rdname write.json
 #' @export
-setGeneric("write.json", function(x, path) { standardGeneric("write.json") })
+setGeneric("write.json", function(x, path, ...) { standardGeneric("write.json") })
 
 #' @rdname write.orc
 #' @export
-setGeneric("write.orc", function(x, path) { standardGeneric("write.orc") })
+setGeneric("write.orc", function(x, path, ...) { standardGeneric("write.orc") })
 
 #' @rdname write.parquet
 #' @export
-setGeneric("write.parquet", function(x, path) { standardGeneric("write.parquet") })
+setGeneric("write.parquet", function(x, path, ...) {
+  standardGeneric("write.parquet")
+})
 
 #' @rdname write.parquet
 #' @export
 setGeneric("saveAsParquetFile", function(x, path) { standardGeneric("saveAsParquetFile") })
 
 #' @rdname write.text
 #' @export
-setGeneric("write.text", function(x, path) { standardGeneric("write.text") })
+setGeneric("write.text", function(x, path, ...) { standardGeneric("write.text") })
 
 #' @rdname schema
 #' @export
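
Widening the generics to function(x, path, ...) is what allows the SparkDataFrame methods in DataFrame.R above to declare mode and option arguments without breaking S4 dispatch. A toy illustration of the pattern outside SparkR (write.demo is hypothetical and not part of this commit):

library(methods)

setGeneric("write.demo", function(x, path, ...) { standardGeneric("write.demo") })

# Because the generic carries a dots argument, a method may declare
# additional named arguments; callers pass them by name and they are
# matched through the dots
setMethod("write.demo", signature(x = "data.frame", path = "character"),
          function(x, path, mode = "error", ...) {
            message("saving to ", path, " with mode = ", mode)
          })

write.demo(data.frame(a = 1), "/tmp/out", mode = "overwrite")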

R/pkg/R/utils.R

Lines changed: 22 additions & 0 deletions
@@ -334,6 +334,28 @@ varargsToEnv <- function(...) {
   env
 }
 
+# Utility function to capture the varargs into an environment object, with all
+# values converted to strings.
+varargsToStrEnv <- function(...) {
+  pairs <- list(...)
+  env <- new.env()
+  for (name in names(pairs)) {
+    value <- pairs[[name]]
+    if (!(is.logical(value) || is.numeric(value) || is.character(value) || is.null(value))) {
+      stop(paste0("Unsupported type for ", name, " : ", class(value),
+                  ". Supported types are logical, numeric, character and NULL."))
+    }
+    if (is.logical(value)) {
+      env[[name]] <- tolower(as.character(value))
+    } else if (is.null(value)) {
+      env[[name]] <- value
+    } else {
+      env[[name]] <- as.character(value)
+    }
+  }
+  env
+}
+
 getStorageLevel <- function(newLevel = c("DISK_ONLY",
                                          "DISK_ONLY_2",
                                          "MEMORY_AND_DISK",

R/pkg/inst/tests/testthat/test_mllib.R

Lines changed: 10 additions & 0 deletions
@@ -481,6 +481,16 @@ test_that("spark.naiveBayes", {
     expect_error(m <- e1071::naiveBayes(Survived ~ ., data = t1), NA)
     expect_equal(as.character(predict(m, t1[1, ])), "Yes")
   }
+
+  # Test numeric response variable
+  t1$NumericSurvived <- ifelse(t1$Survived == "No", 0, 1)
+  t2 <- t1[-4]
+  df <- suppressWarnings(createDataFrame(t2))
+  m <- spark.naiveBayes(df, NumericSurvived ~ ., smoothing = 0.0)
+  s <- summary(m)
+  expect_equal(as.double(s$apriori[1, 1]), 0.5833333, tolerance = 1e-6)
+  expect_equal(sum(s$apriori), 1)
+  expect_equal(as.double(s$tables[1, "Age_Adult"]), 0.5714286, tolerance = 1e-6)
 })
 
 test_that("spark.survreg", {
