Commit b0b14b0

merge 'master' and resolve conflicts

2 parents: 8e71b45 + 1e6f760

302 files changed: +10134 -7061 lines changed (large commit; only a subset of the changed files is shown below)


R/pkg/NAMESPACE

Lines changed: 1 addition & 0 deletions

@@ -232,6 +232,7 @@ exportMethods("%<=>%",
               "date_sub",
               "datediff",
               "dayofmonth",
+              "dayofweek",
               "dayofyear",
               "decode",
               "dense_rank",

R/pkg/R/DataFrame.R

Lines changed: 53 additions & 37 deletions
@@ -58,14 +58,23 @@ setMethod("initialize", "SparkDataFrame", function(.Object, sdf, isCached) {
 #' Set options/mode and then return the write object
 #' @noRd
 setWriteOptions <- function(write, path = NULL, mode = "error", ...) {
-  options <- varargsToStrEnv(...)
-  if (!is.null(path)) {
-    options[["path"]] <- path
-  }
-  jmode <- convertToJSaveMode(mode)
-  write <- callJMethod(write, "mode", jmode)
-  write <- callJMethod(write, "options", options)
-  write
+  options <- varargsToStrEnv(...)
+  if (!is.null(path)) {
+    options[["path"]] <- path
+  }
+  write <- setWriteMode(write, mode)
+  write <- callJMethod(write, "options", options)
+  write
+}
+
+#' Set mode and then return the write object
+#' @noRd
+setWriteMode <- function(write, mode) {
+  if (!is.character(mode)) {
+    stop("mode should be character or omitted. It is 'error' by default.")
+  }
+  write <- handledCallJMethod(write, "mode", mode)
+  write
 }
 
 #' @export
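The net effect of the new setWriteMode() helper is that an invalid mode argument now fails fast in R with a readable message, and JVM-side errors are routed through handledCallJMethod(). A minimal sketch of a SparkR session (the data frame and output path are hypothetical):

  df <- createDataFrame(mtcars)
  # accepted: "append", "overwrite", "error", "errorifexists", "ignore"
  write.df(df, path = "/tmp/mtcars_parquet", source = "parquet", mode = "overwrite")
  # rejected in R before any JVM call:
  # write.df(df, path = "/tmp/mtcars_parquet", source = "parquet", mode = TRUE)
  # Error: mode should be character or omitted. It is 'error' by default.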
@@ -556,9 +565,8 @@ setMethod("registerTempTable",
 setMethod("insertInto",
           signature(x = "SparkDataFrame", tableName = "character"),
           function(x, tableName, overwrite = FALSE) {
-            jmode <- convertToJSaveMode(ifelse(overwrite, "overwrite", "append"))
             write <- callJMethod(x@sdf, "write")
-            write <- callJMethod(write, "mode", jmode)
+            write <- setWriteMode(write, ifelse(overwrite, "overwrite", "append"))
             invisible(callJMethod(write, "insertInto", tableName))
           })
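Caller-facing behavior is unchanged; the mode plumbing simply moved into the shared helper. For example (assuming a table "people" already exists in the session catalog):

  insertInto(df, "people")                    # appends by default
  insertInto(df, "people", overwrite = TRUE)  # replaces the existing rows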

@@ -810,7 +818,8 @@ setMethod("toJSON",
 #'
 #' @param x A SparkDataFrame
 #' @param path The directory where the file is saved
-#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
+#' @param mode one of 'append', 'overwrite', 'error', 'errorifexists', 'ignore'
+#'             save mode (it is 'error' by default)
 #' @param ... additional argument(s) passed to the method.
 #'
 #' @family SparkDataFrame functions
@@ -841,7 +850,8 @@ setMethod("write.json",
 #'
 #' @param x A SparkDataFrame
 #' @param path The directory where the file is saved
-#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
+#' @param mode one of 'append', 'overwrite', 'error', 'errorifexists', 'ignore'
+#'             save mode (it is 'error' by default)
 #' @param ... additional argument(s) passed to the method.
 #'
 #' @family SparkDataFrame functions
@@ -872,7 +882,8 @@ setMethod("write.orc",
 #'
 #' @param x A SparkDataFrame
 #' @param path The directory where the file is saved
-#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
+#' @param mode one of 'append', 'overwrite', 'error', 'errorifexists', 'ignore'
+#'             save mode (it is 'error' by default)
 #' @param ... additional argument(s) passed to the method.
 #'
 #' @family SparkDataFrame functions
@@ -917,7 +928,8 @@ setMethod("saveAsParquetFile",
 #'
 #' @param x A SparkDataFrame
 #' @param path The directory where the file is saved
-#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
+#' @param mode one of 'append', 'overwrite', 'error', 'errorifexists', 'ignore'
+#'             save mode (it is 'error' by default)
 #' @param ... additional argument(s) passed to the method.
 #'
 #' @family SparkDataFrame functions
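The writer documentation now lists the full set of accepted strings; 'error' and 'errorifexists' behave identically. Illustrative calls (output paths are placeholders):

  write.json(df, "/tmp/out_json", mode = "ignore")        # silently skips if the path exists
  write.orc(df, "/tmp/out_orc", mode = "errorifexists")   # throws if the path exists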
@@ -2871,18 +2883,19 @@ setMethod("except",
 #' Additionally, mode is used to specify the behavior of the save operation when data already
 #' exists in the data source. There are four modes:
 #' \itemize{
-#'  \item append: Contents of this SparkDataFrame are expected to be appended to existing data.
-#'  \item overwrite: Existing data is expected to be overwritten by the contents of this
+#'  \item 'append': Contents of this SparkDataFrame are expected to be appended to existing data.
+#'  \item 'overwrite': Existing data is expected to be overwritten by the contents of this
 #'         SparkDataFrame.
-#'  \item error: An exception is expected to be thrown.
-#'  \item ignore: The save operation is expected to not save the contents of the SparkDataFrame
+#'  \item 'error' or 'errorifexists': An exception is expected to be thrown.
+#'  \item 'ignore': The save operation is expected to not save the contents of the SparkDataFrame
 #'         and to not change the existing data.
 #' }
 #'
 #' @param df a SparkDataFrame.
 #' @param path a name for the table.
 #' @param source a name for external data source.
-#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default)
+#' @param mode one of 'append', 'overwrite', 'error', 'errorifexists', 'ignore'
+#'             save mode (it is 'error' by default)
 #' @param ... additional argument(s) passed to the method.
 #'
 #' @family SparkDataFrame functions
@@ -2940,17 +2953,18 @@ setMethod("saveDF",
 #'
 #' Additionally, mode is used to specify the behavior of the save operation when
 #' data already exists in the data source. There are four modes: \cr
-#'  append: Contents of this SparkDataFrame are expected to be appended to existing data. \cr
-#'  overwrite: Existing data is expected to be overwritten by the contents of this
+#'  'append': Contents of this SparkDataFrame are expected to be appended to existing data. \cr
+#'  'overwrite': Existing data is expected to be overwritten by the contents of this
 #'     SparkDataFrame. \cr
-#'  error: An exception is expected to be thrown. \cr
-#'  ignore: The save operation is expected to not save the contents of the SparkDataFrame
+#'  'error' or 'errorifexists': An exception is expected to be thrown. \cr
+#'  'ignore': The save operation is expected to not save the contents of the SparkDataFrame
 #'     and to not change the existing data. \cr
 #'
 #' @param df a SparkDataFrame.
 #' @param tableName a name for the table.
 #' @param source a name for external data source.
-#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default).
+#' @param mode one of 'append', 'overwrite', 'error', 'errorifexists', 'ignore'
+#'             save mode (it is 'error' by default)
 #' @param ... additional option(s) passed to the method.
 #'
 #' @family SparkDataFrame functions
@@ -2972,12 +2986,11 @@ setMethod("saveAsTable",
           if (is.null(source)) {
             source <- getDefaultSqlSource()
           }
-          jmode <- convertToJSaveMode(mode)
           options <- varargsToStrEnv(...)
 
           write <- callJMethod(df@sdf, "write")
           write <- callJMethod(write, "format", source)
-          write <- callJMethod(write, "mode", jmode)
+          write <- setWriteMode(write, mode)
           write <- callJMethod(write, "options", options)
           invisible(callJMethod(write, "saveAsTable", tableName))
         })
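A sketch of the resulting call path (the table name is a placeholder):

  saveAsTable(df, "people_copy", source = "parquet", mode = "overwrite")
  # internally: write <- setWriteMode(write, "overwrite"), which validates the
  # string in R and surfaces JVM-side errors via handledCallJMethod()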
@@ -3236,7 +3249,7 @@ setMethod("as.data.frame",
 #'
 #' @family SparkDataFrame functions
 #' @rdname attach
-#' @aliases attach,SparkDataFrame-method
+#' @aliases attach attach,SparkDataFrame-method
 #' @param what (SparkDataFrame) The SparkDataFrame to attach
 #' @param pos (integer) Specify position in search() where to attach.
 #' @param name (character) Name to use for the attached SparkDataFrame. Names
@@ -3252,9 +3265,12 @@ setMethod("as.data.frame",
 #' @note attach since 1.6.0
 setMethod("attach",
           signature(what = "SparkDataFrame"),
-          function(what, pos = 2, name = deparse(substitute(what)), warn.conflicts = TRUE) {
-            newEnv <- assignNewEnv(what)
-            attach(newEnv, pos = pos, name = name, warn.conflicts = warn.conflicts)
+          function(what, pos = 2L, name = deparse(substitute(what), backtick = FALSE),
+                   warn.conflicts = TRUE) {
+            args <- as.list(environment()) # capture all parameters - this must be the first line
+            newEnv <- assignNewEnv(args$what)
+            args$what <- newEnv
+            do.call(attach, args)
           })
 
 #' Evaluate a R expression in an environment constructed from a SparkDataFrame
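Capturing the arguments and routing through do.call(attach, args) keeps the method compatible with base R's attach() signature, which changes across R versions (see the matching generics.R change below). A hypothetical session:

  irisDF <- createDataFrame(iris)   # SparkR replaces '.' in column names with '_'
  attach(irisDF)
  head(Sepal_Length)                # columns resolve through the attached environment
  detach("irisDF")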
@@ -3541,18 +3557,19 @@ setMethod("histogram",
 #' Also, mode is used to specify the behavior of the save operation when
 #' data already exists in the data source. There are four modes:
 #' \itemize{
-#'  \item append: Contents of this SparkDataFrame are expected to be appended to existing data.
-#'  \item overwrite: Existing data is expected to be overwritten by the contents of this
+#'  \item 'append': Contents of this SparkDataFrame are expected to be appended to existing data.
+#'  \item 'overwrite': Existing data is expected to be overwritten by the contents of this
 #'         SparkDataFrame.
-#'  \item error: An exception is expected to be thrown.
-#'  \item ignore: The save operation is expected to not save the contents of the SparkDataFrame
+#'  \item 'error' or 'errorifexists': An exception is expected to be thrown.
+#'  \item 'ignore': The save operation is expected to not save the contents of the SparkDataFrame
 #'         and to not change the existing data.
 #' }
 #'
 #' @param x a SparkDataFrame.
 #' @param url JDBC database url of the form \code{jdbc:subprotocol:subname}.
 #' @param tableName the name of the table in the external database.
-#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default).
+#' @param mode one of 'append', 'overwrite', 'error', 'errorifexists', 'ignore'
+#'             save mode (it is 'error' by default)
 #' @param ... additional JDBC database connection properties.
 #' @family SparkDataFrame functions
 #' @rdname write.jdbc
@@ -3569,10 +3586,9 @@ setMethod("histogram",
 setMethod("write.jdbc",
           signature(x = "SparkDataFrame", url = "character", tableName = "character"),
           function(x, url, tableName, mode = "error", ...) {
-            jmode <- convertToJSaveMode(mode)
             jprops <- varargsToJProperties(...)
             write <- callJMethod(x@sdf, "write")
-            write <- callJMethod(write, "mode", jmode)
+            write <- setWriteMode(write, mode)
             invisible(handledCallJMethod(write, "jdbc", url, tableName, jprops))
           })
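An illustrative write.jdbc() call (the URL, table, and credentials are placeholders, and the matching JDBC driver must be on the classpath):

  write.jdbc(df, "jdbc:postgresql://localhost:5432/testdb", "accounts",
             mode = "append", user = "username", password = "password")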

R/pkg/R/functions.R

Lines changed: 16 additions & 1 deletion

@@ -696,7 +696,7 @@ setMethod("hash",
 #'
 #' \dontrun{
 #' head(select(df, df$time, year(df$time), quarter(df$time), month(df$time),
-#'             dayofmonth(df$time), dayofyear(df$time), weekofyear(df$time)))
+#'             dayofmonth(df$time), dayofweek(df$time), dayofyear(df$time), weekofyear(df$time)))
 #' head(agg(groupBy(df, year(df$time)), count(df$y), avg(df$y)))
 #' head(agg(groupBy(df, month(df$time)), avg(df$y)))}
 #' @note dayofmonth since 1.5.0
@@ -707,6 +707,21 @@ setMethod("dayofmonth",
             column(jc)
           })
 
+#' @details
+#' \code{dayofweek}: Extracts the day of the week as an integer from a
+#' given date/timestamp/string.
+#'
+#' @rdname column_datetime_functions
+#' @aliases dayofweek dayofweek,Column-method
+#' @export
+#' @note dayofweek since 2.3.0
+setMethod("dayofweek",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "dayofweek", x@jc)
+            column(jc)
+          })
+
 #' @details
 #' \code{dayofyear}: Extracts the day of the year as an integer from a
 #' given date/timestamp/string.
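The new column function wraps the Spark SQL dayofweek() added in 2.3.0, which returns 1 (Sunday) through 7 (Saturday). A quick sketch:

  df <- createDataFrame(data.frame(time = as.Date("2017-12-25")))
  head(select(df, dayofweek(df$time)))   # 2, since 2017-12-25 was a Monday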

R/pkg/R/generics.R

Lines changed: 9 additions & 6 deletions

@@ -409,7 +409,8 @@ setGeneric("as.data.frame",
             standardGeneric("as.data.frame")
           })
 
-#' @rdname attach
+# Do not document the generic because of signature changes across R versions
+#' @noRd
 #' @export
 setGeneric("attach")
 
@@ -1047,6 +1048,11 @@ setGeneric("date_sub", function(y, x) { standardGeneric("date_sub") })
 #' @name NULL
 setGeneric("dayofmonth", function(x) { standardGeneric("dayofmonth") })
 
+#' @rdname column_datetime_functions
+#' @export
+#' @name NULL
+setGeneric("dayofweek", function(x) { standardGeneric("dayofweek") })
+
 #' @rdname column_datetime_functions
 #' @export
 #' @name NULL
@@ -1569,12 +1575,9 @@ setGeneric("year", function(x) { standardGeneric("year") })
 #' @export
 setGeneric("fitted")
 
-#' @param x,y For \code{glm}: logical values indicating whether the response vector
-#'            and model matrix used in the fitting process should be returned as
-#'            components of the returned value.
-#' @inheritParams stats::glm
-#' @rdname glm
+# Do not carry stats::glm usage and param here, and do not document the generic
 #' @export
+#' @noRd
 setGeneric("glm")
 
 #' @param object a fitted ML model object.

R/pkg/R/install.R

Lines changed: 36 additions & 1 deletion

@@ -152,6 +152,11 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL,
   })
   if (!tarExists || overwrite || !success) {
     unlink(packageLocalPath)
+    if (success) {
+      # if tar file was not there before (or it was, but we are told to overwrite it),
+      # and untar is successful - set a flag that we have downloaded (and untar) Spark package.
+      assign(".sparkDownloaded", TRUE, envir = .sparkREnv)
+    }
   }
   if (!success) stop("Extract archive failed.")
   message("DONE.")
@@ -266,6 +271,7 @@ hadoopVersionName <- function(hadoopVersion) {
 
 # The implementation refers to appdirs package: https://pypi.python.org/pypi/appdirs and
 # adapt to Spark context
+# see also sparkCacheRelPathLength()
 sparkCachePath <- function() {
   if (is_windows()) {
     winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA)
@@ -282,7 +288,7 @@ sparkCachePath <- function() {
     }
   } else if (.Platform$OS.type == "unix") {
     if (Sys.info()["sysname"] == "Darwin") {
-      path <- file.path(Sys.getenv("HOME"), "Library/Caches", "spark")
+      path <- file.path(Sys.getenv("HOME"), "Library", "Caches", "spark")
     } else {
       path <- file.path(
         Sys.getenv("XDG_CACHE_HOME", file.path(Sys.getenv("HOME"), ".cache")), "spark")
@@ -293,6 +299,16 @@ sparkCachePath <- function() {
   normalizePath(path, mustWork = FALSE)
 }
 
+# Length of the Spark cache specific relative path segments for each platform
+# eg. "Apache\Spark\Cache" is 3 in Windows, or "spark" is 1 in unix
+# Must match sparkCachePath() exactly.
+sparkCacheRelPathLength <- function() {
+  if (is_windows()) {
+    3
+  } else {
+    1
+  }
+}
 
 installInstruction <- function(mode) {
   if (mode == "remote") {
@@ -310,3 +326,22 @@ installInstruction <- function(mode) {
     stop(paste0("No instruction found for ", mode, " mode."))
   }
 }
+
+uninstallDownloadedSpark <- function() {
+  # clean up if Spark was downloaded
+  sparkDownloaded <- getOne(".sparkDownloaded",
+                            envir = .sparkREnv,
+                            inherits = TRUE,
+                            ifnotfound = FALSE)
+  sparkDownloadedDir <- Sys.getenv("SPARK_HOME")
+  if (sparkDownloaded && nchar(sparkDownloadedDir) > 0) {
+    unlink(sparkDownloadedDir, recursive = TRUE, force = TRUE)
+
+    dirs <- traverseParentDirs(sparkCachePath(), sparkCacheRelPathLength())
+    lapply(dirs, function(d) {
+      if (length(list.files(d, all.files = TRUE, include.dirs = TRUE, no.. = TRUE)) == 0) {
+        unlink(d, recursive = TRUE, force = TRUE)
+      }
+    })
+  }
+}
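Taken together, the .sparkDownloaded flag and the new helper let a session clean up a distribution it fetched itself. A hypothetical flow:

  install.spark(hadoopVersion = "2.7")   # sets the flag only if it actually
                                         # downloaded and extracted Spark
  # ... use the downloaded distribution ...
  uninstallDownloadedSpark()             # removes SPARK_HOME and prunes any
                                         # now-empty cache directories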

R/pkg/R/mllib_regression.R

Lines changed: 1 addition & 0 deletions

@@ -210,6 +210,7 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
 #'             1.0.
 #' @return \code{glm} returns a fitted generalized linear model.
 #' @rdname glm
+#' @aliases glm
 #' @export
 #' @examples
 #' \dontrun{
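With the generic marked @noRd in generics.R and the alias added here, ?glm resolves to this SparkR page while S4 dispatch is unchanged. An illustrative fit:

  df <- createDataFrame(iris)   # '.' in column names becomes '_'
  model <- glm(Sepal_Width ~ Sepal_Length, data = df, family = "gaussian")
  summary(model)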

R/pkg/R/sparkR.R

Lines changed: 12 additions & 0 deletions

@@ -420,6 +420,18 @@ sparkR.session <- function(
                            enableHiveSupport)
     assign(".sparkRsession", sparkSession, envir = .sparkREnv)
   }
+
+  # Check if version number of SparkSession matches version number of SparkR package
+  jvmVersion <- callJMethod(sparkSession, "version")
+  # Remove -SNAPSHOT from jvm versions
+  jvmVersionStrip <- gsub("-SNAPSHOT", "", jvmVersion)
+  rPackageVersion <- paste0(packageVersion("SparkR"))
+
+  if (jvmVersionStrip != rPackageVersion) {
+    warning(paste("Version mismatch between Spark JVM and SparkR package. JVM version was",
+                  jvmVersion, ", while R package version was", rPackageVersion))
+  }
+
   sparkSession
 }
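A sketch of what a mismatched session would print (the version numbers are illustrative):

  sparkR.session()
  # Warning message:
  # Version mismatch between Spark JVM and SparkR package. JVM version was
  # 2.3.0-SNAPSHOT , while R package version was 2.2.0

Since "-SNAPSHOT" is stripped before the comparison, a 2.3.0-SNAPSHOT JVM with the 2.3.0 R package does not warn.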
