
Commit e21d97d

Merge branch 'master' of https://github.com/apache/spark into SPARK-31167-missing-test-deps

2 parents: bae2c0c + f7d9e3d

446 files changed: +13020 / -5262 lines


R/WINDOWS.md

Lines changed: 2 additions & 2 deletions
@@ -22,8 +22,8 @@ To build SparkR on Windows, the following steps are required
 
 1. Make sure `bash` is available and in `PATH` if you already have a built-in `bash` on Windows. If you do not have, install [Cygwin](https://www.cygwin.com/).
 
-2. Install R (>= 3.1) and [Rtools](https://cloud.r-project.org/bin/windows/Rtools/). Make sure to
-include Rtools and R in `PATH`. Note that support for R prior to version 3.4 is deprecated as of Spark 3.0.0.
+2. Install R (>= 3.5) and [Rtools](https://cloud.r-project.org/bin/windows/Rtools/). Make sure to
+include Rtools and R in `PATH`.
 
 3. Install JDK that SparkR supports (see `R/pkg/DESCRIPTION`), and set `JAVA_HOME` in the system environment variables.
 
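
A quick way to confirm an installed R meets the new floor (a hedged check from any R console; not part of the commit):

getRversion() >= "3.5"  # must be TRUE to build and run SparkR after this change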

R/install-dev.bat

Lines changed: 7 additions & 1 deletion
@@ -24,7 +24,13 @@ set SPARK_HOME=%~dp0..
 
 MKDIR %SPARK_HOME%\R\lib
 
-R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" %SPARK_HOME%\R\pkg\
+rem When you pass the package path directly as an argument to R CMD INSTALL,
+rem it takes the path as 'C:\projects\spark\R\..\R\pkg"' as an example at
+rem R 4.0. To work around this, directly go to the directoy and install it.
+rem See also SPARK-32074
+pushd %SPARK_HOME%\R\pkg\
+R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" .
+popd
 
 rem Zip the SparkR package so that it can be distributed to worker nodes on YARN
 pushd %SPARK_HOME%\R\lib

R/pkg/DESCRIPTION

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ URL: https://www.apache.org/ https://spark.apache.org/
 BugReports: https://spark.apache.org/contributing.html
 SystemRequirements: Java (>= 8, < 12)
 Depends:
-    R (>= 3.1),
+    R (>= 3.5),
     methods
 Suggests:
     knitr,

R/pkg/R/mllib_fpm.R

Lines changed: 3 additions & 2 deletions
@@ -122,11 +122,12 @@ setMethod("spark.freqItemsets", signature(object = "FPGrowthModel"),
 # Get association rules.
 
 #' @return A \code{SparkDataFrame} with association rules.
-#'         The \code{SparkDataFrame} contains four columns:
+#'         The \code{SparkDataFrame} contains five columns:
 #'         \code{antecedent} (an array of the same type as the input column),
 #'         \code{consequent} (an array of the same type as the input column),
 #'         \code{condfidence} (confidence for the rule)
-#'         and \code{lift} (lift for the rule)
+#'         \code{lift} (lift for the rule)
+#'         and \code{support} (support for the rule)
 #' @rdname spark.fpGrowth
 #' @aliases associationRules,FPGrowthModel-method
 #' @note spark.associationRules(FPGrowthModel) since 2.2.0
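
For context, a minimal R sketch of the documented behavior, assuming an active SparkR session; the toy transactions below are hypothetical:

# Hedged example: spark.associationRules() now returns five columns.
df <- createDataFrame(data.frame(raw = c("a,b", "a,b,c", "c"),
                                 stringsAsFactors = FALSE))
df <- selectExpr(df, "split(raw, ',') AS items")
model <- spark.fpGrowth(df, itemsCol = "items", minSupport = 0.3, minConfidence = 0.5)
rules <- spark.associationRules(model)
head(rules)  # columns: antecedent, consequent, confidence, lift, support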

R/pkg/R/utils.R

Lines changed: 4 additions & 1 deletion
@@ -529,7 +529,10 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
         # Namespaces other than "SparkR" will not be searched.
         if (!isNamespace(func.env) ||
             (getNamespaceName(func.env) == "SparkR" &&
-             !(nodeChar %in% getNamespaceExports("SparkR")))) {
+             !(nodeChar %in% getNamespaceExports("SparkR")) &&
+             # Note that generic S4 methods should not be set to the environment of
+             # cleaned closure. It does not work with R 4.0.0+. See also SPARK-31918.
+             nodeChar != "" && !methods::isGeneric(nodeChar, func.env))) {
           # Only include SparkR internals.
 
           # Set parameter 'inherits' to FALSE since we do not need to search in
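
As a hedged illustration of the new guard (not part of the commit itself), methods::isGeneric() reports whether a name refers to an S4 generic in a given environment, which is why S4 generics are now kept out of the cleaned closure's environment:

library(methods)
# "show" is an S4 generic defined in the methods namespace; "paste" is an
# ordinary function, so only the former would be skipped by the check above.
isGeneric("show", where = asNamespace("methods"))   # TRUE
isGeneric("paste", where = asNamespace("base"))     # FALSE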

R/pkg/inst/profile/general.R

Lines changed: 0 additions & 4 deletions
@@ -16,10 +16,6 @@
 #
 
 .First <- function() {
-  if (utils::compareVersion(paste0(R.version$major, ".", R.version$minor), "3.4.0") == -1) {
-    warning("Support for R prior to version 3.4 is deprecated since Spark 3.0.0")
-  }
-
   packageDir <- Sys.getenv("SPARKR_PACKAGE_DIR")
   dirs <- strsplit(packageDir, ",")[[1]]
   .libPaths(c(dirs, .libPaths()))

R/pkg/inst/profile/shell.R

Lines changed: 0 additions & 4 deletions
@@ -16,10 +16,6 @@
 #
 
 .First <- function() {
-  if (utils::compareVersion(paste0(R.version$major, ".", R.version$minor), "3.4.0") == -1) {
-    warning("Support for R prior to version 3.4 is deprecated since Spark 3.0.0")
-  }
-
   home <- Sys.getenv("SPARK_HOME")
   .libPaths(c(file.path(home, "R", "lib"), .libPaths()))
   Sys.setenv(NOAWT = 1)

R/pkg/tests/fulltests/test_context.R

Lines changed: 3 additions & 1 deletion
@@ -26,7 +26,9 @@ test_that("Check masked functions", {
     "colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset",
     "summary", "transform", "drop", "window", "as.data.frame", "union", "not")
   version <- packageVersion("base")
-  if (as.numeric(version$major) >= 3 && as.numeric(version$minor) >= 3) {
+  is33Above <- as.numeric(version$major) >= 3 && as.numeric(version$minor) >= 3
+  is40Above <- as.numeric(version$major) >= 4
+  if (is33Above || is40Above) {
     namesOfMasked <- c("endsWith", "startsWith", namesOfMasked)
   }
   masked <- conflicts(detail = TRUE)$`package:SparkR`
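
The second flag is needed because on R 4.0.x the minor component resets to 0, so the old conjunction wrongly rejected 4.0 even though it is newer than 3.3. A quick sketch of the failure mode:

# On R 4.0.0, packageVersion("base") is '4.0.0' (major 4, minor 0), so the
# old test evaluates to FALSE while the new is40Above check catches it.
version <- packageVersion("base")
as.numeric(version$major) >= 3 && as.numeric(version$minor) >= 3  # FALSE on R 4.0.x
as.numeric(version$major) >= 4                                    # TRUE on R 4.0.x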

R/pkg/tests/fulltests/test_mllib_classification.R

Lines changed: 9 additions & 9 deletions
@@ -34,7 +34,7 @@ test_that("spark.svmLinear", {
   summary <- summary(model)
 
   # test summary coefficients return matrix type
-  expect_true(class(summary$coefficients) == "matrix")
+  expect_true(any(class(summary$coefficients) == "matrix"))
   expect_true(class(summary$coefficients[, 1]) == "numeric")
 
   coefs <- summary$coefficients[, "Estimate"]
@@ -130,7 +130,7 @@ test_that("spark.logit", {
   summary <- summary(model)
 
   # test summary coefficients return matrix type
-  expect_true(class(summary$coefficients) == "matrix")
+  expect_true(any(class(summary$coefficients) == "matrix"))
   expect_true(class(summary$coefficients[, 1]) == "numeric")
 
   versicolorCoefsR <- c(1.52, 0.03, -0.53, 0.04, 0.00)
@@ -242,8 +242,8 @@
   # Test binomial logistic regression against two classes with upperBoundsOnCoefficients
   # and upperBoundsOnIntercepts
   u <- matrix(c(1.0, 0.0, 1.0, 0.0), nrow = 1, ncol = 4)
-  model <- spark.logit(training, Species ~ ., upperBoundsOnCoefficients = u,
-                       upperBoundsOnIntercepts = 1.0)
+  model <- suppressWarnings(spark.logit(training, Species ~ ., upperBoundsOnCoefficients = u,
+                                        upperBoundsOnIntercepts = 1.0))
   summary <- summary(model)
   coefsR <- c(-11.13331, 1.00000, 0.00000, 1.00000, 0.00000)
   coefs <- summary$coefficients[, "Estimate"]
@@ -255,8 +255,8 @@
   # Test binomial logistic regression against two classes with lowerBoundsOnCoefficients
   # and lowerBoundsOnIntercepts
   l <- matrix(c(0.0, -1.0, 0.0, -1.0), nrow = 1, ncol = 4)
-  model <- spark.logit(training, Species ~ ., lowerBoundsOnCoefficients = l,
-                       lowerBoundsOnIntercepts = 0.0)
+  model <- suppressWarnings(spark.logit(training, Species ~ ., lowerBoundsOnCoefficients = l,
+                                        lowerBoundsOnIntercepts = 0.0))
   summary <- summary(model)
   coefsR <- c(0, 0, -1, 0, 1.902192)
   coefs <- summary$coefficients[, "Estimate"]
@@ -268,9 +268,9 @@
   # Test multinomial logistic regression with lowerBoundsOnCoefficients
   # and lowerBoundsOnIntercepts
   l <- matrix(c(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0), nrow = 2, ncol = 4)
-  model <- spark.logit(training, Species ~ ., family = "multinomial",
-                       lowerBoundsOnCoefficients = l,
-                       lowerBoundsOnIntercepts = as.array(c(0.0, 0.0)))
+  model <- suppressWarnings(spark.logit(training, Species ~ ., family = "multinomial",
+                                        lowerBoundsOnCoefficients = l,
+                                        lowerBoundsOnIntercepts = as.array(c(0.0, 0.0))))
   summary <- summary(model)
   versicolorCoefsR <- c(42.639465, 7.258104, 14.330814, 16.298243, 11.716429)
   virginicaCoefsR <- c(0.0002970796, 4.79274, 7.65047, 25.72793, 30.0021)
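
For context on the any() wrapping (an illustration, not part of the commit): since R 4.0.0 a matrix also inherits from "array", so comparing class() against a single string yields a length-two logical vector:

m <- matrix(1:4, nrow = 2)
class(m)                   # "matrix" "array" on R >= 4.0.0; just "matrix" before
class(m) == "matrix"       # c(TRUE, FALSE) on R >= 4.0.0
any(class(m) == "matrix")  # TRUE on both old and new R, as the tests now assert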

R/pkg/tests/fulltests/test_mllib_clustering.R

Lines changed: 1 addition & 1 deletion
@@ -171,7 +171,7 @@ test_that("spark.kmeans", {
   expect_equal(sort(collect(distinct(select(cluster, "prediction")))$prediction), c(0, 1))
 
   # test summary coefficients return matrix type
-  expect_true(class(summary.model$coefficients) == "matrix")
+  expect_true(any(class(summary.model$coefficients) == "matrix"))
   expect_true(class(summary.model$coefficients[1, ]) == "numeric")
 
   # Test model save/load
