
Commit e21d97d

Merge branch 'master' of https://github.com/apache/spark into SPARK-31167-missing-test-deps

2 parents: bae2c0c + f7d9e3d

446 files changed: +13020 / -5262 lines


R/WINDOWS.md

Lines changed: 2 additions & 2 deletions
@@ -22,8 +22,8 @@ To build SparkR on Windows, the following steps are required
 
 1. Make sure `bash` is available and in `PATH` if you already have a built-in `bash` on Windows. If you do not have, install [Cygwin](https://www.cygwin.com/).
 
-2. Install R (>= 3.1) and [Rtools](https://cloud.r-project.org/bin/windows/Rtools/). Make sure to
-include Rtools and R in `PATH`. Note that support for R prior to version 3.4 is deprecated as of Spark 3.0.0.
+2. Install R (>= 3.5) and [Rtools](https://cloud.r-project.org/bin/windows/Rtools/). Make sure to
+include Rtools and R in `PATH`.
 
 3. Install JDK that SparkR supports (see `R/pkg/DESCRIPTION`), and set `JAVA_HOME` in the system environment variables.
 
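
A quick way to confirm an installed R meets the new floor (a hedged check from any R console; not part of the commit):

getRversion() >= "3.5"  # must be TRUE to build and run SparkR after this change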

R/install-dev.bat

Lines changed: 7 additions & 1 deletion
@@ -24,7 +24,13 @@ set SPARK_HOME=%~dp0..
 
 MKDIR %SPARK_HOME%\R\lib
 
-R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" %SPARK_HOME%\R\pkg\
+rem When you pass the package path directly as an argument to R CMD INSTALL,
+rem it takes the path as 'C:\projects\spark\R\..\R\pkg"' as an example at
+rem R 4.0. To work around this, directly go to the directoy and install it.
+rem See also SPARK-32074
+pushd %SPARK_HOME%\R\pkg\
+R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" .
+popd
 
 rem Zip the SparkR package so that it can be distributed to worker nodes on YARN
 pushd %SPARK_HOME%\R\lib

R/pkg/DESCRIPTION

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ URL: https://www.apache.org/ https://spark.apache.org/
 BugReports: https://spark.apache.org/contributing.html
 SystemRequirements: Java (>= 8, < 12)
 Depends:
-    R (>= 3.1),
+    R (>= 3.5),
     methods
 Suggests:
     knitr,

R/pkg/R/mllib_fpm.R

Lines changed: 3 additions & 2 deletions
@@ -122,11 +122,12 @@ setMethod("spark.freqItemsets", signature(object = "FPGrowthModel"),
 # Get association rules.
 
 #' @return A \code{SparkDataFrame} with association rules.
-#'         The \code{SparkDataFrame} contains four columns:
+#'         The \code{SparkDataFrame} contains five columns:
 #'         \code{antecedent} (an array of the same type as the input column),
 #'         \code{consequent} (an array of the same type as the input column),
 #'         \code{condfidence} (confidence for the rule)
-#'         and \code{lift} (lift for the rule)
+#'         \code{lift} (lift for the rule)
+#'         and \code{support} (support for the rule)
 #' @rdname spark.fpGrowth
 #' @aliases associationRules,FPGrowthModel-method
 #' @note spark.associationRules(FPGrowthModel) since 2.2.0
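
For context, a minimal R sketch of the documented behavior, assuming an active SparkR session; the toy transactions below are hypothetical:

# Hedged example: spark.associationRules() now returns five columns.
df <- createDataFrame(data.frame(raw = c("a,b", "a,b,c", "c"),
                                 stringsAsFactors = FALSE))
df <- selectExpr(df, "split(raw, ',') AS items")
model <- spark.fpGrowth(df, itemsCol = "items", minSupport = 0.3, minConfidence = 0.5)
rules <- spark.associationRules(model)
head(rules)  # columns: antecedent, consequent, confidence, lift, support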

R/pkg/R/utils.R

Lines changed: 4 additions & 1 deletion
@@ -529,7 +529,10 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) {
         # Namespaces other than "SparkR" will not be searched.
         if (!isNamespace(func.env) ||
             (getNamespaceName(func.env) == "SparkR" &&
-             !(nodeChar %in% getNamespaceExports("SparkR")))) {
+             !(nodeChar %in% getNamespaceExports("SparkR")) &&
+             # Note that generic S4 methods should not be set to the environment of
+             # cleaned closure. It does not work with R 4.0.0+. See also SPARK-31918.
+             nodeChar != "" && !methods::isGeneric(nodeChar, func.env))) {
           # Only include SparkR internals.
 
           # Set parameter 'inherits' to FALSE since we do not need to search in
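
As a hedged illustration of the new guard (not part of the commit itself), methods::isGeneric() reports whether a name refers to an S4 generic in a given environment, which is why S4 generics are now kept out of the cleaned closure's environment:

library(methods)
# "show" is an S4 generic defined in the methods namespace; "paste" is an
# ordinary function, so only the former would be skipped by the check above.
isGeneric("show", where = asNamespace("methods"))   # TRUE
isGeneric("paste", where = asNamespace("base"))     # FALSE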

R/pkg/inst/profile/general.R

Lines changed: 0 additions & 4 deletions
@@ -16,10 +16,6 @@
 #
 
 .First <- function() {
-  if (utils::compareVersion(paste0(R.version$major, ".", R.version$minor), "3.4.0") == -1) {
-    warning("Support for R prior to version 3.4 is deprecated since Spark 3.0.0")
-  }
-
   packageDir <- Sys.getenv("SPARKR_PACKAGE_DIR")
   dirs <- strsplit(packageDir, ",")[[1]]
   .libPaths(c(dirs, .libPaths()))

R/pkg/inst/profile/shell.R

Lines changed: 0 additions & 4 deletions
@@ -16,10 +16,6 @@
 #
 
 .First <- function() {
-  if (utils::compareVersion(paste0(R.version$major, ".", R.version$minor), "3.4.0") == -1) {
-    warning("Support for R prior to version 3.4 is deprecated since Spark 3.0.0")
-  }
-
   home <- Sys.getenv("SPARK_HOME")
   .libPaths(c(file.path(home, "R", "lib"), .libPaths()))
   Sys.setenv(NOAWT = 1)

R/pkg/tests/fulltests/test_context.R

Lines changed: 3 additions & 1 deletion
@@ -26,7 +26,9 @@ test_that("Check masked functions", {
     "colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset",
     "summary", "transform", "drop", "window", "as.data.frame", "union", "not")
   version <- packageVersion("base")
-  if (as.numeric(version$major) >= 3 && as.numeric(version$minor) >= 3) {
+  is33Above <- as.numeric(version$major) >= 3 && as.numeric(version$minor) >= 3
+  is40Above <- as.numeric(version$major) >= 4
+  if (is33Above || is40Above) {
     namesOfMasked <- c("endsWith", "startsWith", namesOfMasked)
   }
   masked <- conflicts(detail = TRUE)$`package:SparkR`
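
The second flag is needed because on R 4.0.x the minor component resets to 0, so the old conjunction wrongly rejected 4.0 even though it is newer than 3.3. A quick sketch of the failure mode:

# On R 4.0.0, packageVersion("base") is '4.0.0' (major 4, minor 0), so the
# old test evaluates to FALSE while the new is40Above check catches it.
version <- packageVersion("base")
as.numeric(version$major) >= 3 && as.numeric(version$minor) >= 3  # FALSE on R 4.0.x
as.numeric(version$major) >= 4                                    # TRUE on R 4.0.x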

R/pkg/tests/fulltests/test_mllib_classification.R

Lines changed: 9 additions & 9 deletions
@@ -34,7 +34,7 @@ test_that("spark.svmLinear", {
   summary <- summary(model)
 
   # test summary coefficients return matrix type
-  expect_true(class(summary$coefficients) == "matrix")
+  expect_true(any(class(summary$coefficients) == "matrix"))
   expect_true(class(summary$coefficients[, 1]) == "numeric")
 
   coefs <- summary$coefficients[, "Estimate"]
@@ -130,7 +130,7 @@ test_that("spark.logit", {
   summary <- summary(model)
 
   # test summary coefficients return matrix type
-  expect_true(class(summary$coefficients) == "matrix")
+  expect_true(any(class(summary$coefficients) == "matrix"))
   expect_true(class(summary$coefficients[, 1]) == "numeric")
 
   versicolorCoefsR <- c(1.52, 0.03, -0.53, 0.04, 0.00)
@@ -242,8 +242,8 @@
   # Test binomial logistic regression against two classes with upperBoundsOnCoefficients
   # and upperBoundsOnIntercepts
   u <- matrix(c(1.0, 0.0, 1.0, 0.0), nrow = 1, ncol = 4)
-  model <- spark.logit(training, Species ~ ., upperBoundsOnCoefficients = u,
-                       upperBoundsOnIntercepts = 1.0)
+  model <- suppressWarnings(spark.logit(training, Species ~ ., upperBoundsOnCoefficients = u,
+                                        upperBoundsOnIntercepts = 1.0))
   summary <- summary(model)
   coefsR <- c(-11.13331, 1.00000, 0.00000, 1.00000, 0.00000)
   coefs <- summary$coefficients[, "Estimate"]
@@ -255,8 +255,8 @@
   # Test binomial logistic regression against two classes with lowerBoundsOnCoefficients
   # and lowerBoundsOnIntercepts
   l <- matrix(c(0.0, -1.0, 0.0, -1.0), nrow = 1, ncol = 4)
-  model <- spark.logit(training, Species ~ ., lowerBoundsOnCoefficients = l,
-                       lowerBoundsOnIntercepts = 0.0)
+  model <- suppressWarnings(spark.logit(training, Species ~ ., lowerBoundsOnCoefficients = l,
+                                        lowerBoundsOnIntercepts = 0.0))
   summary <- summary(model)
   coefsR <- c(0, 0, -1, 0, 1.902192)
   coefs <- summary$coefficients[, "Estimate"]
@@ -268,9 +268,9 @@
   # Test multinomial logistic regression with lowerBoundsOnCoefficients
   # and lowerBoundsOnIntercepts
   l <- matrix(c(0.0, -1.0, 0.0, -1.0, 0.0, -1.0, 0.0, -1.0), nrow = 2, ncol = 4)
-  model <- spark.logit(training, Species ~ ., family = "multinomial",
-                       lowerBoundsOnCoefficients = l,
-                       lowerBoundsOnIntercepts = as.array(c(0.0, 0.0)))
+  model <- suppressWarnings(spark.logit(training, Species ~ ., family = "multinomial",
+                                        lowerBoundsOnCoefficients = l,
+                                        lowerBoundsOnIntercepts = as.array(c(0.0, 0.0))))
   summary <- summary(model)
   versicolorCoefsR <- c(42.639465, 7.258104, 14.330814, 16.298243, 11.716429)
   virginicaCoefsR <- c(0.0002970796, 4.79274, 7.65047, 25.72793, 30.0021)
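
For context on the any() wrapping (an illustration, not part of the commit): since R 4.0.0 a matrix also inherits from "array", so comparing class() against a single string yields a length-two logical vector:

m <- matrix(1:4, nrow = 2)
class(m)                   # "matrix" "array" on R >= 4.0.0; just "matrix" before
class(m) == "matrix"       # c(TRUE, FALSE) on R >= 4.0.0
any(class(m) == "matrix")  # TRUE on both old and new R, as the tests now assert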

R/pkg/tests/fulltests/test_mllib_clustering.R

Lines changed: 1 addition & 1 deletion
@@ -171,7 +171,7 @@ test_that("spark.kmeans", {
   expect_equal(sort(collect(distinct(select(cluster, "prediction")))$prediction), c(0, 1))
 
   # test summary coefficients return matrix type
-  expect_true(class(summary.model$coefficients) == "matrix")
+  expect_true(any(class(summary.model$coefficients) == "matrix"))
   expect_true(class(summary.model$coefficients[1, ]) == "numeric")
 
   # Test model save/load
