Commit ffd134e

Merge branch 'master' of github.com:apache/spark into r-submit
2 parents: d867756 + c07838b


754 files changed: 30,189 additions and 10,328 deletions

.rat-excludes

Lines changed: 2 additions & 0 deletions
@@ -91,3 +91,5 @@ help/*
 html/*
 INDEX
 .lintr
+gen-java.*
+.*avpr

R/install-dev.bat

Lines changed: 5 additions & 0 deletions
@@ -25,3 +25,8 @@ set SPARK_HOME=%~dp0..
 MKDIR %SPARK_HOME%\R\lib
 
 R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" %SPARK_HOME%\R\pkg\
+
+rem Zip the SparkR package so that it can be distributed to worker nodes on YARN
+pushd %SPARK_HOME%\R\lib
+%JAVA_HOME%\bin\jar.exe cfM "%SPARK_HOME%\R\lib\sparkr.zip" SparkR
+popd

R/install-dev.sh

Lines changed: 6 additions & 2 deletions
@@ -34,12 +34,16 @@ LIB_DIR="$FWDIR/lib"
 
 mkdir -p $LIB_DIR
 
-pushd $FWDIR
+pushd $FWDIR > /dev/null
 
 # Generate Rd files if devtools is installed
 Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
 
 # Install SparkR to $LIB_DIR
 R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/
 
-popd
+# Zip the SparkR package so that it can be distributed to worker nodes on YARN
+cd $LIB_DIR
+jar cfM "$LIB_DIR/sparkr.zip" SparkR
+
+popd > /dev/null
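
Both install scripts now archive the installed SparkR package as sparkr.zip so it can be shipped to YARN worker nodes. A minimal sanity check, not part of this commit and assuming SPARK_HOME is set, is to look for the archive from an R session:

# Hypothetical check (not in this commit): confirm install-dev produced the
# sparkr.zip archive that will be distributed to YARN workers.
zip_path <- file.path(Sys.getenv("SPARK_HOME"), "R", "lib", "sparkr.zip")
if (file.exists(zip_path)) {
  message("Found SparkR archive at ", zip_path)
} else {
  stop("sparkr.zip not found; re-run R/install-dev.sh (or install-dev.bat on Windows)")
}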

R/pkg/DESCRIPTION

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ Collate:
     'client.R'
     'context.R'
     'deserialize.R'
+    'mllib.R'
     'serialize.R'
     'sparkR.R'
     'utils.R'
-    'zzz.R'

R/pkg/NAMESPACE

Lines changed: 5 additions & 0 deletions
@@ -10,6 +10,10 @@ export("sparkR.init")
 export("sparkR.stop")
 export("print.jobj")
 
+# MLlib integration
+exportMethods("glm",
+              "predict")
+
 # Job group lifecycle management methods
 export("setJobGroup",
        "clearJobGroup",
@@ -77,6 +81,7 @@ exportMethods("abs",
               "atan",
               "atan2",
               "avg",
+              "between",
               "cast",
               "cbrt",
               "ceiling",

R/pkg/R/DataFrame.R

Lines changed: 5 additions & 5 deletions
@@ -1314,7 +1314,7 @@ setMethod("except",
 #' write.df(df, "myfile", "parquet", "overwrite")
 #' }
 setMethod("write.df",
-          signature(df = "DataFrame", path = 'character'),
+          signature(df = "DataFrame", path = "character"),
           function(df, path, source = NULL, mode = "append", ...){
             if (is.null(source)) {
               sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
@@ -1328,7 +1328,7 @@ setMethod("write.df",
             jmode <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "saveMode", mode)
             options <- varargsToEnv(...)
             if (!is.null(path)) {
-              options[['path']] = path
+              options[["path"]] <- path
             }
             callJMethod(df@sdf, "save", source, jmode, options)
           })
@@ -1337,7 +1337,7 @@ setMethod("write.df",
 #' @aliases saveDF
 #' @export
 setMethod("saveDF",
-          signature(df = "DataFrame", path = 'character'),
+          signature(df = "DataFrame", path = "character"),
           function(df, path, source = NULL, mode = "append", ...){
             write.df(df, path, source, mode, ...)
           })
@@ -1375,8 +1375,8 @@ setMethod("saveDF",
 #' saveAsTable(df, "myfile")
 #' }
 setMethod("saveAsTable",
-          signature(df = "DataFrame", tableName = 'character', source = 'character',
-                    mode = 'character'),
+          signature(df = "DataFrame", tableName = "character", source = "character",
+                    mode = "character"),
           function(df, tableName, source = NULL, mode="append", ...){
             if (is.null(source)) {
               sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)

R/pkg/R/RDD.R

Lines changed: 0 additions & 2 deletions
@@ -165,7 +165,6 @@ setMethod("getJRDD", signature(rdd = "PipelinedRDD"),
                                    serializedFuncArr,
                                    rdd@env$prev_serializedMode,
                                    packageNamesArr,
-                                   as.character(.sparkREnv[["libname"]]),
                                    broadcastArr,
                                    callJMethod(prev_jrdd, "classTag"))
             } else {
@@ -175,7 +174,6 @@ setMethod("getJRDD", signature(rdd = "PipelinedRDD"),
                                    rdd@env$prev_serializedMode,
                                    serializedMode,
                                    packageNamesArr,
-                                   as.character(.sparkREnv[["libname"]]),
                                    broadcastArr,
                                    callJMethod(prev_jrdd, "classTag"))
             }

R/pkg/R/SQLContext.R

Lines changed: 5 additions & 3 deletions
@@ -86,7 +86,9 @@ infer_type <- function(x) {
 createDataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0) {
   if (is.data.frame(data)) {
     # get the names of columns, they will be put into RDD
-    schema <- names(data)
+    if (is.null(schema)) {
+      schema <- names(data)
+    }
     n <- nrow(data)
     m <- ncol(data)
     # get rid of factor type
@@ -455,7 +457,7 @@ dropTempTable <- function(sqlContext, tableName) {
 read.df <- function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) {
   options <- varargsToEnv(...)
   if (!is.null(path)) {
-    options[['path']] <- path
+    options[["path"]] <- path
   }
   if (is.null(source)) {
     sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
@@ -504,7 +506,7 @@ loadDF <- function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) {
 createExternalTable <- function(sqlContext, tableName, path = NULL, source = NULL, ...) {
   options <- varargsToEnv(...)
   if (!is.null(path)) {
-    options[['path']] <- path
+    options[["path"]] <- path
   }
   sdf <- callJMethod(sqlContext, "createExternalTable", tableName, source, options)
   dataFrame(sdf)
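
The createDataFrame change stops a user-supplied schema from being clobbered by the local data.frame's own column names. A small sketch of the behavior this enables, assuming an initialized sqlContext (the data and column names are illustrative):

# With the fix, the character-vector schema below renames the columns;
# previously it was ignored and the data.frame names were kept.
localDF <- data.frame(name = c("John", "Smith"), age = c(19L, 23L))
df <- createDataFrame(sqlContext, localDF, schema = c("person", "years"))
printSchema(df)  # columns are now `person` and `years`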

R/pkg/R/client.R

Lines changed: 2 additions & 2 deletions
@@ -36,9 +36,9 @@ connectBackend <- function(hostname, port, timeout = 6000) {
 
 determineSparkSubmitBin <- function() {
   if (.Platform$OS.type == "unix") {
-    sparkSubmitBinName = "spark-submit"
+    sparkSubmitBinName <- "spark-submit"
   } else {
-    sparkSubmitBinName = "spark-submit.cmd"
+    sparkSubmitBinName <- "spark-submit.cmd"
   }
   sparkSubmitBinName
 }

R/pkg/R/column.R

Lines changed: 17 additions & 0 deletions
@@ -187,6 +187,23 @@ setMethod("substr", signature(x = "Column"),
             column(jc)
           })
 
+#' between
+#'
+#' Test if the column is between the lower bound and upper bound, inclusive.
+#'
+#' @rdname column
+#'
+#' @param bounds lower and upper bounds
+setMethod("between", signature(x = "Column"),
+          function(x, bounds) {
+            if (is.vector(bounds) && length(bounds) == 2) {
+              jc <- callJMethod(x@jc, "between", bounds[1], bounds[2])
+              column(jc)
+            } else {
+              stop("bounds should be a vector of lower and upper bounds")
+            }
+          })
+
 #' Casts the column to a different data type.
 #'
 #' @rdname column
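
A quick illustration of the new between method, assuming a SparkR DataFrame df with a numeric age column (both hypothetical):

# bounds is a length-2 vector of lower and upper bounds; the test is inclusive.
adults <- filter(df, between(df$age, c(18, 30)))
head(adults)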
