Commit 71738ee

Merge remote-tracking branch 'upstream/master'

2 parents dfaa971 + 2f22424

364 files changed: +15530 / -4105 lines changed

LICENSE

Lines changed: 1 addition & 1 deletion

@@ -861,7 +861,7 @@ The following components are provided under a BSD-style license. See project lin
 
      (BSD 3 Clause) core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
      (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.1.15 - https://github.com/jpmml/jpmml-model)
-     (BSD 3-clause style license) jblas (org.jblas:jblas:1.2.3 - http://jblas.org/)
+     (BSD 3-clause style license) jblas (org.jblas:jblas:1.2.4 - http://jblas.org/)
      (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
      (BSD License) Javolution (javolution:javolution:5.5.1 - http://javolution.org)
      (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org)

R/pkg/NAMESPACE

Lines changed: 27 additions & 2 deletions

@@ -37,7 +37,7 @@ exportMethods("arrange",
               "registerTempTable",
               "rename",
               "repartition",
-              "sampleDF",
+              "sample",
               "sample_frac",
               "saveAsParquetFile",
               "saveAsTable",
@@ -53,38 +53,62 @@ exportMethods("arrange",
               "unpersist",
               "where",
               "withColumn",
-              "withColumnRenamed")
+              "withColumnRenamed",
+              "write.df")
 
 exportClasses("Column")
 
 exportMethods("abs",
+              "acos",
               "alias",
               "approxCountDistinct",
               "asc",
+              "asin",
+              "atan",
+              "atan2",
               "avg",
               "cast",
+              "cbrt",
+              "ceiling",
               "contains",
+              "cos",
+              "cosh",
               "countDistinct",
               "desc",
               "endsWith",
+              "exp",
+              "expm1",
+              "floor",
               "getField",
               "getItem",
+              "hypot",
               "isNotNull",
               "isNull",
               "last",
               "like",
+              "log",
+              "log10",
+              "log1p",
               "lower",
               "max",
               "mean",
               "min",
               "n",
               "n_distinct",
+              "rint",
               "rlike",
+              "sign",
+              "sin",
+              "sinh",
               "sqrt",
               "startsWith",
               "substr",
               "sum",
               "sumDistinct",
+              "tan",
+              "tanh",
+              "toDegrees",
+              "toRadians",
               "upper")
 
 exportClasses("GroupedData")
@@ -101,6 +125,7 @@ export("cacheTable",
        "jsonFile",
        "loadDF",
        "parquetFile",
+       "read.df",
        "sql",
        "table",
        "tableNames",

R/pkg/R/DataFrame.R

Lines changed: 26 additions & 16 deletions

@@ -150,7 +150,7 @@ setMethod("isLocal",
             callJMethod(x@sdf, "isLocal")
           })
 
-#' ShowDF
+#' showDF
 #'
 #' Print the first numRows rows of a DataFrame
 #'
@@ -170,7 +170,8 @@ setMethod("isLocal",
 setMethod("showDF",
           signature(x = "DataFrame"),
           function(x, numRows = 20) {
-            callJMethod(x@sdf, "showString", numToInt(numRows))
+            s <- callJMethod(x@sdf, "showString", numToInt(numRows))
+            cat(s)
           })
 
 #' show
@@ -187,7 +188,7 @@ setMethod("showDF",
 #' sqlCtx <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
 #' df <- jsonFile(sqlCtx, path)
-#' show(df)
+#' df
 #'}
 setMethod("show", "DataFrame",
           function(object) {
@@ -293,8 +294,8 @@ setMethod("registerTempTable",
 #'\dontrun{
 #' sc <- sparkR.init()
 #' sqlCtx <- sparkRSQL.init(sc)
-#' df <- loadDF(sqlCtx, path, "parquet")
-#' df2 <- loadDF(sqlCtx, path2, "parquet")
+#' df <- read.df(sqlCtx, path, "parquet")
+#' df2 <- read.df(sqlCtx, path2, "parquet")
 #' registerTempTable(df, "table1")
 #' insertInto(df2, "table1", overwrite = TRUE)
 #'}
@@ -472,14 +473,14 @@ setMethod("distinct",
             dataFrame(sdf)
           })
 
-#' SampleDF
+#' Sample
 #'
 #' Return a sampled subset of this DataFrame using a random seed.
 #'
 #' @param x A SparkSQL DataFrame
 #' @param withReplacement Sampling with replacement or not
 #' @param fraction The (rough) sample target fraction
-#' @rdname sampleDF
+#' @rdname sample
 #' @aliases sample_frac
 #' @export
 #' @examples
@@ -488,10 +489,10 @@ setMethod("distinct",
 #' sqlCtx <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
 #' df <- jsonFile(sqlCtx, path)
-#' collect(sampleDF(df, FALSE, 0.5))
-#' collect(sampleDF(df, TRUE, 0.5))
+#' collect(sample(df, FALSE, 0.5))
+#' collect(sample(df, TRUE, 0.5))
 #'}
-setMethod("sampleDF",
+setMethod("sample",
           # TODO : Figure out how to send integer as java.lang.Long to JVM so
           # we can send seed as an argument through callJMethod
           signature(x = "DataFrame", withReplacement = "logical",
@@ -502,13 +503,13 @@ setMethod("sampleDF",
             dataFrame(sdf)
           })
 
-#' @rdname sampleDF
-#' @aliases sampleDF
+#' @rdname sample
+#' @aliases sample
 setMethod("sample_frac",
           signature(x = "DataFrame", withReplacement = "logical",
                     fraction = "numeric"),
           function(x, withReplacement, fraction) {
-            sampleDF(x, withReplacement, fraction)
+            sample(x, withReplacement, fraction)
           })
 
 #' Count
@@ -1302,17 +1303,17 @@ setMethod("except",
 #' @param source A name for external data source
 #' @param mode One of 'append', 'overwrite', 'error', 'ignore'
 #'
-#' @rdname saveAsTable
+#' @rdname write.df
 #' @export
 #' @examples
 #'\dontrun{
 #' sc <- sparkR.init()
 #' sqlCtx <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
 #' df <- jsonFile(sqlCtx, path)
-#' saveAsTable(df, "myfile")
+#' write.df(df, "myfile", "parquet", "overwrite")
 #' }
-setMethod("saveDF",
+setMethod("write.df",
           signature(df = "DataFrame", path = 'character', source = 'character',
                     mode = 'character'),
           function(df, path = NULL, source = NULL, mode = "append", ...){
@@ -1333,6 +1334,15 @@ setMethod("saveDF",
             callJMethod(df@sdf, "save", source, jmode, options)
           })
 
+#' @rdname write.df
+#' @aliases saveDF
+#' @export
+setMethod("saveDF",
+          signature(df = "DataFrame", path = 'character', source = 'character',
+                    mode = 'character'),
+          function(df, path = NULL, source = NULL, mode = "append", ...){
+            write.df(df, path, source, mode, ...)
+})
 
 #' saveAsTable
 #'
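
The old spellings are kept as thin aliases that delegate to the new generics,
so existing scripts keep working. A hedged sketch of the equivalences (df is
any SparkR DataFrame; paths are placeholders):

    # sample_frac delegates to sample; saveDF delegates to write.df
    s1 <- sample(df, withReplacement = FALSE, fraction = 0.1)
    s2 <- sample_frac(df, withReplacement = FALSE, fraction = 0.1)
    write.df(df, "out1.parquet", "parquet", "overwrite")
    saveDF(df, "out2.parquet", "parquet", "overwrite")
    # showDF now cat()s the JVM-rendered table, so it prints with real
    # newlines instead of auto-printing one long escaped string
    showDF(df, numRows = 5)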

R/pkg/R/RDD.R

Lines changed: 2 additions & 2 deletions

@@ -927,7 +927,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical",
                                                 MAXINT)))))
 
             # TODO(zongheng): investigate if this call is an in-place shuffle?
-            sample(samples)[1:total]
+            base::sample(samples)[1:total]
           })
 
 # Creates tuples of the elements in this RDD by applying a function.
@@ -996,7 +996,7 @@ setMethod("coalesce",
             if (shuffle || numPartitions > SparkR:::numPartitions(x)) {
               func <- function(partIndex, part) {
                 set.seed(partIndex)  # partIndex as seed
-                start <- as.integer(sample(numPartitions, 1) - 1)
+                start <- as.integer(base::sample(numPartitions, 1) - 1)
                 lapply(seq_along(part),
                        function(i) {
                          pos <- (start + i) %% numPartitions
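
The base:: qualification is needed because this commit defines a "sample"
generic for DataFrames, which masks base::sample inside the package. A
standalone illustration of the masking; the generic below is a simplified
stand-in, not SparkR's actual definition:

    library(methods)
    # Defining an S4 generic named "sample" shadows base::sample for code
    # that resolves the name through this environment:
    setGeneric("sample", function(x, withReplacement, fraction) {
      standardGeneric("sample")
    })
    # sample(1:10, 3) would now fail to dispatch, but an explicit
    # namespace lookup still reaches the original function:
    base::sample(1:10, 3)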

R/pkg/R/SQLContext.R

Lines changed: 10 additions & 3 deletions

@@ -421,7 +421,7 @@ clearCache <- function(sqlCtx) {
 #' \dontrun{
 #' sc <- sparkR.init()
 #' sqlCtx <- sparkRSQL.init(sc)
-#' df <- loadDF(sqlCtx, path, "parquet")
+#' df <- read.df(sqlCtx, path, "parquet")
 #' registerTempTable(df, "table")
 #' dropTempTable(sqlCtx, "table")
 #' }
@@ -450,10 +450,10 @@ dropTempTable <- function(sqlCtx, tableName) {
 #'\dontrun{
 #' sc <- sparkR.init()
 #' sqlCtx <- sparkRSQL.init(sc)
-#' df <- load(sqlCtx, "path/to/file.json", source = "json")
+#' df <- read.df(sqlCtx, "path/to/file.json", source = "json")
 #' }
 
-loadDF <- function(sqlCtx, path = NULL, source = NULL, ...) {
+read.df <- function(sqlCtx, path = NULL, source = NULL, ...) {
   options <- varargsToEnv(...)
   if (!is.null(path)) {
     options[['path']] <- path
@@ -462,6 +462,13 @@ loadDF <- function(sqlCtx, path = NULL, source = NULL, ...) {
   dataFrame(sdf)
 }
 
+#' @aliases loadDF
+#' @export
+
+loadDF <- function(sqlCtx, path = NULL, source = NULL, ...) {
+  read.df(sqlCtx, path, source, ...)
+}
+
 #' Create an external table
 #'
 #' Creates an external table based on the dataset in a data source,
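
After the rename, both spellings reach the same implementation, and extra
named arguments still flow through varargsToEnv() to the data source as
options. A brief sketch (sqlCtx and the paths are placeholders, and the
option name depends on what the source supports):

    df1 <- read.df(sqlCtx, "path/to/file.json", source = "json")
    df2 <- loadDF(sqlCtx, "path/to/file.json", source = "json")  # thin wrapper
    # extra named arguments become data source options, e.g. a hypothetical
    # sampling ratio for JSON schema inference:
    df3 <- read.df(sqlCtx, "path/to/file.json", "json", samplingRatio = "0.5")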

R/pkg/R/column.R

Lines changed: 33 additions & 3 deletions

@@ -55,12 +55,17 @@ operators <- list(
   "+" = "plus", "-" = "minus", "*" = "multiply", "/" = "divide", "%%" = "mod",
   "==" = "equalTo", ">" = "gt", "<" = "lt", "!=" = "notEqual", "<=" = "leq", ">=" = "geq",
   # we can not override `&&` and `||`, so use `&` and `|` instead
-  "&" = "and", "|" = "or" #, "!" = "unary_$bang"
+  "&" = "and", "|" = "or", #, "!" = "unary_$bang"
+  "^" = "pow"
 )
 column_functions1 <- c("asc", "desc", "isNull", "isNotNull")
 column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains")
 functions <- c("min", "max", "sum", "avg", "mean", "count", "abs", "sqrt",
-               "first", "last", "lower", "upper", "sumDistinct")
+               "first", "last", "lower", "upper", "sumDistinct",
+               "acos", "asin", "atan", "cbrt", "ceiling", "cos", "cosh", "exp",
+               "expm1", "floor", "log", "log10", "log1p", "rint", "sign",
+               "sin", "sinh", "tan", "tanh", "toDegrees", "toRadians")
+binary_mathfunctions<- c("atan2", "hypot")
 
 createOperator <- function(op) {
   setMethod(op,
@@ -76,7 +81,11 @@ createOperator <- function(op) {
               if (class(e2) == "Column") {
                 e2 <- e2@jc
               }
-              callJMethod(e1@jc, operators[[op]], e2)
+              if (op == "^") {
+                jc <- callJStatic("org.apache.spark.sql.functions", operators[[op]], e1@jc, e2)
+              } else {
+                callJMethod(e1@jc, operators[[op]], e2)
+              }
             }
             column(jc)
           })
@@ -106,11 +115,29 @@ createStaticFunction <- function(name) {
   setMethod(name,
             signature(x = "Column"),
             function(x) {
+              if (name == "ceiling") {
+                name <- "ceil"
+              }
+              if (name == "sign") {
+                name <- "signum"
+              }
               jc <- callJStatic("org.apache.spark.sql.functions", name, x@jc)
               column(jc)
             })
 }
 
+createBinaryMathfunctions <- function(name) {
+  setMethod(name,
+            signature(y = "Column"),
+            function(y, x) {
+              if (class(x) == "Column") {
+                x <- x@jc
+              }
+              jc <- callJStatic("org.apache.spark.sql.functions", name, y@jc, x)
+              column(jc)
+            })
+}
+
 createMethods <- function() {
   for (op in names(operators)) {
     createOperator(op)
@@ -124,6 +151,9 @@ createMethods <- function() {
   for (x in functions) {
     createStaticFunction(x)
   }
+  for (name in binary_mathfunctions) {
+    createBinaryMathfunctions(name)
+  }
 }
 
 createMethods()
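
A hedged sketch of the new Column math support; df is assumed to be a SparkR
DataFrame with hypothetical numeric columns x and y:

    # unary functions map one JVM static per name; two names are translated
    # ("ceiling" -> ceil, "sign" -> signum) to match base R spelling
    df2 <- withColumn(df, "xceil", ceiling(df$x))
    # binary math functions take a Column plus a Column or a numeric
    df3 <- withColumn(df2, "angle", atan2(df$y, df$x))
    df4 <- withColumn(df3, "dist", hypot(df$x, df$y))
    # "^" now routes through org.apache.spark.sql.functions.pow
    df5 <- withColumn(df4, "xsq", df$x ^ 2)
    showDF(df5, numRows = 5)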
