
Commit 96460f1

merge conflict

2 parents: 20a2811 + 5cd6a63
File tree: 601 files changed (+5448, −2640 lines). Only a subset of the changed files is shown below.

LICENSE

Lines changed: 46 additions & 0 deletions
@@ -853,6 +853,52 @@ and
 
 Vis.js may be distributed under either license.
 
+========================================================================
+For dagre-d3 (core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js):
+========================================================================
+Copyright (c) 2013 Chris Pettitt
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+========================================================================
+For graphlib-dot (core/src/main/resources/org/apache/spark/ui/static/graphlib-dot.min.js):
+========================================================================
+Copyright (c) 2012-2013 Chris Pettitt
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
 ========================================================================
 BSD-style licenses
 ========================================================================

R/pkg/NAMESPACE

Lines changed: 2 additions & 0 deletions
@@ -19,9 +19,11 @@ exportMethods("arrange",
               "count",
               "describe",
               "distinct",
+              "dropna",
               "dtypes",
               "except",
               "explain",
+              "fillna",
               "filter",
               "first",
               "group_by",

R/pkg/R/DataFrame.R

Lines changed: 129 additions & 6 deletions
@@ -1314,9 +1314,8 @@ setMethod("except",
 #' write.df(df, "myfile", "parquet", "overwrite")
 #' }
 setMethod("write.df",
-          signature(df = "DataFrame", path = 'character', source = 'character',
-                    mode = 'character'),
-          function(df, path = NULL, source = NULL, mode = "append", ...){
+          signature(df = "DataFrame", path = 'character'),
+          function(df, path, source = NULL, mode = "append", ...){
             if (is.null(source)) {
               sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
               source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default",
@@ -1338,9 +1337,8 @@ setMethod("write.df",
 #' @aliases saveDF
 #' @export
 setMethod("saveDF",
-          signature(df = "DataFrame", path = 'character', source = 'character',
-                    mode = 'character'),
-          function(df, path = NULL, source = NULL, mode = "append", ...){
+          signature(df = "DataFrame", path = 'character'),
+          function(df, path, source = NULL, mode = "append", ...){
             write.df(df, path, source, mode, ...)
           })
 
@@ -1431,3 +1429,128 @@ setMethod("describe",
             sdf <- callJMethod(x@sdf, "describe", listToSeq(colList))
             dataFrame(sdf)
           })
+
+#' dropna
+#'
+#' Returns a new DataFrame omitting rows with null values.
+#'
+#' @param x A SparkSQL DataFrame.
+#' @param how "any" or "all".
+#'            if "any", drop a row if it contains any nulls.
+#'            if "all", drop a row only if all its values are null.
+#'            if minNonNulls is specified, how is ignored.
+#' @param minNonNulls If specified, drop rows that have less than
+#'                    minNonNulls non-null values.
+#'                    This overwrites the how parameter.
+#' @param cols Optional list of column names to consider.
+#' @return A DataFrame
+#'
+#' @rdname nafunctions
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlCtx <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- jsonFile(sqlCtx, path)
+#' dropna(df)
+#' }
+setMethod("dropna",
+          signature(x = "DataFrame"),
+          function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+            how <- match.arg(how)
+            if (is.null(cols)) {
+              cols <- columns(x)
+            }
+            if (is.null(minNonNulls)) {
+              minNonNulls <- if (how == "any") { length(cols) } else { 1 }
+            }
+
+            naFunctions <- callJMethod(x@sdf, "na")
+            sdf <- callJMethod(naFunctions, "drop",
+                               as.integer(minNonNulls), listToSeq(as.list(cols)))
+            dataFrame(sdf)
+          })
+
+#' @aliases dropna
+#' @export
+setMethod("na.omit",
+          signature(x = "DataFrame"),
+          function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+            dropna(x, how, minNonNulls, cols)
+          })
+
+#' fillna
+#'
+#' Replace null values.
+#'
+#' @param x A SparkSQL DataFrame.
+#' @param value Value to replace null values with.
+#'              Should be an integer, numeric, character or named list.
+#'              If the value is a named list, then cols is ignored and
+#'              value must be a mapping from column name (character) to
+#'              replacement value. The replacement value must be an
+#'              integer, numeric or character.
+#' @param cols Optional list of column names to consider.
+#'             Columns specified in cols that do not have a matching data
+#'             type are ignored. For example, if value is a character, and
+#'             subset contains a non-character column, then the non-character
+#'             column is simply ignored.
+#' @return A DataFrame
+#'
+#' @rdname nafunctions
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlCtx <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- jsonFile(sqlCtx, path)
+#' fillna(df, 1)
+#' fillna(df, list("age" = 20, "name" = "unknown"))
+#' }
setMethod("fillna",
+          signature(x = "DataFrame"),
+          function(x, value, cols = NULL) {
+            if (!(class(value) %in% c("integer", "numeric", "character", "list"))) {
+              stop("value should be an integer, numeric, character or named list.")
+            }
+
+            if (class(value) == "list") {
+              # Check column names in the named list
+              colNames <- names(value)
+              if (length(colNames) == 0 || !all(colNames != "")) {
+                stop("value should be a named list with each name being a column name.")
+              }
+
+              # Convert the named list to an environment to be passed to the JVM
+              valueMap <- new.env()
+              for (col in colNames) {
+                # Check that each item in the named list is of a valid type
+                v <- value[[col]]
+                if (!(class(v) %in% c("integer", "numeric", "character"))) {
+                  stop("Each item in value should be an integer, numeric or character.")
+                }
+                valueMap[[col]] <- v
+              }
+
+              # When value is a named list, the caller is expected not to pass in cols
+              if (!is.null(cols)) {
+                warning("When value is a named list, cols is ignored!")
+                cols <- NULL
+              }
+
+              value <- valueMap
+            } else if (is.integer(value)) {
+              # Cast an integer to a numeric
+              value <- as.numeric(value)
+            }
+
+            naFunctions <- callJMethod(x@sdf, "na")
+            sdf <- if (length(cols) == 0) {
+              callJMethod(naFunctions, "fill", value)
+            } else {
+              callJMethod(naFunctions, "fill", value, listToSeq(as.list(cols)))
+            }
+            dataFrame(sdf)
+          })
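Taken together, dropna, na.omit and fillna expose Spark's DataFrameNaFunctions ("na") API to R. A minimal usage sketch, assuming a local SparkR session and a hypothetical people.json input (the "age" and "name" columns are illustrative):

    sc <- sparkR.init()
    sqlCtx <- sparkRSQL.init(sc)
    df <- jsonFile(sqlCtx, "path/to/people.json")   # hypothetical path

    # Drop rows containing any null, or only rows that are entirely null
    cleaned <- dropna(df, how = "any")
    sparse  <- dropna(df, how = "all")

    # Keep rows with at least 2 non-null values among the listed columns
    kept <- dropna(df, minNonNulls = 2, cols = list("age", "name"))

    # Replace nulls with one value everywhere, or per column via a named list
    filled <- fillna(df, 0)
    filled <- fillna(df, list("age" = 20, "name" = "unknown"))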

R/pkg/R/SQLContext.R

Lines changed: 5 additions & 0 deletions
@@ -457,6 +457,11 @@ read.df <- function(sqlContext, path = NULL, source = NULL, ...) {
   if (!is.null(path)) {
     options[['path']] <- path
   }
+  if (is.null(source)) {
+    sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv)
+    source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default",
+                          "org.apache.spark.sql.parquet")
+  }
   sdf <- callJMethod(sqlContext, "load", source, options)
   dataFrame(sdf)
 }
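With this change read.df no longer requires a source argument: it falls back to the spark.sql.sources.default configuration, which itself defaults to Parquet. A minimal sketch, with a hypothetical Parquet path:

    df <- read.df(sqlCtx, "path/to/people.parquet")   # source resolved from spark.sql.sources.default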

R/pkg/R/generics.R

Lines changed: 20 additions & 2 deletions
@@ -396,6 +396,20 @@ setGeneric("columns", function(x) {standardGeneric("columns") })
 #' @export
 setGeneric("describe", function(x, col, ...) { standardGeneric("describe") })
 
+#' @rdname nafunctions
+#' @export
+setGeneric("dropna",
+           function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+             standardGeneric("dropna")
+           })
+
+#' @rdname nafunctions
+#' @export
+setGeneric("na.omit",
+           function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
+             standardGeneric("na.omit")
+           })
+
 #' @rdname schema
 #' @export
 setGeneric("dtypes", function(x) { standardGeneric("dtypes") })
@@ -408,6 +422,10 @@ setGeneric("explain", function(x, ...) { standardGeneric("explain") })
 #' @export
 setGeneric("except", function(x, y) { standardGeneric("except") })
 
+#' @rdname nafunctions
+#' @export
+setGeneric("fillna", function(x, value, cols = NULL) { standardGeneric("fillna") })
+
 #' @rdname filter
 #' @export
 setGeneric("filter", function(x, condition) { standardGeneric("filter") })
@@ -482,11 +500,11 @@ setGeneric("saveAsTable", function(df, tableName, source, mode, ...) {
 
 #' @rdname write.df
 #' @export
-setGeneric("write.df", function(df, path, source, mode, ...) { standardGeneric("write.df") })
+setGeneric("write.df", function(df, path, ...) { standardGeneric("write.df") })
 
 #' @rdname write.df
 #' @export
-setGeneric("saveDF", function(df, path, source, mode, ...) { standardGeneric("saveDF") })
+setGeneric("saveDF", function(df, path, ...) { standardGeneric("saveDF") })
 
 #' @rdname schema
 #' @export
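Moving source and mode into "..." keeps the generics permissive while the DataFrame methods above supply the defaults. A minimal sketch of the relaxed call sites (the output path is hypothetical):

    write.df(df, "people_out")                           # default source, mode = "append"
    write.df(df, "people_out", "parquet", "overwrite")   # explicit arguments still accepted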

R/pkg/R/serialize.R

Lines changed: 9 additions & 1 deletion
@@ -160,6 +160,14 @@ writeList <- function(con, arr) {
   }
 }
 
+# Used to pass arrays where the elements can be of different types
+writeGenericList <- function(con, list) {
+  writeInt(con, length(list))
+  for (elem in list) {
+    writeObject(con, elem)
+  }
+}
+
 # Used to pass in hash maps required on Java side.
 writeEnv <- function(con, env) {
   len <- length(env)
@@ -168,7 +176,7 @@ writeEnv <- function(con, env) {
   if (len > 0) {
     writeList(con, as.list(ls(env)))
     vals <- lapply(ls(env), function(x) { env[[x]] })
-    writeList(con, as.list(vals))
+    writeGenericList(con, as.list(vals))
   }
 }
 
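writeGenericList is needed because writeList assumes all elements share one type, while the environment built by fillna can mix numerics and characters. A minimal sketch of the case it handles (these are internal SparkR SerDe functions, shown only to illustrate):

    env <- new.env()
    env[["age"]] <- 20          # numeric
    env[["name"]] <- "unknown"  # character
    # writeEnv(con, env) now serializes the values via writeGenericList,
    # tagging each element with its own type through writeObject.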

R/pkg/R/sparkR.R

Lines changed: 19 additions & 5 deletions
@@ -225,14 +225,21 @@ sparkR.init <- function(
 #' sqlContext <- sparkRSQL.init(sc)
 #'}
 
-sparkRSQL.init <- function(jsc) {
+sparkRSQL.init <- function(jsc = NULL) {
   if (exists(".sparkRSQLsc", envir = .sparkREnv)) {
     return(get(".sparkRSQLsc", envir = .sparkREnv))
   }
 
+  # If jsc is NULL, create a Spark Context
+  sc <- if (is.null(jsc)) {
+    sparkR.init()
+  } else {
+    jsc
+  }
+
   sqlContext <- callJStatic("org.apache.spark.sql.api.r.SQLUtils",
-                            "createSQLContext",
-                            jsc)
+                            "createSQLContext",
+                            sc)
   assign(".sparkRSQLsc", sqlContext, envir = .sparkREnv)
   sqlContext
 }
@@ -249,12 +256,19 @@ sparkRSQL.init <- function(jsc) {
 #' sqlContext <- sparkRHive.init(sc)
 #'}
 
-sparkRHive.init <- function(jsc) {
+sparkRHive.init <- function(jsc = NULL) {
   if (exists(".sparkRHivesc", envir = .sparkREnv)) {
     return(get(".sparkRHivesc", envir = .sparkREnv))
   }
 
-  ssc <- callJMethod(jsc, "sc")
+  # If jsc is NULL, create a Spark Context
+  sc <- if (is.null(jsc)) {
+    sparkR.init()
+  } else {
+    jsc
+  }
+
+  ssc <- callJMethod(sc, "sc")
   hiveCtx <- tryCatch({
     newJObject("org.apache.spark.sql.hive.HiveContext", ssc)
   }, error = function(err) {
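With jsc now defaulting to NULL, both entry points can bootstrap a Spark context on their own. A minimal sketch:

    sqlContext <- sparkRSQL.init()    # no sc argument; sparkR.init() is called internally
    hiveContext <- sparkRHive.init()  # likewise reuses or creates a Spark context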
