
Commit d7f13a0

merge upstream

2 parents 31c6fb3 + 2cef1cd

188 files changed: +3129 additions, −1910 deletions

.rat-excludes

Lines changed: 0 additions & 1 deletion
@@ -82,5 +82,4 @@ INDEX
 gen-java.*
 .*avpr
 org.apache.spark.sql.sources.DataSourceRegister
-org.apache.spark.scheduler.SparkHistoryListenerFactory
 .*parquet

R/pkg/NAMESPACE

Lines changed: 9 additions & 5 deletions
@@ -27,7 +27,10 @@ exportMethods("arrange",
               "attach",
               "cache",
               "collect",
+              "colnames",
+              "colnames<-",
               "coltypes",
+              "coltypes<-",
               "columns",
               "count",
               "cov",
@@ -56,6 +59,7 @@ exportMethods("arrange",
               "mutate",
               "na.omit",
               "names",
+              "names<-",
               "ncol",
               "nrow",
               "orderBy",
@@ -123,14 +127,14 @@ exportMethods("%in%",
              "count",
              "countDistinct",
              "crc32",
-             "cumeDist",
+             "cume_dist",
              "date_add",
              "date_format",
              "date_sub",
              "datediff",
              "dayofmonth",
              "dayofyear",
-             "denseRank",
+             "dense_rank",
              "desc",
              "endsWith",
              "exp",
@@ -188,7 +192,7 @@ exportMethods("%in%",
              "next_day",
              "ntile",
              "otherwise",
-             "percentRank",
+             "percent_rank",
              "pmod",
              "quarter",
              "rand",
@@ -200,7 +204,7 @@ exportMethods("%in%",
              "rint",
              "rlike",
              "round",
-             "rowNumber",
+             "row_number",
              "rpad",
              "rtrim",
              "second",
@@ -276,4 +280,4 @@ export("structField",
        "structType",
        "structType.jobj",
        "structType.structField",
-       "print.structType")
+       "print.structType")

R/pkg/R/DataFrame.R

Lines changed: 136 additions & 64 deletions
@@ -254,6 +254,7 @@ setMethod("dtypes",
 #' @family DataFrame functions
 #' @rdname columns
 #' @name columns
+
 #' @export
 #' @examples
 #'\dontrun{
@@ -262,6 +263,7 @@ setMethod("dtypes",
 #' path <- "path/to/file.json"
 #' df <- jsonFile(sqlContext, path)
 #' columns(df)
+#' colnames(df)
 #'}
 setMethod("columns",
           signature(x = "DataFrame"),
@@ -290,6 +292,121 @@ setMethod("names<-",
290292
}
291293
})
292294

295+
#' @rdname columns
296+
#' @name colnames
297+
setMethod("colnames",
298+
signature(x = "DataFrame"),
299+
function(x) {
300+
columns(x)
301+
})
302+
303+
#' @rdname columns
304+
#' @name colnames<-
305+
setMethod("colnames<-",
306+
signature(x = "DataFrame", value = "character"),
307+
function(x, value) {
308+
sdf <- callJMethod(x@sdf, "toDF", as.list(value))
309+
dataFrame(sdf)
310+
})
311+
312+
#' coltypes
313+
#'
314+
#' Get column types of a DataFrame
315+
#'
316+
#' @param x A SparkSQL DataFrame
317+
#' @return value A character vector with the column types of the given DataFrame
318+
#' @rdname coltypes
319+
#' @name coltypes
320+
#' @family DataFrame functions
321+
#' @export
322+
#' @examples
323+
#'\dontrun{
324+
#' irisDF <- createDataFrame(sqlContext, iris)
325+
#' coltypes(irisDF)
326+
#'}
327+
setMethod("coltypes",
328+
signature(x = "DataFrame"),
329+
function(x) {
330+
# Get the data types of the DataFrame by invoking dtypes() function
331+
types <- sapply(dtypes(x), function(x) {x[[2]]})
332+
333+
# Map Spark data types into R's data types using DATA_TYPES environment
334+
rTypes <- sapply(types, USE.NAMES=F, FUN=function(x) {
335+
# Check for primitive types
336+
type <- PRIMITIVE_TYPES[[x]]
337+
338+
if (is.null(type)) {
339+
# Check for complex types
340+
for (t in names(COMPLEX_TYPES)) {
341+
if (substring(x, 1, nchar(t)) == t) {
342+
type <- COMPLEX_TYPES[[t]]
343+
break
344+
}
345+
}
346+
347+
if (is.null(type)) {
348+
stop(paste("Unsupported data type: ", x))
349+
}
350+
}
351+
type
352+
})
353+
354+
# Find which types don't have mapping to R
355+
naIndices <- which(is.na(rTypes))
356+
357+
# Assign the original scala data types to the unmatched ones
358+
rTypes[naIndices] <- types[naIndices]
359+
360+
rTypes
361+
})
362+
363+
#' coltypes
364+
#'
365+
#' Set the column types of a DataFrame.
366+
#'
367+
#' @param x A SparkSQL DataFrame
368+
#' @param value A character vector with the target column types for the given
369+
#' DataFrame. Column types can be one of integer, numeric/double, character, logical, or NA
370+
#' to keep that column as-is.
371+
#' @rdname coltypes
372+
#' @name coltypes<-
373+
#' @export
374+
#' @examples
375+
#'\dontrun{
376+
#' sc <- sparkR.init()
377+
#' sqlContext <- sparkRSQL.init(sc)
378+
#' path <- "path/to/file.json"
379+
#' df <- jsonFile(sqlContext, path)
380+
#' coltypes(df) <- c("character", "integer")
381+
#' coltypes(df) <- c(NA, "numeric")
382+
#'}
383+
setMethod("coltypes<-",
384+
signature(x = "DataFrame", value = "character"),
385+
function(x, value) {
386+
cols <- columns(x)
387+
ncols <- length(cols)
388+
if (length(value) == 0) {
389+
stop("Cannot set types of an empty DataFrame with no Column")
390+
}
391+
if (length(value) != ncols) {
392+
stop("Length of type vector should match the number of columns for DataFrame")
393+
}
394+
newCols <- lapply(seq_len(ncols), function(i) {
395+
col <- getColumn(x, cols[i])
396+
if (!is.na(value[i])) {
397+
stype <- rToSQLTypes[[value[i]]]
398+
if (is.null(stype)) {
399+
stop("Only atomic type is supported for column types")
400+
}
401+
cast(col, stype)
402+
} else {
403+
col
404+
}
405+
})
406+
nx <- select(x, newCols)
407+
dataFrame(nx@sdf)
408+
})
409+
293410
#' Register Temporary Table
294411
#'
295412
#' Registers a DataFrame as a Temporary Table in the SQLContext
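A short usage sketch of the accessors this hunk adds, assuming a working sqlContext (it mirrors the roxygen examples above; note that `coltypes<-` accepts only atomic target types, since it casts through the rToSQLTypes lookup):

```r
irisDF <- createDataFrame(sqlContext, iris)

colnames(irisDF)            # alias for columns(irisDF)
colnames(irisDF) <- c("sl", "sw", "pl", "pw", "species")

coltypes(irisDF)            # e.g. "numeric" "numeric" "numeric" "numeric" "character"
# NA entries leave the corresponding column as-is; other entries cast:
coltypes(irisDF) <- c(NA, NA, NA, NA, "character")
```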
@@ -676,8 +793,8 @@ setMethod("dim",
676793
setMethod("collect",
677794
signature(x = "DataFrame"),
678795
function(x, stringsAsFactors = FALSE) {
679-
names <- columns(x)
680-
ncol <- length(names)
796+
dtypes <- dtypes(x)
797+
ncol <- length(dtypes)
681798
if (ncol <= 0) {
682799
# empty data.frame with 0 columns and 0 rows
683800
data.frame()
@@ -700,25 +817,29 @@ setMethod("collect",
                 # data of complex type can be held. But getting a cell from a column
                 # of list type returns a list instead of a vector. So for columns of
                 # non-complex type, append them as vector.
+                #
+                # For columns of complex type, be careful to access them.
+                # Get a column of complex type returns a list.
+                # Get a cell from a column of complex type returns a list instead of a vector.
                 col <- listCols[[colIndex]]
+                colName <- dtypes[[colIndex]][[1]]
                 if (length(col) <= 0) {
-                  df[[names[colIndex]]] <- col
+                  df[[colName]] <- col
                 } else {
-                  # TODO: more robust check on column of primitive types
-                  vec <- do.call(c, col)
-                  if (class(vec) != "list") {
-                    df[[names[colIndex]]] <- vec
+                  colType <- dtypes[[colIndex]][[2]]
+                  # Note that "binary" columns behave like complex types.
+                  if (!is.null(PRIMITIVE_TYPES[[colType]]) && colType != "binary") {
+                    vec <- do.call(c, col)
+                    stopifnot(class(vec) != "list")
+                    df[[colName]] <- vec
                   } else {
-                    # For columns of complex type, be careful to access them.
-                    # Get a column of complex type returns a list.
-                    # Get a cell from a column of complex type returns a list instead of a vector.
-                    df[[names[colIndex]]] <- col
-                  }
+                    df[[colName]] <- col
+                  }
+                }
               }
+              df
             }
-            df
-          }
-        })
+          })

 #' Limit
 #'
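The "binary" special case above exists because collect() previously flattened every column with `do.call(c, col)` and kept the result whenever it was not a list; for raw vectors that flattening silently merges all rows' bytes into one value. A self-contained illustration in plain R (no Spark required):

```r
# Two cells of a hypothetical binary column, one raw vector per row:
col <- list(as.raw(c(0x01, 0x02)), as.raw(0x03))
do.call(c, col)   # a single raw vector of length 3 -- row boundaries lost

# For a primitive column, the same flattening is exactly what we want:
col2 <- list(1L, 2L, 3L)
do.call(c, col2)  # 1L 2L 3L
```

Hence binary columns are now appended as list columns, like the other complex types.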
@@ -2102,52 +2223,3 @@ setMethod("with",
21022223
newEnv <- assignNewEnv(data)
21032224
eval(substitute(expr), envir = newEnv, enclos = newEnv)
21042225
})
2105-
2106-
#' Returns the column types of a DataFrame.
2107-
#'
2108-
#' @name coltypes
2109-
#' @title Get column types of a DataFrame
2110-
#' @family dataframe_funcs
2111-
#' @param x (DataFrame)
2112-
#' @return value (character) A character vector with the column types of the given DataFrame
2113-
#' @rdname coltypes
2114-
#' @examples \dontrun{
2115-
#' irisDF <- createDataFrame(sqlContext, iris)
2116-
#' coltypes(irisDF)
2117-
#' }
2118-
setMethod("coltypes",
2119-
signature(x = "DataFrame"),
2120-
function(x) {
2121-
# Get the data types of the DataFrame by invoking dtypes() function
2122-
types <- sapply(dtypes(x), function(x) {x[[2]]})
2123-
2124-
# Map Spark data types into R's data types using DATA_TYPES environment
2125-
rTypes <- sapply(types, USE.NAMES=F, FUN=function(x) {
2126-
2127-
# Check for primitive types
2128-
type <- PRIMITIVE_TYPES[[x]]
2129-
2130-
if (is.null(type)) {
2131-
# Check for complex types
2132-
for (t in names(COMPLEX_TYPES)) {
2133-
if (substring(x, 1, nchar(t)) == t) {
2134-
type <- COMPLEX_TYPES[[t]]
2135-
break
2136-
}
2137-
}
2138-
2139-
if (is.null(type)) {
2140-
stop(paste("Unsupported data type: ", x))
2141-
}
2142-
}
2143-
type
2144-
})
2145-
2146-
# Find which types don't have mapping to R
2147-
naIndices <- which(is.na(rTypes))
2148-
2149-
# Assign the original scala data types to the unmatched ones
2150-
rTypes[naIndices] <- types[naIndices]
2151-
2152-
rTypes
2153-
})

R/pkg/R/SQLContext.R

Lines changed: 1 addition & 1 deletion
@@ -63,7 +63,7 @@ infer_type <- function(x) {
     })
     type <- Reduce(paste0, type)
     type <- paste0("struct<", substr(type, 1, nchar(type) - 1), ">")
-  } else if (length(x) > 1) {
+  } else if (length(x) > 1 && type != "binary") {
     paste0("array<", infer_type(x[[1]]), ">")
   } else {
     type
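
The added `type != "binary"` guard stops infer_type from promoting a multi-byte raw vector to an array type: an R raw vector is one binary value even when its length exceeds 1. A self-contained sketch of the rule, with the hypothetical `infer_type_sketch` standing in for the real function (which handles many more types):

```r
infer_type_sketch <- function(x) {
  type <- switch(class(x)[1],
                 integer   = "integer",
                 numeric   = "double",
                 character = "string",
                 logical   = "boolean",
                 raw       = "binary",
                 stop("unsupported type"))
  # The guard this commit adds: length > 1 means "array", except for raw
  # vectors, which stay a single binary value.
  if (length(x) > 1 && type != "binary") {
    paste0("array<", type, ">")
  } else {
    type
  }
}

infer_type_sketch(as.raw(c(1, 2, 3)))  # "binary", not "array<binary>"
infer_type_sketch(c(1.5, 2.5))         # "array<double>"
```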
