Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 19 additions & 15 deletions R/pkg/R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -676,8 +676,8 @@ setMethod("dim",
setMethod("collect",
signature(x = "DataFrame"),
function(x, stringsAsFactors = FALSE) {
names <- columns(x)
ncol <- length(names)
dtypes <- dtypes(x)
ncol <- length(dtypes)
if (ncol <= 0) {
# empty data.frame with 0 columns and 0 rows
data.frame()
Expand All @@ -700,25 +700,29 @@ setMethod("collect",
# data of complex type can be held. But getting a cell from a column
# of list type returns a list instead of a vector. So for columns of
# non-complex type, append them as vector.
#
# For columns of complex type, take care when accessing them.
# Getting a column of complex type returns a list.
# Getting a cell from a column of complex type returns a list instead of a vector.
col <- listCols[[colIndex]]
colName <- dtypes[[colIndex]][[1]]
if (length(col) <= 0) {
df[[names[colIndex]]] <- col
df[[colName]] <- col
} else {
# TODO: more robust check on column of primitive types
vec <- do.call(c, col)
if (class(vec) != "list") {
df[[names[colIndex]]] <- vec
colType <- dtypes[[colIndex]][[2]]
# Note that "binary" columns behave like complex types.
if (!is.null(PRIMITIVE_TYPES[[colType]]) && colType != "binary") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a comment here as well? Something like: NOTE: "binary" columns behave like complex types.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added

vec <- do.call(c, col)
stopifnot(class(vec) != "list")
df[[colName]] <- vec
} else {
# For columns of complex type, take care when accessing them.
# Getting a column of complex type returns a list.
# Getting a cell from a column of complex type returns a list instead of a vector.
df[[names[colIndex]]] <- col
}
df[[colName]] <- col
}
}
}
df
}
df
}
})
})

#' Limit
#'
Expand Down
2 changes: 1 addition & 1 deletion R/pkg/R/SQLContext.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ infer_type <- function(x) {
})
type <- Reduce(paste0, type)
type <- paste0("struct<", substr(type, 1, nchar(type) - 1), ">")
} else if (length(x) > 1) {
} else if (length(x) > 1 && type != "binary") {
paste0("array<", infer_type(x[[1]]), ">")
} else {
type
Expand Down
37 changes: 21 additions & 16 deletions R/pkg/R/types.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,30 @@
# values are equivalent R types. This is stored in an environment to allow for
# more efficient look up (environments use hashmaps).
PRIMITIVE_TYPES <- as.environment(list(
"byte"="integer",
"tinyint"="integer",
"smallint"="integer",
"integer"="integer",
"bigint"="numeric",
"float"="numeric",
"double"="numeric",
"decimal"="numeric",
"string"="character",
"binary"="raw",
"boolean"="logical",
"timestamp"="POSIXct",
"date"="Date"))
"tinyint" = "integer",
"smallint" = "integer",
"int" = "integer",
"bigint" = "numeric",
"float" = "numeric",
"double" = "numeric",
"decimal" = "numeric",
"string" = "character",
"binary" = "raw",
"boolean" = "logical",
"timestamp" = "POSIXct",
"date" = "Date",
# The following types are not SQL types returned by dtypes(). They are listed here for use
# by checkType() in schema.R.
# TODO: refactor checkType() in schema.R.
"byte" = "integer",
"integer" = "integer"
))

# The complex data types. These do not have any direct mapping to R's types.
COMPLEX_TYPES <- list(
"map"=NA,
"array"=NA,
"struct"=NA)
"map" = NA,
"array" = NA,
"struct" = NA)

# The full list of data types: the entries of PRIMITIVE_TYPES (converted back
# to a list) combined with the entries of COMPLEX_TYPES, stored together in a
# single environment.
DATA_TYPES <- as.environment(c(as.list(PRIMITIVE_TYPES), COMPLEX_TYPES))
6 changes: 6 additions & 0 deletions R/pkg/inst/tests/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ test_that("infer types and check types", {
expect_equal(infer_type(e), "map<string,integer>")

expect_error(checkType("map<integer,integer>"), "Key type in a map must be string or character")

expect_equal(infer_type(as.raw(c(1, 2, 3))), "binary")
})

test_that("structType and structField", {
Expand Down Expand Up @@ -250,6 +252,10 @@ test_that("create DataFrame from list or data.frame", {

mtcarsdf <- createDataFrame(sqlContext, mtcars)
expect_equivalent(collect(mtcarsdf), mtcars)

bytes <- as.raw(c(1, 2, 3))
df <- createDataFrame(sqlContext, list(list(bytes)))
expect_equal(collect(df)[[1]][[1]], bytes)
})

test_that("create DataFrame with different data types", {
Expand Down