Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions R/pkg/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,5 @@ Collate:
'serialize.R'
'sparkR.R'
'stats.R'
'types.R'
'utils.R'
6 changes: 3 additions & 3 deletions R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@ export("setJobGroup",
exportClasses("DataFrame")

exportMethods("arrange",
"as.data.frame",
"attach",
"cache",
"collect",
"coltypes",
"columns",
"count",
"cov",
Expand Down Expand Up @@ -262,6 +264,4 @@ export("structField",
"structType",
"structType.jobj",
"structType.structField",
"print.structType")

export("as.data.frame")
"print.structType")
49 changes: 49 additions & 0 deletions R/pkg/R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -2152,3 +2152,52 @@ setMethod("with",
newEnv <- assignNewEnv(data)
eval(substitute(expr), envir = newEnv, enclos = newEnv)
})

#' Returns the column types of a DataFrame.
#'
#' @name coltypes
#' @title Get column types of a DataFrame
#' @family dataframe_funcs
#' @param x (DataFrame)
#' @return value (character) A character vector with the column types of the given DataFrame
#' @rdname coltypes
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add family

#' @family dataframe_funcs

and example, like

#' @examples
#' \dontrun{
#' with(irisDf, nrow(Sepal_Width))
#' }

#' @examples \dontrun{
#' irisDF <- createDataFrame(sqlContext, iris)
#' coltypes(irisDF)
#' }
setMethod("coltypes",
          signature(x = "DataFrame"),
          function(x) {
            # dtypes(x) is indexed with [[2]] to pull the Spark type string out of
            # each per-column entry. vapply() keeps the result a character vector
            # even when there are no columns (sapply() would return list()).
            types <- vapply(dtypes(x), function(field) field[[2]], character(1))

            # Map each Spark type onto an R type using PRIMITIVE_TYPES first and
            # COMPLEX_TYPES as a prefix-matched fallback.
            rTypes <- vapply(types, USE.NAMES = FALSE, FUN.VALUE = character(1),
                             FUN = function(sparkType) {
              # Exact match against the primitive types
              rType <- PRIMITIVE_TYPES[[sparkType]]

              if (is.null(rType)) {
                # Complex types carry a suffix (e.g. "map<string,string>"), so
                # compare only the leading characters against each known name.
                for (prefix in names(COMPLEX_TYPES)) {
                  if (substring(sparkType, 1, nchar(prefix)) == prefix) {
                    rType <- COMPLEX_TYPES[[prefix]]
                    break
                  }
                }

                if (is.null(rType)) {
                  stop(paste("Unsupported data type: ", sparkType))
                }
              }

              # COMPLEX_TYPES stores a logical NA; coerce so that every element
              # satisfies the character(1) contract of vapply().
              as.character(rType)
            })

            # Types without an R equivalent came back as NA: report the original
            # Spark type string for those columns instead.
            unmapped <- is.na(rTypes)
            rTypes[unmapped] <- types[unmapped]

            rTypes
          })
4 changes: 4 additions & 0 deletions R/pkg/R/generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -1047,3 +1047,7 @@ setGeneric("attach")
#' @rdname with
#' @export
setGeneric("with")

#' @rdname coltypes
#' @export
setGeneric(
  name = "coltypes",
  def = function(x) {
    standardGeneric("coltypes")
  }
)
15 changes: 1 addition & 14 deletions R/pkg/R/schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,20 +115,7 @@ structField.jobj <- function(x) {
}

checkType <- function(type) {
primtiveTypes <- c("byte",
"integer",
"float",
"double",
"numeric",
"character",
"string",
"binary",
"raw",
"logical",
"boolean",
"timestamp",
"date")
if (type %in% primtiveTypes) {
if (!is.null(PRIMITIVE_TYPES[[type]])) {
return()
} else {
# Check complex types
Expand Down
43 changes: 43 additions & 0 deletions R/pkg/R/types.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# types.R. This file handles the data type mapping between Spark and R

# The primitive data types, where names(PRIMITIVE_TYPES) are Scala types whereas
# values are equivalent R types. This is stored in an environment so that a type
# can be looked up by name with `[[`, which yields NULL for an unknown name.
# hash = TRUE is requested explicitly: list2env()/as.environment() only create a
# hashed environment by default for lists with more than 100 elements.
PRIMITIVE_TYPES <- list2env(
  list(
    "byte" = "integer",
    "tinyint" = "integer",
    "smallint" = "integer",
    "integer" = "integer",
    "bigint" = "numeric",
    "float" = "numeric",
    "double" = "numeric",
    "decimal" = "numeric",
    "string" = "character",
    "binary" = "raw",
    "boolean" = "logical",
    "timestamp" = "POSIXct",
    "date" = "Date"),
  envir = new.env(hash = TRUE, parent = emptyenv()))

# The complex data types. These do not have any direct mapping to R's types,
# so each one maps to NA.
COMPLEX_TYPES <- list(
  "map" = NA,
  "array" = NA,
  "struct" = NA)

# The full list of data types: every primitive entry plus every complex entry,
# again held in an explicitly hashed environment.
DATA_TYPES <- list2env(
  c(as.list(PRIMITIVE_TYPES), COMPLEX_TYPES),
  envir = new.env(hash = TRUE, parent = emptyenv()))
24 changes: 23 additions & 1 deletion R/pkg/inst/tests/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -1467,8 +1467,9 @@ test_that("SQL error message is returned from JVM", {
expect_equal(grepl("Table not found: blah", retError), TRUE)
})

irisDF <- createDataFrame(sqlContext, iris)

test_that("Method as.data.frame as a synonym for collect()", {
irisDF <- createDataFrame(sqlContext, iris)
expect_equal(as.data.frame(irisDF), collect(irisDF))
irisDF2 <- irisDF[irisDF$Species == "setosa", ]
expect_equal(as.data.frame(irisDF2), collect(irisDF2))
Expand Down Expand Up @@ -1503,6 +1504,27 @@ test_that("with() on a DataFrame", {
expect_equal(nrow(sum2), 35)
})

test_that("Method coltypes() to get R's data types of a DataFrame", {
  # iris has four numeric measurement columns followed by Species
  expect_equal(coltypes(irisDF), c(rep("numeric", 4), "character"))

  # Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
  data <- data.frame(c1 = c(1, 2, 3),
                     c2 = c(TRUE, FALSE, TRUE),
                     c3 = c("2015/01/01 10:00:00", "2015/01/02 10:00:00", "2015/01/03 10:00:00"))

  # Explicit schema handed to createDataFrame() so the three columns are
  # declared as byte, boolean and timestamp.
  schema <- structType(structField("c1", "byte"),
                       structField("c3", "boolean"),
                       structField("c4", "timestamp"))

  # Test primitive types
  DF <- createDataFrame(sqlContext, data, schema)
  expect_equal(coltypes(DF), c("integer", "logical", "POSIXct"))

  # Test complex types: there is no R equivalent, so the Spark type string
  # itself is expected back.
  x <- createDataFrame(sqlContext, list(list(as.environment(
    list("a" = "b", "c" = "d", "e" = "f")))))
  expect_equal(coltypes(x), "map<string,string>")
})

unlink(parquetPath)
unlink(jsonPath)
unlink(jsonPathNa)