From 5003a8a66d9ab2e7c14c7d0e65118dce6ff11e6a Mon Sep 17 00:00:00 2001 From: felixcheung Date: Wed, 21 Oct 2015 23:48:43 -0700 Subject: [PATCH 1/9] Add support for colnames, colnames<-, coltypes<- --- R/pkg/NAMESPACE | 3 +- R/pkg/R/DataFrame.R | 52 ++++++++++++++++++++++++++++++++ R/pkg/R/generics.R | 12 ++++++++ R/pkg/inst/tests/test_sparkSQL.R | 24 +++++++++++++++ 4 files changed, 90 insertions(+), 1 deletion(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 5d04dd6acaab..ae7b4c2b42e8 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -27,6 +27,7 @@ exportMethods("arrange", "attach", "cache", "collect", + "colnames", "coltypes", "columns", "count", @@ -276,4 +277,4 @@ export("structField", "structType", "structType.jobj", "structType.structField", - "print.structType") \ No newline at end of file + "print.structType") diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 8a13e7a36766..1d762a7ecc2f 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -254,6 +254,7 @@ setMethod("dtypes", #' @family DataFrame functions #' @rdname columns #' @name columns + #' @export #' @examples #'\dontrun{ @@ -290,6 +291,57 @@ setMethod("names<-", } }) +#' @rdname columns +#' @name colnames +setMethod("colnames", + signature(x = "DataFrame"), + function(x) { + columns(x) + }) + +#' @rdname columns +#' @name colnames<- +setMethod("colnames<-", + signature(x = "DataFrame", value = "character"), + function(x, value) { + sdf <- callJMethod(x@sdf, "toDF", as.list(value)) + dataFrame(sdf) + }) + +#' coltypes +#' +#' Set the column types of a DataFrame. +#' +#' @name coltypes +#' @param x (DataFrame) +#' @return value (character) A character vector with the target column types for the given DataFrame +#' @rdname coltypes +#' @aliases coltypes +#' @export +#' @examples +#'\dontrun{ +#' sc <- sparkR.init() +#' sqlContext <- sparkRSQL.init(sc) +#' path <- "path/to/file.json" +#' df <- jsonFile(sqlContext, path) +#' coltypes(df) <- c("string", "integer") +#'} +setMethod("coltypes<-", + signature(x = "DataFrame", value = "character"), + function(x, value) { + cols <- columns(x) + ncols <- length(cols) + if (length(value) == 0 || length(value) != ncols) { + stop("Length of type vector should match the number of columns for DataFrame") + } + newCols <- lapply(seq_len(ncols), function(i) { + col <- getColumn(x, cols[i]) + cast(col, value[i]) + }) + nx <- select(x, newCols) + dataFrame(nx@sdf) + }) + #' Register Temporary Table #' #' Registers a DataFrame as a Temporary Table in the SQLContext diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 0c305441e043..ebea87f573d9 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -385,6 +385,18 @@ setGeneric("agg", function (x, ...) { standardGeneric("agg") }) #' @export setGeneric("arrange", function(x, col, ...) { standardGeneric("arrange") }) +#' @rdname colnames +#' @export +setGeneric("colnames", function(x) { standardGeneric("colnames") }) + +#' @rdname colnames<- +#' @export +setGeneric("colnames<-", function(x, value) { standardGeneric("colnames<-") }) + +#' @rdname coltypes<- +#' @export +setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") }) + #' @rdname schema #' @export setGeneric("columns", function(x) {standardGeneric("columns") }) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 0fbe0658265b..d4db19cbf41b 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -622,6 +622,30 @@ test_that("schema(), dtypes(), columns(), names() return the correct values/form expect_equal(testNames[2], "name") }) +test_that("names() colnames() set the column names", { + df <- jsonFile(sqlContext, jsonPath) + names(df) <- c("col1", "col2") + expect_equal(colnames(df)[2], "col2") + + colnames(df) <- c("col3", "col4") + expect_equal(names(df)[1], "col3") +}) + +test_that("coltypes() set the column types", { + df <- selectExpr(jsonFile(sqlContext, jsonPath), "name", "(age * 1.21) as age") + expect_equal(dtypes(df), list(c("name", "string"), c("age", "decimal(24,2)"))) + + df1 <- select(df, cast(df$age, "integer")) + coltypes(df) <- c("string", "integer") + expect_equal(dtypes(df), list(c("cast(name as string)", "string"), c("cast(age as int)", "int"))) + value <- collect(df[, 2])[[3, 1]] + expect_equal(value, collect(df1)[[3, 1]]) + expect_equal(value, 22) + + expect_error(coltypes(df) <- c("string"), + "Length of type vector should match the number of columns for DataFrame") +}) + test_that("head() and first() return the correct data", { df <- jsonFile(sqlContext, jsonPath) testHead <- head(df) From 033e91690161a953e6cd2e6cd6b60d0b89c148b7 Mon Sep 17 00:00:00 2001 From: felixcheung Date: Sun, 1 Nov 2015 12:33:28 -0800 Subject: [PATCH 2/9] Take R types instead to map to JVM types, add check for NA to keep column --- R/pkg/R/DataFrame.R | 24 +++++++++++++++++++++--- R/pkg/inst/tests/test_sparkSQL.R | 10 ++++++++-- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 1d762a7ecc2f..a46019de66b2 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -308,13 +308,22 @@ setMethod("colnames<-", dataFrame(sdf) }) +rToScalaTypes <- new.env() +rToScalaTypes[["integer"]] <- "integer" # in R, integer is 32bit +rToScalaTypes[["numeric"]] <- "double" # in R, numeric == double which is 64bit +rToScalaTypes[["double"]] <- "double" +rToScalaTypes[["character"]] <- "string" +rToScalaTypes[["logical"]] <- "boolean" + #' coltypes #' #' Set the column types of a DataFrame. #' #' @name coltypes #' @param x (DataFrame) -#' @return value (character) A character vector with the target column types for the given DataFrame +#' @return value (character) A character vector with the target column types for the given +#' DataFrame. Column types can be one of integer, numeric/double, character, logical, or NA +#' to keep that column as-is. #' @rdname coltypes #' @aliases coltypes #' @export @@ -324,7 +333,8 @@ setMethod("colnames<-", #' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" #' df <- jsonFile(sqlContext, path) -#' coltypes(df) <- c("string", "integer") +#' coltypes(df) <- c("character", "integer") +#' coltypes(df) <- c(NA, "numeric") #'} setMethod("coltypes<-", signature(x = "DataFrame", value = "character"), @@ -336,7 +346,15 @@ setMethod("coltypes<-", } newCols <- lapply(seq_len(ncols), function(i) { col <- getColumn(x, cols[i]) - cast(col, value[i]) + if (!is.na(value[i])) { + stype <- rToScalaTypes[[value[i]]] + if (is.null(stype)) { + stop("Only atomic type is supported for column types") + } + cast(col, stype) + } else { + col + } }) nx <- select(x, newCols) dataFrame(nx@sdf) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index d4db19cbf41b..ffdec7070129 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -636,14 +636,20 @@ test_that("coltypes() set the column types", { expect_equal(dtypes(df), list(c("name", "string"), c("age", "decimal(24,2)"))) df1 <- select(df, cast(df$age, "integer")) - coltypes(df) <- c("string", "integer") + coltypes(df) <- c("character", "integer") expect_equal(dtypes(df), list(c("cast(name as string)", "string"), c("cast(age as int)", "int"))) value <- collect(df[, 2])[[3, 1]] expect_equal(value, collect(df1)[[3, 1]]) expect_equal(value, 22) - expect_error(coltypes(df) <- c("string"), + coltypes(df) <- c(NA, "numeric") + expect_equal(dtypes(df), list(c("cast(name as string)", "string"), + c("cast(cast(age as int) as double)", "double"))) + + expect_error(coltypes(df) <- c("character"), "Length of type vector should match the number of columns for DataFrame") + expect_error(coltypes(df) <- c("environment", "list"), + "Only atomic type is supported for column types") }) test_that("head() and first() return the correct data", { From f2b5d02397cb5bb664a2d9cc2372c60090df529f Mon Sep 17 00:00:00 2001 From: felixcheung Date: Sun, 1 Nov 2015 13:27:10 -0800 Subject: [PATCH 3/9] This seems to fix the Rd error - no idea why it worked before. --- R/pkg/R/generics.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index ebea87f573d9..9f5c84baeb5e 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -385,15 +385,15 @@ setGeneric("agg", function (x, ...) { standardGeneric("agg") }) #' @export setGeneric("arrange", function(x, col, ...) { standardGeneric("arrange") }) -#' @rdname colnames +#' @rdname columns #' @export setGeneric("colnames", function(x) { standardGeneric("colnames") }) -#' @rdname colnames<- +#' @rdname columns #' @export setGeneric("colnames<-", function(x, value) { standardGeneric("colnames<-") }) -#' @rdname coltypes<- +#' @rdname columns #' @export setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") }) From b0306c28e014f91f361640f08e6bd6f5e27eb6a2 Mon Sep 17 00:00:00 2001 From: felixcheung Date: Sun, 1 Nov 2015 16:33:40 -0800 Subject: [PATCH 4/9] fix test broken from column name change from cast --- R/pkg/inst/tests/test_sparkSQL.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index ffdec7070129..233b3c4d79fb 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -637,14 +637,13 @@ test_that("coltypes() set the column types", { df1 <- select(df, cast(df$age, "integer")) coltypes(df) <- c("character", "integer") - expect_equal(dtypes(df), list(c("cast(name as string)", "string"), c("cast(age as int)", "int"))) + expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"))) value <- collect(df[, 2])[[3, 1]] expect_equal(value, collect(df1)[[3, 1]]) expect_equal(value, 22) coltypes(df) <- c(NA, "numeric") - expect_equal(dtypes(df), list(c("cast(name as string)", "string"), - c("cast(cast(age as int) as double)", "double"))) + expect_equal(dtypes(df), list(c("name", "string"), c("age", "double"))) expect_error(coltypes(df) <- c("character"), "Length of type vector should match the number of columns for DataFrame") From 9006a059c899fc656c04028b5b9c4a95c7c64d3e Mon Sep 17 00:00:00 2001 From: felixcheung Date: Wed, 11 Nov 2015 22:40:08 -0800 Subject: [PATCH 5/9] rebase, merge with coltypes change, fix generic, doc --- R/pkg/NAMESPACE | 1 + R/pkg/R/DataFrame.R | 117 +++++++++++++++---------------- R/pkg/R/generics.R | 11 +-- R/pkg/R/types.R | 8 +++ R/pkg/inst/tests/test_sparkSQL.R | 44 ++++++------ 5 files changed, 97 insertions(+), 84 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index ae7b4c2b42e8..b6f08b78dffa 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -29,6 +29,7 @@ exportMethods("arrange", "collect", "colnames", "coltypes", + "coltypes<-", "columns", "count", "cov", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index a46019de66b2..0af3bb0be25f 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -263,6 +263,7 @@ setMethod("dtypes", #' path <- "path/to/file.json" #' df <- jsonFile(sqlContext, path) #' columns(df) +#' colnames(df) #'} setMethod("columns", signature(x = "DataFrame"), @@ -295,7 +296,7 @@ setMethod("names<-", #' @name colnames setMethod("colnames", signature(x = "DataFrame"), - function(x) { + function(x, do.NULL = TRUE, prefix = "col") { columns(x) }) @@ -308,24 +309,67 @@ setMethod("colnames<-", dataFrame(sdf) }) -rToScalaTypes <- new.env() -rToScalaTypes[["integer"]] <- "integer" # in R, integer is 32bit -rToScalaTypes[["numeric"]] <- "double" # in R, numeric == double which is 64bit -rToScalaTypes[["double"]] <- "double" -rToScalaTypes[["character"]] <- "string" -rToScalaTypes[["logical"]] <- "boolean" +#' coltypes +#' +#' Get column types of a DataFrame +#' +#' @name coltypes +#' @param x (DataFrame) +#' @return value (character) A character vector with the column types of the given DataFrame +#' @rdname coltypes +#' @family dataframe_funcs +#' @export +#' @examples +#'\dontrun{ +#' irisDF <- createDataFrame(sqlContext, iris) +#' coltypes(irisDF) +#'} +setMethod("coltypes", + signature(x = "DataFrame"), + function(x) { + # Get the data types of the DataFrame by invoking dtypes() function + types <- sapply(dtypes(x), function(x) {x[[2]]}) + + # Map Spark data types into R's data types using DATA_TYPES environment + rTypes <- sapply(types, USE.NAMES=F, FUN=function(x) { + # Check for primitive types + type <- PRIMITIVE_TYPES[[x]] + + if (is.null(type)) { + # Check for complex types + for (t in names(COMPLEX_TYPES)) { + if (substring(x, 1, nchar(t)) == t) { + type <- COMPLEX_TYPES[[t]] + break + } + } + + if (is.null(type)) { + stop(paste("Unsupported data type: ", x)) + } + } + type + }) + + # Find which types don't have mapping to R + naIndices <- which(is.na(rTypes)) + + # Assign the original scala data types to the unmatched ones + rTypes[naIndices] <- types[naIndices] + + rTypes + }) #' coltypes #' #' Set the column types of a DataFrame. #' -#' @name coltypes +#' @name coltypes<- #' @param x (DataFrame) -#' @return value (character) A character vector with the target column types for the given +#' @param value (character) A character vector with the target column types for the given #' DataFrame. Column types can be one of integer, numeric/double, character, logical, or NA #' to keep that column as-is. #' @rdname coltypes -#' @aliases coltypes #' @export #' @examples #'\dontrun{ @@ -341,7 +385,10 @@ setMethod("coltypes<-", function(x, value) { cols <- columns(x) ncols <- length(cols) - if (length(value) == 0 || length(value) != ncols) { + if (length(value) == 0) { + stop("Cannot set types of an empty DataFrame with no Column") + } + if (length(value) != ncols) { stop("Length of type vector should match the number of columns for DataFrame") } newCols <- lapply(seq_len(ncols), function(i) { @@ -2173,51 +2220,3 @@ setMethod("with", eval(substitute(expr), envir = newEnv, enclos = newEnv) }) -#' Returns the column types of a DataFrame. -#' -#' @name coltypes -#' @title Get column types of a DataFrame -#' @family dataframe_funcs -#' @param x (DataFrame) -#' @return value (character) A character vector with the column types of the given DataFrame -#' @rdname coltypes -#' @examples \dontrun{ -#' irisDF <- createDataFrame(sqlContext, iris) -#' coltypes(irisDF) -#' } -setMethod("coltypes", - signature(x = "DataFrame"), - function(x) { - # Get the data types of the DataFrame by invoking dtypes() function - types <- sapply(dtypes(x), function(x) {x[[2]]}) - - # Map Spark data types into R's data types using DATA_TYPES environment - rTypes <- sapply(types, USE.NAMES=F, FUN=function(x) { - - # Check for primitive types - type <- PRIMITIVE_TYPES[[x]] - - if (is.null(type)) { - # Check for complex types - for (t in names(COMPLEX_TYPES)) { - if (substring(x, 1, nchar(t)) == t) { - type <- COMPLEX_TYPES[[t]] - break - } - } - - if (is.null(type)) { - stop(paste("Unsupported data type: ", x)) - } - } - type - }) - - # Find which types don't have mapping to R - naIndices <- which(is.na(rTypes)) - - # Assign the original scala data types to the unmatched ones - rTypes[naIndices] <- types[naIndices] - - rTypes - }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 9f5c84baeb5e..c3d9abf86db0 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -387,13 +387,17 @@ setGeneric("arrange", function(x, col, ...) { standardGeneric("arrange") }) #' @rdname columns #' @export -setGeneric("colnames", function(x) { standardGeneric("colnames") }) +setGeneric("colnames", function(x, do.NULL = TRUE, prefix = "col") { standardGeneric("colnames") }) #' @rdname columns #' @export setGeneric("colnames<-", function(x, value) { standardGeneric("colnames<-") }) -#' @rdname columns +#' @rdname coltypes +#' @export +setGeneric("coltypes", function(x) { standardGeneric("coltypes") }) + +#' @rdname coltypes #' @export setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") }) @@ -1094,6 +1098,3 @@ setGeneric("attach") #' @export setGeneric("with") -#' @rdname coltypes -#' @export -setGeneric("coltypes", function(x) { standardGeneric("coltypes") }) diff --git a/R/pkg/R/types.R b/R/pkg/R/types.R index 1828c23ab0f6..4b69589dfa24 100644 --- a/R/pkg/R/types.R +++ b/R/pkg/R/types.R @@ -41,3 +41,11 @@ COMPLEX_TYPES <- list( # The full list of data types. DATA_TYPES <- as.environment(c(as.list(PRIMITIVE_TYPES), COMPLEX_TYPES)) + +# An environment for mapping R to Scala, names are R types and values are Scala types. +rToScalaTypes <- new.env() +rToScalaTypes[["integer"]] <- "integer" # in R, integer is 32bit +rToScalaTypes[["numeric"]] <- "double" # in R, numeric == double which is 64bit +rToScalaTypes[["double"]] <- "double" +rToScalaTypes[["character"]] <- "string" +rToScalaTypes[["logical"]] <- "boolean" diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 233b3c4d79fb..327a369a701a 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -629,26 +629,12 @@ test_that("names() colnames() set the column names", { colnames(df) <- c("col3", "col4") expect_equal(names(df)[1], "col3") -}) - -test_that("coltypes() set the column types", { - df <- selectExpr(jsonFile(sqlContext, jsonPath), "name", "(age * 1.21) as age") - expect_equal(dtypes(df), list(c("name", "string"), c("age", "decimal(24,2)"))) - - df1 <- select(df, cast(df$age, "integer")) - coltypes(df) <- c("character", "integer") - expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"))) - value <- collect(df[, 2])[[3, 1]] - expect_equal(value, collect(df1)[[3, 1]]) - expect_equal(value, 22) - - coltypes(df) <- c(NA, "numeric") - expect_equal(dtypes(df), list(c("name", "string"), c("age", "double"))) - expect_error(coltypes(df) <- c("character"), - "Length of type vector should match the number of columns for DataFrame") - expect_error(coltypes(df) <- c("environment", "list"), - "Only atomic type is supported for column types") + # Test base::colnames + m2 <- cbind(1, 1:4) + expect_equal(colnames(m2, do.NULL = FALSE), c("col1", "col2")) + colnames(m2) <- c("x","Y") + expect_equal(colnames(m2), c("x", "Y")) }) test_that("head() and first() return the correct data", { @@ -1645,7 +1631,7 @@ test_that("with() on a DataFrame", { expect_equal(nrow(sum2), 35) }) -test_that("Method coltypes() to get R's data types of a DataFrame", { +test_that("Method coltypes() to get and set R's data types of a DataFrame", { expect_equal(coltypes(irisDF), c(rep("numeric", 4), "character")) data <- data.frame(c1=c(1,2,3), @@ -1664,6 +1650,24 @@ test_that("Method coltypes() to get R's data types of a DataFrame", { x <- createDataFrame(sqlContext, list(list(as.environment( list("a"="b", "c"="d", "e"="f"))))) expect_equal(coltypes(x), "map") + + df <- selectExpr(jsonFile(sqlContext, jsonPath), "name", "(age * 1.21) as age") + expect_equal(dtypes(df), list(c("name", "string"), c("age", "decimal(24,2)"))) + + df1 <- select(df, cast(df$age, "integer")) + coltypes(df) <- c("character", "integer") + expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"))) + value <- collect(df[, 2])[[3, 1]] + expect_equal(value, collect(df1)[[3, 1]]) + expect_equal(value, 22) + + coltypes(df) <- c(NA, "numeric") + expect_equal(dtypes(df), list(c("name", "string"), c("age", "double"))) + + expect_error(coltypes(df) <- c("character"), + "Length of type vector should match the number of columns for DataFrame") + expect_error(coltypes(df) <- c("environment", "list"), + "Only atomic type is supported for column types") }) unlink(parquetPath) From 2c71790c9f73de7f6834724779c905c531529462 Mon Sep 17 00:00:00 2001 From: felixcheung Date: Thu, 12 Nov 2015 20:39:31 -0800 Subject: [PATCH 6/9] fix r doc family tag --- R/pkg/R/DataFrame.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 0af3bb0be25f..91378095f3e8 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -317,7 +317,7 @@ setMethod("colnames<-", #' @param x (DataFrame) #' @return value (character) A character vector with the column types of the given DataFrame #' @rdname coltypes -#' @family dataframe_funcs +#' @family DataFrame functions #' @export #' @examples #'\dontrun{ From 1ac49c055854d7445d1ede032a340f8e02ebb894 Mon Sep 17 00:00:00 2001 From: felixcheung Date: Thu, 19 Nov 2015 17:02:57 -0800 Subject: [PATCH 7/9] rebase, changes from comment --- R/pkg/R/DataFrame.R | 17 ++++++++--------- R/pkg/R/types.R | 12 ++++++------ 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 91378095f3e8..f89e2682d9e2 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -296,7 +296,7 @@ setMethod("names<-", #' @name colnames setMethod("colnames", signature(x = "DataFrame"), - function(x, do.NULL = TRUE, prefix = "col") { + function(x) { columns(x) }) @@ -313,10 +313,10 @@ setMethod("colnames<-", #' #' Get column types of a DataFrame #' -#' @name coltypes -#' @param x (DataFrame) -#' @return value (character) A character vector with the column types of the given DataFrame +#' @param x A SparkSQL DataFrame +#' @return value A character vector with the column types of the given DataFrame #' @rdname coltypes +#' @name coltypes #' @family DataFrame functions #' @export #' @examples @@ -364,12 +364,12 @@ setMethod("coltypes", #' #' Set the column types of a DataFrame. #' -#' @name coltypes<- -#' @param x (DataFrame) -#' @param value (character) A character vector with the target column types for the given +#' @param x A SparkSQL DataFrame +#' @param value A character vector with the target column types for the given #' DataFrame. Column types can be one of integer, numeric/double, character, logical, or NA #' to keep that column as-is. #' @rdname coltypes +#' @name coltypes<- #' @export #' @examples #'\dontrun{ @@ -394,7 +394,7 @@ setMethod("coltypes<-", newCols <- lapply(seq_len(ncols), function(i) { col <- getColumn(x, cols[i]) if (!is.na(value[i])) { - stype <- rToScalaTypes[[value[i]]] + stype <- rToSQLTypes[[value[i]]] if (is.null(stype)) { stop("Only atomic type is supported for column types") } @@ -2219,4 +2219,3 @@ setMethod("with", newEnv <- assignNewEnv(data) eval(substitute(expr), envir = newEnv, enclos = newEnv) }) - diff --git a/R/pkg/R/types.R b/R/pkg/R/types.R index 4b69589dfa24..afdbc9d8a48a 100644 --- a/R/pkg/R/types.R +++ b/R/pkg/R/types.R @@ -43,9 +43,9 @@ COMPLEX_TYPES <- list( DATA_TYPES <- as.environment(c(as.list(PRIMITIVE_TYPES), COMPLEX_TYPES)) # An environment for mapping R to Scala, names are R types and values are Scala types. -rToScalaTypes <- new.env() -rToScalaTypes[["integer"]] <- "integer" # in R, integer is 32bit -rToScalaTypes[["numeric"]] <- "double" # in R, numeric == double which is 64bit -rToScalaTypes[["double"]] <- "double" -rToScalaTypes[["character"]] <- "string" -rToScalaTypes[["logical"]] <- "boolean" +rToSQLTypes <- new.env() +rToSQLTypes[["integer"]] <- "integer" # in R, integer is 32bit +rToSQLTypes[["numeric"]] <- "double" # in R, numeric == double which is 64bit +rToSQLTypes[["double"]] <- "double" +rToSQLTypes[["character"]] <- "string" +rToSQLTypes[["logical"]] <- "boolean" From e399acd4f1605d883d2cbe8e194f1c1125095c30 Mon Sep 17 00:00:00 2001 From: felixcheung Date: Thu, 19 Nov 2015 17:16:49 -0800 Subject: [PATCH 8/9] should not have ignored this file, which breaks style test --- R/pkg/R/generics.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index c3d9abf86db0..711ce38f9e10 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1097,4 +1097,3 @@ setGeneric("attach") #' @rdname with #' @export setGeneric("with") - From d555cf89fa743f51a7d0e75ed9afda0775540027 Mon Sep 17 00:00:00 2001 From: felixcheung Date: Sat, 28 Nov 2015 17:15:35 -0800 Subject: [PATCH 9/9] update from feedback, add tests --- R/pkg/NAMESPACE | 2 ++ R/pkg/R/types.R | 12 ++++++------ R/pkg/inst/tests/test_sparkSQL.R | 7 ++++++- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index b6f08b78dffa..43e5e0119e7f 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -28,6 +28,7 @@ exportMethods("arrange", "cache", "collect", "colnames", + "colnames<-", "coltypes", "coltypes<-", "columns", @@ -58,6 +59,7 @@ exportMethods("arrange", "mutate", "na.omit", "names", + "names<-", "ncol", "nrow", "orderBy", diff --git a/R/pkg/R/types.R b/R/pkg/R/types.R index afdbc9d8a48a..dae4fe858bdb 100644 --- a/R/pkg/R/types.R +++ b/R/pkg/R/types.R @@ -43,9 +43,9 @@ COMPLEX_TYPES <- list( DATA_TYPES <- as.environment(c(as.list(PRIMITIVE_TYPES), COMPLEX_TYPES)) # An environment for mapping R to Scala, names are R types and values are Scala types. -rToSQLTypes <- new.env() -rToSQLTypes[["integer"]] <- "integer" # in R, integer is 32bit -rToSQLTypes[["numeric"]] <- "double" # in R, numeric == double which is 64bit -rToSQLTypes[["double"]] <- "double" -rToSQLTypes[["character"]] <- "string" -rToSQLTypes[["logical"]] <- "boolean" +rToSQLTypes <- as.environment(list( + "integer" = "integer", # in R, integer is 32bit + "numeric" = "double", # in R, numeric == double which is 64bit + "double" = "double", + "character" = "string", + "logical" = "boolean")) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 327a369a701a..2001f6daaa13 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -630,11 +630,16 @@ test_that("names() colnames() set the column names", { colnames(df) <- c("col3", "col4") expect_equal(names(df)[1], "col3") - # Test base::colnames + # Test base::colnames base::names m2 <- cbind(1, 1:4) expect_equal(colnames(m2, do.NULL = FALSE), c("col1", "col2")) colnames(m2) <- c("x","Y") expect_equal(colnames(m2), c("x", "Y")) + + z <- list(a = 1, b = "c", c = 1:3) + expect_equal(names(z)[3], "c") + names(z)[3] <- "c2" + expect_equal(names(z)[3], "c2") }) test_that("head() and first() return the correct data", {