From 5003a8a66d9ab2e7c14c7d0e65118dce6ff11e6a Mon Sep 17 00:00:00 2001
From: felixcheung <felixcheung_m@hotmail.com>
Date: Wed, 21 Oct 2015 23:48:43 -0700
Subject: [PATCH 1/9] Add support for colnames, colnames<-, coltypes<-

---
 R/pkg/NAMESPACE                  |  3 +-
 R/pkg/R/DataFrame.R              | 52 ++++++++++++++++++++++++++++++++
 R/pkg/R/generics.R               | 12 ++++++++
 R/pkg/inst/tests/test_sparkSQL.R | 24 +++++++++++++++
 4 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 5d04dd6acaab..ae7b4c2b42e8 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -27,6 +27,7 @@ exportMethods("arrange",
               "attach",
               "cache",
               "collect",
+              "colnames",
               "coltypes",
               "columns",
               "count",
@@ -276,4 +277,4 @@ export("structField",
        "structType",
        "structType.jobj",
        "structType.structField",
-       "print.structType")
\ No newline at end of file
+       "print.structType")
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 8a13e7a36766..1d762a7ecc2f 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -254,6 +254,7 @@ setMethod("dtypes",
 #' @family DataFrame functions
 #' @rdname columns
 #' @name columns
+#'
 #' @export
 #' @examples
 #'\dontrun{
@@ -290,6 +291,57 @@ setMethod("names<-",
             }
           })
 
+#' @rdname columns
+#' @name colnames
+setMethod("colnames",
+          signature(x = "DataFrame"),
+          function(x) {
+            columns(x)
+          })
+
+#' @rdname columns
+#' @name colnames<-
+setMethod("colnames<-",
+          signature(x = "DataFrame", value = "character"),
+          function(x, value) {
+            sdf <- callJMethod(x@sdf, "toDF", as.list(value))
+            dataFrame(sdf)
+          })
+
+#' coltypes
+#'
+#' Set the column types of a DataFrame.
+#'
+#' @name coltypes
+#' @param x (DataFrame)
+#' @return value (character) A character vector with the target column types for the given DataFrame
+#' @rdname coltypes
+#' @aliases coltypes
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- jsonFile(sqlContext, path)
+#' coltypes(df) <- c("string", "integer")
+#'}
+setMethod("coltypes<-",
+          signature(x = "DataFrame", value = "character"),
+          function(x, value) {
+            cols <- columns(x)
+            ncols <- length(cols)
+            if (length(value) == 0 || length(value) != ncols) {
+              stop("Length of type vector should match the number of columns for DataFrame")
+            }
+            newCols <- lapply(seq_len(ncols), function(i) {
+              col <- getColumn(x, cols[i])
+              cast(col, value[i])
+            })
+            nx <- select(x, newCols)
+            dataFrame(nx@sdf)
+          })
+
 #' Register Temporary Table
 #'
 #' Registers a DataFrame as a Temporary Table in the SQLContext
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 0c305441e043..ebea87f573d9 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -385,6 +385,18 @@ setGeneric("agg", function (x, ...) { standardGeneric("agg") })
 #' @export
 setGeneric("arrange", function(x, col, ...) { standardGeneric("arrange") })
 
+#' @rdname colnames
+#' @export
+setGeneric("colnames", function(x) { standardGeneric("colnames") })
+
+#' @rdname colnames<-
+#' @export
+setGeneric("colnames<-", function(x, value) { standardGeneric("colnames<-") })
+
+#' @rdname coltypes<-
+#' @export
+setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") })
+
 #' @rdname schema
 #' @export
 setGeneric("columns", function(x) {standardGeneric("columns") })
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 0fbe0658265b..d4db19cbf41b 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -622,6 +622,30 @@ test_that("schema(), dtypes(), columns(), names() return the correct values/form
   expect_equal(testNames[2], "name")
 })
 
+test_that("names() colnames() set the column names", {
+  df <- jsonFile(sqlContext, jsonPath)
+  names(df) <- c("col1", "col2")
+  expect_equal(colnames(df)[2], "col2")
+
+  colnames(df) <- c("col3", "col4")
+  expect_equal(names(df)[1], "col3")
+})
+
+test_that("coltypes() set the column types", {
+  df <- selectExpr(jsonFile(sqlContext, jsonPath), "name", "(age * 1.21) as age")
+  expect_equal(dtypes(df), list(c("name", "string"), c("age", "decimal(24,2)")))
+
+  df1 <- select(df, cast(df$age, "integer"))
+  coltypes(df) <- c("string", "integer")
+  expect_equal(dtypes(df), list(c("cast(name as string)", "string"), c("cast(age as int)", "int")))
+  value <- collect(df[, 2])[[3, 1]]
+  expect_equal(value, collect(df1)[[3, 1]])
+  expect_equal(value, 22)
+
+  expect_error(coltypes(df) <- c("string"),
+               "Length of type vector should match the number of columns for DataFrame")
+})
+
 test_that("head() and first() return the correct data", {
   df <- jsonFile(sqlContext, jsonPath)
   testHead <- head(df)

From 033e91690161a953e6cd2e6cd6b60d0b89c148b7 Mon Sep 17 00:00:00 2001
From: felixcheung <felixcheung_m@hotmail.com>
Date: Sun, 1 Nov 2015 12:33:28 -0800
Subject: [PATCH 2/9] Take R types instead to map to JVM types, add check for
 NA to keep column

---
 R/pkg/R/DataFrame.R              | 24 +++++++++++++++++++++---
 R/pkg/inst/tests/test_sparkSQL.R | 10 ++++++++--
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 1d762a7ecc2f..a46019de66b2 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -308,13 +308,22 @@ setMethod("colnames<-",
             dataFrame(sdf)
           })
 
+rToScalaTypes <- new.env()
+rToScalaTypes[["integer"]]   <- "integer" # in R, integer is 32bit
+rToScalaTypes[["numeric"]]   <- "double"  # in R, numeric == double which is 64bit
+rToScalaTypes[["double"]]    <- "double"
+rToScalaTypes[["character"]] <- "string"
+rToScalaTypes[["logical"]]   <- "boolean"
+
 #' coltypes
 #'
 #' Set the column types of a DataFrame.
 #'
 #' @name coltypes
 #' @param x (DataFrame)
-#' @return value (character) A character vector with the target column types for the given DataFrame
+#' @return value (character) A character vector with the target column types for the given
+#'    DataFrame. Column types can be one of integer, numeric/double, character, logical, or NA
+#'    to keep that column as-is.
 #' @rdname coltypes
 #' @aliases coltypes
 #' @export
@@ -324,7 +333,8 @@ setMethod("colnames<-",
 #' sqlContext <- sparkRSQL.init(sc)
 #' path <- "path/to/file.json"
 #' df <- jsonFile(sqlContext, path)
-#' coltypes(df) <- c("string", "integer")
+#' coltypes(df) <- c("character", "integer")
+#' coltypes(df) <- c(NA, "numeric")
 #'}
 setMethod("coltypes<-",
           signature(x = "DataFrame", value = "character"),
@@ -336,7 +346,15 @@ setMethod("coltypes<-",
             }
             newCols <- lapply(seq_len(ncols), function(i) {
               col <- getColumn(x, cols[i])
-              cast(col, value[i])
+              if (!is.na(value[i])) {
+                stype <- rToScalaTypes[[value[i]]]
+                if (is.null(stype)) {
+                  stop("Only atomic type is supported for column types")
+                }
+                cast(col, stype)
+              } else {
+                col
+              }
             })
             nx <- select(x, newCols)
             dataFrame(nx@sdf)
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index d4db19cbf41b..ffdec7070129 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -636,14 +636,20 @@ test_that("coltypes() set the column types", {
   expect_equal(dtypes(df), list(c("name", "string"), c("age", "decimal(24,2)")))
 
   df1 <- select(df, cast(df$age, "integer"))
-  coltypes(df) <- c("string", "integer")
+  coltypes(df) <- c("character", "integer")
   expect_equal(dtypes(df), list(c("cast(name as string)", "string"), c("cast(age as int)", "int")))
   value <- collect(df[, 2])[[3, 1]]
   expect_equal(value, collect(df1)[[3, 1]])
   expect_equal(value, 22)
 
-  expect_error(coltypes(df) <- c("string"),
+  coltypes(df) <- c(NA, "numeric")
+  expect_equal(dtypes(df), list(c("cast(name as string)", "string"),
+              c("cast(cast(age as int) as double)", "double")))
+
+  expect_error(coltypes(df) <- c("character"),
                "Length of type vector should match the number of columns for DataFrame")
+  expect_error(coltypes(df) <- c("environment", "list"),
+               "Only atomic type is supported for column types")
 })
 
 test_that("head() and first() return the correct data", {

From f2b5d02397cb5bb664a2d9cc2372c60090df529f Mon Sep 17 00:00:00 2001
From: felixcheung <felixcheung_m@hotmail.com>
Date: Sun, 1 Nov 2015 13:27:10 -0800
Subject: [PATCH 3/9] This seems to fix the Rd error - no idea why it worked
 before.

---
 R/pkg/R/generics.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index ebea87f573d9..9f5c84baeb5e 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -385,15 +385,15 @@ setGeneric("agg", function (x, ...) { standardGeneric("agg") })
 #' @export
 setGeneric("arrange", function(x, col, ...) { standardGeneric("arrange") })
 
-#' @rdname colnames
+#' @rdname columns
 #' @export
 setGeneric("colnames", function(x) { standardGeneric("colnames") })
 
-#' @rdname colnames<-
+#' @rdname columns
 #' @export
 setGeneric("colnames<-", function(x, value) { standardGeneric("colnames<-") })
 
-#' @rdname coltypes<-
+#' @rdname columns
 #' @export
 setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") })
 

From b0306c28e014f91f361640f08e6bd6f5e27eb6a2 Mon Sep 17 00:00:00 2001
From: felixcheung <felixcheung_m@hotmail.com>
Date: Sun, 1 Nov 2015 16:33:40 -0800
Subject: [PATCH 4/9] fix test broken from column name change from cast

---
 R/pkg/inst/tests/test_sparkSQL.R | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index ffdec7070129..233b3c4d79fb 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -637,14 +637,13 @@ test_that("coltypes() set the column types", {
 
   df1 <- select(df, cast(df$age, "integer"))
   coltypes(df) <- c("character", "integer")
-  expect_equal(dtypes(df), list(c("cast(name as string)", "string"), c("cast(age as int)", "int")))
+  expect_equal(dtypes(df), list(c("name", "string"), c("age", "int")))
   value <- collect(df[, 2])[[3, 1]]
   expect_equal(value, collect(df1)[[3, 1]])
   expect_equal(value, 22)
 
   coltypes(df) <- c(NA, "numeric")
-  expect_equal(dtypes(df), list(c("cast(name as string)", "string"),
-              c("cast(cast(age as int) as double)", "double")))
+  expect_equal(dtypes(df), list(c("name", "string"), c("age", "double")))
 
   expect_error(coltypes(df) <- c("character"),
                "Length of type vector should match the number of columns for DataFrame")

From 9006a059c899fc656c04028b5b9c4a95c7c64d3e Mon Sep 17 00:00:00 2001
From: felixcheung <felixcheung_m@hotmail.com>
Date: Wed, 11 Nov 2015 22:40:08 -0800
Subject: [PATCH 5/9] rebase, merge with coltypes change, fix generic, doc

---
 R/pkg/NAMESPACE                  |   1 +
 R/pkg/R/DataFrame.R              | 117 +++++++++++++++----------------
 R/pkg/R/generics.R               |  11 +--
 R/pkg/R/types.R                  |   8 +++
 R/pkg/inst/tests/test_sparkSQL.R |  44 ++++++------
 5 files changed, 97 insertions(+), 84 deletions(-)

diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index ae7b4c2b42e8..b6f08b78dffa 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -29,6 +29,7 @@ exportMethods("arrange",
               "collect",
               "colnames",
               "coltypes",
+              "coltypes<-",
               "columns",
               "count",
               "cov",
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index a46019de66b2..0af3bb0be25f 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -263,6 +263,7 @@ setMethod("dtypes",
 #' path <- "path/to/file.json"
 #' df <- jsonFile(sqlContext, path)
 #' columns(df)
+#' colnames(df)
 #'}
 setMethod("columns",
           signature(x = "DataFrame"),
@@ -295,7 +296,7 @@ setMethod("names<-",
 #' @name colnames
 setMethod("colnames",
           signature(x = "DataFrame"),
-          function(x) {
+          function(x, do.NULL = TRUE, prefix = "col") {
             columns(x)
           })
 
@@ -308,24 +309,67 @@ setMethod("colnames<-",
             dataFrame(sdf)
           })
 
-rToScalaTypes <- new.env()
-rToScalaTypes[["integer"]]   <- "integer" # in R, integer is 32bit
-rToScalaTypes[["numeric"]]   <- "double"  # in R, numeric == double which is 64bit
-rToScalaTypes[["double"]]    <- "double"
-rToScalaTypes[["character"]] <- "string"
-rToScalaTypes[["logical"]]   <- "boolean"
+#' coltypes
+#'
+#' Get column types of a DataFrame
+#'
+#' @name coltypes
+#' @param x (DataFrame)
+#' @return value (character) A character vector with the column types of the given DataFrame
+#' @rdname coltypes
+#' @family dataframe_funcs
+#' @export
+#' @examples
+#'\dontrun{
+#' irisDF <- createDataFrame(sqlContext, iris)
+#' coltypes(irisDF)
+#'}
+setMethod("coltypes",
+          signature(x = "DataFrame"),
+          function(x) {
+            # Get the Spark SQL type name of every column (second element of each dtypes() pair)
+            types <- sapply(dtypes(x), function(x) {x[[2]]})
+
+            # Map Spark data types into R's data types using PRIMITIVE_TYPES and COMPLEX_TYPES
+            rTypes <- sapply(types, USE.NAMES = FALSE, FUN = function(x) {
+              # Check for primitive types (exact match on the type name)
+              type <- PRIMITIVE_TYPES[[x]]
+
+              if (is.null(type)) {
+                # Check for complex types (prefix match, e.g. a name starting with "map")
+                for (t in names(COMPLEX_TYPES)) {
+                  if (substring(x, 1, nchar(t)) == t) {
+                    type <- COMPLEX_TYPES[[t]]
+                    break
+                  }
+                }
+
+                if (is.null(type)) {
+                  stop(paste("Unsupported data type: ", x))
+                }
+              }
+              type
+            })
+
+            # Find which types resolved to NA, i.e. have no mapping to an R type
+            naIndices <- which(is.na(rTypes))
+
+            # Fall back to the original Spark SQL type names for the unmatched ones
+            rTypes[naIndices] <- types[naIndices]
+
+            rTypes
+          })
 
 #' coltypes
 #'
 #' Set the column types of a DataFrame.
 #'
-#' @name coltypes
+#' @name coltypes<-
 #' @param x (DataFrame)
-#' @return value (character) A character vector with the target column types for the given
+#' @param value (character) A character vector with the target column types for the given
 #'    DataFrame. Column types can be one of integer, numeric/double, character, logical, or NA
 #'    to keep that column as-is.
 #' @rdname coltypes
-#' @aliases coltypes
 #' @export
 #' @examples
 #'\dontrun{
@@ -341,7 +385,10 @@ setMethod("coltypes<-",
           function(x, value) {
             cols <- columns(x)
             ncols <- length(cols)
-            if (length(value) == 0 || length(value) != ncols) {
+            if (ncols == 0) {
+              stop("Cannot set types of an empty DataFrame with no Column")
+            }
+            if (length(value) != ncols) {
               stop("Length of type vector should match the number of columns for DataFrame")
             }
             newCols <- lapply(seq_len(ncols), function(i) {
@@ -2173,51 +2220,3 @@ setMethod("with",
             eval(substitute(expr), envir = newEnv, enclos = newEnv)
           })
 
-#' Returns the column types of a DataFrame.
-#'
-#' @name coltypes
-#' @title Get column types of a DataFrame
-#' @family dataframe_funcs
-#' @param x (DataFrame)
-#' @return value (character) A character vector with the column types of the given DataFrame
-#' @rdname coltypes
-#' @examples \dontrun{
-#' irisDF <- createDataFrame(sqlContext, iris)
-#' coltypes(irisDF)
-#' }
-setMethod("coltypes",
-          signature(x = "DataFrame"),
-          function(x) {
-            # Get the data types of the DataFrame by invoking dtypes() function
-            types <- sapply(dtypes(x), function(x) {x[[2]]})
-
-            # Map Spark data types into R's data types using DATA_TYPES environment
-            rTypes <- sapply(types, USE.NAMES=F, FUN=function(x) {
-
-              # Check for primitive types
-              type <- PRIMITIVE_TYPES[[x]]
-
-              if (is.null(type)) {
-                # Check for complex types
-                for (t in names(COMPLEX_TYPES)) {
-                  if (substring(x, 1, nchar(t)) == t) {
-                    type <- COMPLEX_TYPES[[t]]
-                    break
-                  }
-                }
-
-                if (is.null(type)) {
-                  stop(paste("Unsupported data type: ", x))
-                }
-              }
-              type
-            })
-
-            # Find which types don't have mapping to R
-            naIndices <- which(is.na(rTypes))
-
-            # Assign the original scala data types to the unmatched ones
-            rTypes[naIndices] <- types[naIndices]
-
-            rTypes
-          })
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 9f5c84baeb5e..c3d9abf86db0 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -387,13 +387,17 @@ setGeneric("arrange", function(x, col, ...) { standardGeneric("arrange") })
 
 #' @rdname columns
 #' @export
-setGeneric("colnames", function(x) { standardGeneric("colnames") })
+setGeneric("colnames", function(x, do.NULL = TRUE, prefix = "col") { standardGeneric("colnames") })
 
 #' @rdname columns
 #' @export
 setGeneric("colnames<-", function(x, value) { standardGeneric("colnames<-") })
 
-#' @rdname columns
+#' @rdname coltypes
+#' @export
+setGeneric("coltypes", function(x) { standardGeneric("coltypes") })
+
+#' @rdname coltypes
 #' @export
 setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") })
 
@@ -1094,6 +1098,3 @@ setGeneric("attach")
 #' @export
 setGeneric("with")
 
-#' @rdname coltypes
-#' @export
-setGeneric("coltypes", function(x) { standardGeneric("coltypes") })
diff --git a/R/pkg/R/types.R b/R/pkg/R/types.R
index 1828c23ab0f6..4b69589dfa24 100644
--- a/R/pkg/R/types.R
+++ b/R/pkg/R/types.R
@@ -41,3 +41,11 @@ COMPLEX_TYPES <- list(
 
 # The full list of data types.
 DATA_TYPES <- as.environment(c(as.list(PRIMITIVE_TYPES), COMPLEX_TYPES))
+
+# An environment for mapping R to Scala, names are R types and values are Scala types.
+rToScalaTypes <- new.env()
+rToScalaTypes[["integer"]]   <- "integer" # in R, integer is 32bit
+rToScalaTypes[["numeric"]]   <- "double"  # in R, numeric == double which is 64bit
+rToScalaTypes[["double"]]    <- "double"
+rToScalaTypes[["character"]] <- "string"
+rToScalaTypes[["logical"]]   <- "boolean"
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 233b3c4d79fb..327a369a701a 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -629,26 +629,12 @@ test_that("names() colnames() set the column names", {
 
   colnames(df) <- c("col3", "col4")
   expect_equal(names(df)[1], "col3")
-})
-
-test_that("coltypes() set the column types", {
-  df <- selectExpr(jsonFile(sqlContext, jsonPath), "name", "(age * 1.21) as age")
-  expect_equal(dtypes(df), list(c("name", "string"), c("age", "decimal(24,2)")))
-
-  df1 <- select(df, cast(df$age, "integer"))
-  coltypes(df) <- c("character", "integer")
-  expect_equal(dtypes(df), list(c("name", "string"), c("age", "int")))
-  value <- collect(df[, 2])[[3, 1]]
-  expect_equal(value, collect(df1)[[3, 1]])
-  expect_equal(value, 22)
-
-  coltypes(df) <- c(NA, "numeric")
-  expect_equal(dtypes(df), list(c("name", "string"), c("age", "double")))
 
-  expect_error(coltypes(df) <- c("character"),
-               "Length of type vector should match the number of columns for DataFrame")
-  expect_error(coltypes(df) <- c("environment", "list"),
-               "Only atomic type is supported for column types")
+  # Test base::colnames
+  m2 <- cbind(1, 1:4)
+  expect_equal(colnames(m2, do.NULL = FALSE), c("col1", "col2"))
+  colnames(m2) <- c("x","Y")
+  expect_equal(colnames(m2), c("x", "Y"))
 })
 
 test_that("head() and first() return the correct data", {
@@ -1645,7 +1631,7 @@ test_that("with() on a DataFrame", {
   expect_equal(nrow(sum2), 35)
 })
 
-test_that("Method coltypes() to get R's data types of a DataFrame", {
+test_that("Method coltypes() to get and set R's data types of a DataFrame", {
   expect_equal(coltypes(irisDF), c(rep("numeric", 4), "character"))
 
   data <- data.frame(c1=c(1,2,3),
@@ -1664,6 +1650,24 @@ test_that("Method coltypes() to get R's data types of a DataFrame", {
   x <- createDataFrame(sqlContext, list(list(as.environment(
     list("a"="b", "c"="d", "e"="f")))))
   expect_equal(coltypes(x), "map<string,string>")
+
+  df <- selectExpr(jsonFile(sqlContext, jsonPath), "name", "(age * 1.21) as age")
+  expect_equal(dtypes(df), list(c("name", "string"), c("age", "decimal(24,2)")))
+
+  df1 <- select(df, cast(df$age, "integer"))
+  coltypes(df) <- c("character", "integer")
+  expect_equal(dtypes(df), list(c("name", "string"), c("age", "int")))
+  value <- collect(df[, 2])[[3, 1]]
+  expect_equal(value, collect(df1)[[3, 1]])
+  expect_equal(value, 22)
+
+  coltypes(df) <- c(NA, "numeric")
+  expect_equal(dtypes(df), list(c("name", "string"), c("age", "double")))
+
+  expect_error(coltypes(df) <- c("character"),
+               "Length of type vector should match the number of columns for DataFrame")
+  expect_error(coltypes(df) <- c("environment", "list"),
+               "Only atomic type is supported for column types")
 })
 
 unlink(parquetPath)

From 2c71790c9f73de7f6834724779c905c531529462 Mon Sep 17 00:00:00 2001
From: felixcheung <felixcheung_m@hotmail.com>
Date: Thu, 12 Nov 2015 20:39:31 -0800
Subject: [PATCH 6/9] fix r doc family tag

---
 R/pkg/R/DataFrame.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 0af3bb0be25f..91378095f3e8 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -317,7 +317,7 @@ setMethod("colnames<-",
 #' @param x (DataFrame)
 #' @return value (character) A character vector with the column types of the given DataFrame
 #' @rdname coltypes
-#' @family dataframe_funcs
+#' @family DataFrame functions
 #' @export
 #' @examples
 #'\dontrun{

From 1ac49c055854d7445d1ede032a340f8e02ebb894 Mon Sep 17 00:00:00 2001
From: felixcheung <felixcheung_m@hotmail.com>
Date: Thu, 19 Nov 2015 17:02:57 -0800
Subject: [PATCH 7/9] rebase, changes from comment

---
 R/pkg/R/DataFrame.R | 17 ++++++++---------
 R/pkg/R/types.R     | 12 ++++++------
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 91378095f3e8..f89e2682d9e2 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -296,7 +296,7 @@ setMethod("names<-",
 #' @name colnames
 setMethod("colnames",
           signature(x = "DataFrame"),
-          function(x, do.NULL = TRUE, prefix = "col") {
+          function(x) {
             columns(x)
           })
 
@@ -313,10 +313,10 @@ setMethod("colnames<-",
 #'
 #' Get column types of a DataFrame
 #'
-#' @name coltypes
-#' @param x (DataFrame)
-#' @return value (character) A character vector with the column types of the given DataFrame
+#' @param x A SparkSQL DataFrame
+#' @return A character vector with the column types of the given DataFrame
 #' @rdname coltypes
+#' @name coltypes
 #' @family DataFrame functions
 #' @export
 #' @examples
@@ -364,12 +364,12 @@ setMethod("coltypes",
 #'
 #' Set the column types of a DataFrame.
 #'
-#' @name coltypes<-
-#' @param x (DataFrame)
-#' @param value (character) A character vector with the target column types for the given
+#' @param x A SparkSQL DataFrame
+#' @param value A character vector with the target column types for the given
 #'    DataFrame. Column types can be one of integer, numeric/double, character, logical, or NA
 #'    to keep that column as-is.
 #' @rdname coltypes
+#' @name coltypes<-
 #' @export
 #' @examples
 #'\dontrun{
@@ -394,7 +394,7 @@ setMethod("coltypes<-",
             newCols <- lapply(seq_len(ncols), function(i) {
               col <- getColumn(x, cols[i])
               if (!is.na(value[i])) {
-                stype <- rToScalaTypes[[value[i]]]
+                stype <- rToSQLTypes[[value[i]]]
                 if (is.null(stype)) {
                   stop("Only atomic type is supported for column types")
                 }
@@ -2219,4 +2219,3 @@ setMethod("with",
             newEnv <- assignNewEnv(data)
             eval(substitute(expr), envir = newEnv, enclos = newEnv)
           })
-
diff --git a/R/pkg/R/types.R b/R/pkg/R/types.R
index 4b69589dfa24..afdbc9d8a48a 100644
--- a/R/pkg/R/types.R
+++ b/R/pkg/R/types.R
@@ -43,9 +43,9 @@ COMPLEX_TYPES <- list(
 DATA_TYPES <- as.environment(c(as.list(PRIMITIVE_TYPES), COMPLEX_TYPES))
 
 # An environment for mapping R to Scala, names are R types and values are Scala types.
-rToScalaTypes <- new.env()
-rToScalaTypes[["integer"]]   <- "integer" # in R, integer is 32bit
-rToScalaTypes[["numeric"]]   <- "double"  # in R, numeric == double which is 64bit
-rToScalaTypes[["double"]]    <- "double"
-rToScalaTypes[["character"]] <- "string"
-rToScalaTypes[["logical"]]   <- "boolean"
+rToSQLTypes <- new.env()
+rToSQLTypes[["integer"]]   <- "integer" # in R, integer is 32bit
+rToSQLTypes[["numeric"]]   <- "double"  # in R, numeric == double which is 64bit
+rToSQLTypes[["double"]]    <- "double"
+rToSQLTypes[["character"]] <- "string"
+rToSQLTypes[["logical"]]   <- "boolean"

From e399acd4f1605d883d2cbe8e194f1c1125095c30 Mon Sep 17 00:00:00 2001
From: felixcheung <felixcheung_m@hotmail.com>
Date: Thu, 19 Nov 2015 17:16:49 -0800
Subject: [PATCH 8/9] should not have ignored this file, which breaks style
 test

---
 R/pkg/R/generics.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index c3d9abf86db0..711ce38f9e10 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1097,4 +1097,3 @@ setGeneric("attach")
 #' @rdname with
 #' @export
 setGeneric("with")
-

From d555cf89fa743f51a7d0e75ed9afda0775540027 Mon Sep 17 00:00:00 2001
From: felixcheung <felixcheung_m@hotmail.com>
Date: Sat, 28 Nov 2015 17:15:35 -0800
Subject: [PATCH 9/9] update from feedback, add tests

---
 R/pkg/NAMESPACE                  |  2 ++
 R/pkg/R/types.R                  | 12 ++++++------
 R/pkg/inst/tests/test_sparkSQL.R |  7 ++++++-
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index b6f08b78dffa..43e5e0119e7f 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -28,6 +28,7 @@ exportMethods("arrange",
               "cache",
               "collect",
               "colnames",
+              "colnames<-",
               "coltypes",
               "coltypes<-",
               "columns",
@@ -58,6 +59,7 @@ exportMethods("arrange",
               "mutate",
               "na.omit",
               "names",
+              "names<-",
               "ncol",
               "nrow",
               "orderBy",
diff --git a/R/pkg/R/types.R b/R/pkg/R/types.R
index afdbc9d8a48a..dae4fe858bdb 100644
--- a/R/pkg/R/types.R
+++ b/R/pkg/R/types.R
@@ -43,9 +43,9 @@ COMPLEX_TYPES <- list(
 DATA_TYPES <- as.environment(c(as.list(PRIMITIVE_TYPES), COMPLEX_TYPES))
 
 # An environment for mapping R to Scala, names are R types and values are Scala types.
-rToSQLTypes <- new.env()
-rToSQLTypes[["integer"]]   <- "integer" # in R, integer is 32bit
-rToSQLTypes[["numeric"]]   <- "double"  # in R, numeric == double which is 64bit
-rToSQLTypes[["double"]]    <- "double"
-rToSQLTypes[["character"]] <- "string"
-rToSQLTypes[["logical"]]   <- "boolean"
+rToSQLTypes <- as.environment(list(
+  "integer"   = "integer", # in R, integer is 32bit
+  "numeric"   = "double",  # in R, numeric == double which is 64bit
+  "double"    = "double",
+  "character" = "string",
+  "logical"   = "boolean"))
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 327a369a701a..2001f6daaa13 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -630,11 +630,16 @@ test_that("names() colnames() set the column names", {
   colnames(df) <- c("col3", "col4")
   expect_equal(names(df)[1], "col3")
 
-  # Test base::colnames
+  # Test base::colnames base::names
   m2 <- cbind(1, 1:4)
   expect_equal(colnames(m2, do.NULL = FALSE), c("col1", "col2"))
   colnames(m2) <- c("x","Y")
   expect_equal(colnames(m2), c("x", "Y"))
+
+  z <- list(a = 1, b = "c", c = 1:3)
+  expect_equal(names(z)[3], "c")
+  names(z)[3] <- "c2"
+  expect_equal(names(z)[3], "c2")
 })
 
 test_that("head() and first() return the correct data", {