From 41b5673bdd416e9a722617b96b0abc114992bbe2 Mon Sep 17 00:00:00 2001 From: felixcheung Date: Thu, 1 Oct 2015 18:39:44 -0700 Subject: [PATCH 1/4] Fix to allow `select(df, c("col1", "col2"))` --- R/pkg/R/DataFrame.R | 14 +++++++++----- R/pkg/inst/tests/test_sparkSQL.R | 6 +++++- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 65e368c47dd81..01b5141f256d2 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1075,12 +1075,16 @@ setMethod("subset", signature(x = "DataFrame"), #' select(df, c("col1", "col2")) #' select(df, list(df$name, df$age + 1)) #' # Similar to R data frames columns can also be selected using `$` -#' df$age +#' df[,df$age] #' } setMethod("select", signature(x = "DataFrame", col = "character"), function(x, col, ...) { - sdf <- callJMethod(x@sdf, "select", col, list(...)) - dataFrame(sdf) + if (length(col) > 1) { + select(x, as.list(col)) + } else { + sdf <- callJMethod(x@sdf, "select", col, list(...)) + dataFrame(sdf) + } }) #' @rdname select @@ -1853,13 +1857,13 @@ setMethod("crosstab", #' This function downloads the contents of a DataFrame into an R's data.frame. #' Since data.frames are held in memory, ensure that you have enough memory #' in your system to accommodate the contents. -#' +#' #' @title Download data from a DataFrame into a data.frame #' @param x a DataFrame #' @return a data.frame #' @rdname as.data.frame #' @examples \dontrun{ -#' +#' #' irisDF <- createDataFrame(sqlContext, iris) #' df <- as.data.frame(irisDF[irisDF$Species == "setosa", ]) #' } diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 8f85eecbc4a97..43a86515f2f0f 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -673,6 +673,10 @@ test_that("select with column", { expect_equal(columns(df3), c("x")) expect_equal(count(df3), 3) expect_equal(collect(select(df3, "x"))[[1, 1]], "x") + + df4 <- select(df, c("name", "age")) + expect_equal(columns(df4), c("name", "age")) + expect_equal(count(df4), 3) }) test_that("subsetting", { @@ -1336,4 +1340,4 @@ test_that("Method as.data.frame as a synonym for collect()", { unlink(parquetPath) unlink(jsonPath) -unlink(jsonPathNa) \ No newline at end of file +unlink(jsonPathNa) From dfac25cbdd3dc233868fb09b807b86744be17fd4 Mon Sep 17 00:00:00 2001 From: felixcheung Date: Fri, 2 Oct 2015 14:57:19 -0700 Subject: [PATCH 2/4] Add checks for varargs --- R/pkg/R/DataFrame.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 01b5141f256d2..14aea923fcfef 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1080,6 +1080,10 @@ setMethod("subset", signature(x = "DataFrame"), setMethod("select", signature(x = "DataFrame", col = "character"), function(x, col, ...) { if (length(col) > 1) { + if (length(list(...)) > 0) { + stop("To select multiple columns, use a character vector or list for col") + } + select(x, as.list(col)) } else { sdf <- callJMethod(x@sdf, "select", col, list(...)) From d4dd39cd7b86c1644d4cb694ce9a795a425bba87 Mon Sep 17 00:00:00 2001 From: felixcheung Date: Fri, 2 Oct 2015 17:59:49 -0700 Subject: [PATCH 3/4] add test --- R/pkg/inst/tests/test_sparkSQL.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 43a86515f2f0f..7328476e63c74 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -677,6 +677,8 @@ test_that("select with column", { df4 <- select(df, c("name", "age")) expect_equal(columns(df4), c("name", "age")) expect_equal(count(df4), 3) + + expect_error(select(df, c("name", "age"), "name"), "To select multiple columns, use a character vector or list for col") }) test_that("subsetting", { From 4f8f1705da3d566fed21e364f81ee24a9dadcf9c Mon Sep 17 00:00:00 2001 From: felixcheung Date: Fri, 2 Oct 2015 18:21:08 -0700 Subject: [PATCH 4/4] fix for style test --- R/pkg/inst/tests/test_sparkSQL.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 7328476e63c74..faf42b7182c30 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -678,7 +678,8 @@ test_that("select with column", { expect_equal(columns(df4), c("name", "age")) expect_equal(count(df4), 3) - expect_error(select(df, c("name", "age"), "name"), "To select multiple columns, use a character vector or list for col") + expect_error(select(df, c("name", "age"), "name"), + "To select multiple columns, use a character vector or list for col") }) test_that("subsetting", {