From 6805952ab77ec891ce4258437af28a36cfa8960e Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Mon, 30 Nov 2015 16:41:29 +0800 Subject: [PATCH 1/6] Fix usage of isnan, isnull, isNaN, isNull, isNotNull --- R/pkg/R/column.R | 2 +- R/pkg/R/functions.R | 30 +++++++++++++++++++++++------- R/pkg/R/generics.R | 12 ++++++++++-- R/pkg/inst/tests/test_sparkSQL.R | 2 +- 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 20de3907b7dd..7bb8ef2595b5 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -56,7 +56,7 @@ operators <- list( "&" = "and", "|" = "or", #, "!" = "unary_$bang" "^" = "pow" ) -column_functions1 <- c("asc", "desc", "isNull", "isNotNull") +column_functions1 <- c("asc", "desc", "isNaN", "isNull", "isNotNull") column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains") createOperator <- function(op) { diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index b30331c61c9a..7e59eb8ba57d 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -488,19 +488,35 @@ setMethod("initcap", column(jc) }) -#' isNaN +#' isnan #' -#' Return true iff the column is NaN. +#' Return true if the column is NaN. #' -#' @rdname isNaN -#' @name isNaN +#' @rdname isnan +#' @name isnan #' @family normal_funcs #' @export -#' @examples \dontrun{isNaN(df$c)} -setMethod("isNaN", +#' @examples \dontrun{isnan(df$c)} +setMethod("isnan", signature(x = "Column"), function(x) { - jc <- callJStatic("org.apache.spark.sql.functions", "isNaN", x@jc) + jc <- callJStatic("org.apache.spark.sql.functions", "isnan", x@jc) + column(jc) + }) + +#' isnull +#' +#' Return true if the column is NULL. +#' +#' @rdname isnull +#' @name isnull +#' @family normal_funcs +#' @export +#' @examples \dontrun{isnull(df$c)} +setMethod("isnull", + signature(x = "Column"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "isnull", x@jc) column(jc) }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 711ce38f9e10..7746893384a4 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -621,6 +621,10 @@ setGeneric("getField", function(x, ...) { standardGeneric("getField") }) #' @export setGeneric("getItem", function(x, ...) { standardGeneric("getItem") }) +#' @rdname column +#' @export +setGeneric("isNaN", function(x) { standardGeneric("isNaN") }) + #' @rdname column #' @export setGeneric("isNull", function(x) { standardGeneric("isNull") }) @@ -796,9 +800,13 @@ setGeneric("initcap", function(x) { standardGeneric("initcap") }) #' @export setGeneric("instr", function(y, x) { standardGeneric("instr") }) -#' @rdname isNaN +#' @rdname isnan #' @export -setGeneric("isNaN", function(x) { standardGeneric("isNaN") }) +setGeneric("isnan", function(x) { standardGeneric("isnan") }) + +#' @rdname isnull +#' @export +setGeneric("isnull", function(x) { standardGeneric("isnull") }) #' @rdname kurtosis #' @export diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 92ec82096c6d..132a375c8003 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -878,7 +878,7 @@ test_that("column functions", { c2 <- avg(c) + base64(c) + bin(c) + bitwiseNOT(c) + cbrt(c) + ceil(c) + cos(c) c3 <- cosh(c) + count(c) + crc32(c) + exp(c) c4 <- explode(c) + expm1(c) + factorial(c) + first(c) + floor(c) + hex(c) - c5 <- hour(c) + initcap(c) + isNaN(c) + last(c) + last_day(c) + length(c) + c5 <- hour(c) + initcap(c) + isnan(c) + isnull(c) + last(c) + last_day(c) + length(c) c6 <- log(c) + (c) + log1p(c) + log2(c) + lower(c) + ltrim(c) + max(c) + md5(c) c7 <- mean(c) + min(c) + month(c) + negate(c) + quarter(c) c8 <- reverse(c) + rint(c) + round(c) + rtrim(c) + sha1(c) From 00ce43a8c4af99354c70ac1e1c14f27a6442287b Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Wed, 2 Dec 2015 18:15:58 +0800 Subject: [PATCH 2/6] add more test cases --- R/pkg/inst/tests/test_sparkSQL.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 132a375c8003..0352da55f082 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -878,7 +878,7 @@ test_that("column functions", { c2 <- avg(c) + base64(c) + bin(c) + bitwiseNOT(c) + cbrt(c) + ceil(c) + cos(c) c3 <- cosh(c) + count(c) + crc32(c) + exp(c) c4 <- explode(c) + expm1(c) + factorial(c) + first(c) + floor(c) + hex(c) - c5 <- hour(c) + initcap(c) + isnan(c) + isnull(c) + last(c) + last_day(c) + length(c) + c5 <- hour(c) + initcap(c) + last(c) + last_day(c) + length(c) c6 <- log(c) + (c) + log1p(c) + log2(c) + lower(c) + ltrim(c) + max(c) + md5(c) c7 <- mean(c) + min(c) + month(c) + negate(c) + quarter(c) c8 <- reverse(c) + rint(c) + round(c) + rtrim(c) + sha1(c) @@ -889,6 +889,7 @@ test_that("column functions", { c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1) c14 <- cume_dist() + ntile(1) c15 <- dense_rank() + percent_rank() + rank() + row_number() + c16 <- isnan(c) + isNaN(c) + isnull(c) + isNull(c) + isNotNull(c) # Test if base::rank() is exposed expect_equal(class(rank())[[1]], "Column") From 041c9c6ca79bd6179b785392a34ce2d80251ced4 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Wed, 2 Dec 2015 21:54:29 +0800 Subject: [PATCH 3/6] Add alias is.nan --- R/pkg/R/functions.R | 22 +++++++++++++++++----- R/pkg/R/generics.R | 6 +++++- R/pkg/inst/tests/test_sparkSQL.R | 2 +- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 7e59eb8ba57d..c2145239918f 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -488,15 +488,27 @@ setMethod("initcap", column(jc) }) -#' isnan +#' is.nan #' -#' Return true if the column is NaN. +#' Return true if the column is NaN, alias for \link{isnan} #' -#' @rdname isnan -#' @name isnan +#' @rdname is.nan +#' @name is.nan #' @family normal_funcs #' @export -#' @examples \dontrun{isnan(df$c)} +#' @examples +#' \dontrun{ +#' is.nan(df$c) +#' isnan(df$c) +#' } +setMethod("is.nan", + signature(x = "Column"), + function(x) { + isnan(x) + }) + +#' @rdname is.nan +#' @name isnan setMethod("isnan", signature(x = "Column"), function(x) { diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 7746893384a4..134786fb230f 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -800,7 +800,11 @@ setGeneric("initcap", function(x) { standardGeneric("initcap") }) #' @export setGeneric("instr", function(y, x) { standardGeneric("instr") }) -#' @rdname isnan +#' @rdname is.nan +#' @export +setGeneric("is.nan") + +#' @rdname is.nan #' @export setGeneric("isnan", function(x) { standardGeneric("isnan") }) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 0352da55f082..4ef429936b12 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -889,7 +889,7 @@ test_that("column functions", { c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1) c14 <- cume_dist() + ntile(1) c15 <- dense_rank() + percent_rank() + rank() + row_number() - c16 <- isnan(c) + isNaN(c) + isnull(c) + isNull(c) + isNotNull(c) + c16 <- is.nan(c) + isnan(c) + isnull(c) + isNaN(c) + isNull(c) + isNotNull(c) # Test if base::rank() is exposed expect_equal(class(rank())[[1]], "Column") From 10a5fe71958ce2c939d1f2e9b4d741ec22009bb8 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Thu, 3 Dec 2015 15:17:13 +0800 Subject: [PATCH 4/6] Not to expose isnull --- R/pkg/R/generics.R | 8 -------- R/pkg/inst/tests/test_sparkSQL.R | 5 ++++- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 134786fb230f..ae78ae6c74d9 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -800,18 +800,10 @@ setGeneric("initcap", function(x) { standardGeneric("initcap") }) #' @export setGeneric("instr", function(y, x) { standardGeneric("instr") }) -#' @rdname is.nan -#' @export -setGeneric("is.nan") - #' @rdname is.nan #' @export setGeneric("isnan", function(x) { standardGeneric("isnan") }) -#' @rdname isnull -#' @export -setGeneric("isnull", function(x) { standardGeneric("isnull") }) - #' @rdname kurtosis #' @export setGeneric("kurtosis", function(x) { standardGeneric("kurtosis") }) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index 4ef429936b12..a52ad6079fad 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -889,7 +889,10 @@ test_that("column functions", { c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1) c14 <- cume_dist() + ntile(1) c15 <- dense_rank() + percent_rank() + rank() + row_number() - c16 <- is.nan(c) + isnan(c) + isnull(c) + isNaN(c) + isNull(c) + isNotNull(c) + c16 <- is.nan(c) + isnan(c) + isNaN(c) + + # Test if base::is.nan() is exposed + expect_equal(is.nan(c(0/0, 1/0 - 1/0)), c(TRUE, TRUE)) # Test if base::rank() is exposed expect_equal(class(rank())[[1]], "Column") From 95fdd2c5cfe1cf1d5e44c6de677fc52cc361ce19 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Thu, 3 Dec 2015 15:21:11 +0800 Subject: [PATCH 5/6] remove setMethod for isnull --- R/pkg/R/functions.R | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index c2145239918f..49488771a457 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -516,22 +516,6 @@ setMethod("isnan", column(jc) }) -#' isnull -#' -#' Return true if the column is NULL. -#' -#' @rdname isnull -#' @name isnull -#' @family normal_funcs -#' @export -#' @examples \dontrun{isnull(df$c)} -setMethod("isnull", - signature(x = "Column"), - function(x) { - jc <- callJStatic("org.apache.spark.sql.functions", "isnull", x@jc) - column(jc) - }) - #' kurtosis #' #' Aggregate function: returns the kurtosis of the values in a group. From 3ee7d5c37a0b3815c2ff139964775d23e593837b Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Thu, 3 Dec 2015 15:37:50 +0800 Subject: [PATCH 6/6] fix test case --- R/pkg/inst/tests/test_sparkSQL.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index a52ad6079fad..01ef5858a05e 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -892,7 +892,7 @@ test_that("column functions", { c16 <- is.nan(c) + isnan(c) + isNaN(c) # Test if base::is.nan() is exposed - expect_equal(is.nan(c(0/0, 1/0 - 1/0)), c(TRUE, TRUE)) + expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE)) # Test if base::rank() is exposed expect_equal(class(rank())[[1]], "Column")