[SPARK-12025][SPARKR] Rename some window rank function names for SparkR

yanboliang · shivaram · commit ba02f6cb5a40 · 2015-11-27T11:48:01.000-08:00
Change ```cumeDist -> cume_dist, denseRank -> dense_rank, percentRank -> percent_rank, rowNumber -> row_number``` on the SparkR side. There are two reasons to make this change: * We should follow the [naming convention rule of R](http://www.inside-r.org/node/230645) * Spark DataFrame has deprecated the old naming convention (such as ```cumeDist```) and will remove it in Spark 2.0. It's better to fix this issue before the 1.6 release; otherwise we will have to make a breaking API change later. cc shivaram sun-rui Author: Yanbo Liang <ybliang8@gmail.com> Closes #10016 from yanboliang/SPARK-12025.
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
@@ -123,14 +123,14 @@ exportMethods("%in%",
               "count",
               "countDistinct",
               "crc32",
-              "cumeDist",
+              "cume_dist",
               "date_add",
               "date_format",
               "date_sub",
               "datediff",
               "dayofmonth",
               "dayofyear",
-              "denseRank",
+              "dense_rank",
               "desc",
               "endsWith",
               "exp",
@@ -188,7 +188,7 @@ exportMethods("%in%",
               "next_day",
               "ntile",
               "otherwise",
-              "percentRank",
+              "percent_rank",
               "pmod",
               "quarter",
               "rand",
@@ -200,7 +200,7 @@ exportMethods("%in%",
               "rint",
               "rlike",
               "round",
-              "rowNumber",
+              "row_number",
               "rpad",
               "rtrim",
               "second",
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
@@ -2146,47 +2146,47 @@ setMethod("ifelse",
 
 ###################### Window functions######################
 
-#' cumeDist
+#' cume_dist
 #'
 #' Window function: returns the cumulative distribution of values within a window partition,
 #' i.e. the fraction of rows that are below the current row.
 #'
 #'   N = total number of rows in the partition
-#'   cumeDist(x) = number of values before (and including) x / N
+#'   cume_dist(x) = number of values before (and including) x / N
 #'
 #' This is equivalent to the CUME_DIST function in SQL.
 #'
-#' @rdname cumeDist
-#' @name cumeDist
+#' @rdname cume_dist
+#' @name cume_dist
 #' @family window_funcs
 #' @export
-#' @examples \dontrun{cumeDist()}
-setMethod("cumeDist",
+#' @examples \dontrun{cume_dist()}
+setMethod("cume_dist",
           signature(x = "missing"),
           function() {
-            jc <- callJStatic("org.apache.spark.sql.functions", "cumeDist")
+            jc <- callJStatic("org.apache.spark.sql.functions", "cume_dist")
             column(jc)
           })
 
-#' denseRank
+#' dense_rank
 #'
 #' Window function: returns the rank of rows within a window partition, without any gaps.
-#' The difference between rank and denseRank is that denseRank leaves no gaps in ranking
-#' sequence when there are ties. That is, if you were ranking a competition using denseRank
+#' The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking
+#' sequence when there are ties. That is, if you were ranking a competition using dense_rank
 #' and had three people tie for second place, you would say that all three were in second
 #' place and that the next person came in third.
 #'
 #' This is equivalent to the DENSE_RANK function in SQL.
 #'
-#' @rdname denseRank
-#' @name denseRank
+#' @rdname dense_rank
+#' @name dense_rank
 #' @family window_funcs
 #' @export
-#' @examples \dontrun{denseRank()}
-setMethod("denseRank",
+#' @examples \dontrun{dense_rank()}
+setMethod("dense_rank",
           signature(x = "missing"),
           function() {
-            jc <- callJStatic("org.apache.spark.sql.functions", "denseRank")
+            jc <- callJStatic("org.apache.spark.sql.functions", "dense_rank")
             column(jc)
           })
 
@@ -2264,7 +2264,7 @@ setMethod("ntile",
             column(jc)
           })
 
-#' percentRank
+#' percent_rank
 #'
 #' Window function: returns the relative rank (i.e. percentile) of rows within a window partition.
 #'
@@ -2274,15 +2274,15 @@ setMethod("ntile",
 #'
 #' This is equivalent to the PERCENT_RANK function in SQL.
 #'
-#' @rdname percentRank
-#' @name percentRank
+#' @rdname percent_rank
+#' @name percent_rank
 #' @family window_funcs
 #' @export
-#' @examples \dontrun{percentRank()}
-setMethod("percentRank",
+#' @examples \dontrun{percent_rank()}
+setMethod("percent_rank",
           signature(x = "missing"),
           function() {
-            jc <- callJStatic("org.apache.spark.sql.functions", "percentRank")
+            jc <- callJStatic("org.apache.spark.sql.functions", "percent_rank")
             column(jc)
           })
 
@@ -2316,21 +2316,21 @@ setMethod("rank",
             base::rank(x, ...)
           })
 
-#' rowNumber
+#' row_number
 #'
 #' Window function: returns a sequential number starting at 1 within a window partition.
 #'
 #' This is equivalent to the ROW_NUMBER function in SQL.
 #'
-#' @rdname rowNumber
-#' @name rowNumber
+#' @rdname row_number
+#' @name row_number
 #' @family window_funcs
 #' @export
-#' @examples \dontrun{rowNumber()}
-setMethod("rowNumber",
+#' @examples \dontrun{row_number()}
+setMethod("row_number",
           signature(x = "missing"),
           function() {
-            jc <- callJStatic("org.apache.spark.sql.functions", "rowNumber")
+            jc <- callJStatic("org.apache.spark.sql.functions", "row_number")
             column(jc)
           })
 
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
@@ -700,9 +700,9 @@ setGeneric("countDistinct", function(x, ...) { standardGeneric("countDistinct")
 #' @export
 setGeneric("crc32", function(x) { standardGeneric("crc32") })
 
-#' @rdname cumeDist
+#' @rdname cume_dist
 #' @export
-setGeneric("cumeDist", function(x) { standardGeneric("cumeDist") })
+setGeneric("cume_dist", function(x) { standardGeneric("cume_dist") })
 
 #' @rdname datediff
 #' @export
@@ -728,9 +728,9 @@ setGeneric("dayofmonth", function(x) { standardGeneric("dayofmonth") })
 #' @export
 setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") })
 
-#' @rdname denseRank
+#' @rdname dense_rank
 #' @export
-setGeneric("denseRank", function(x) { standardGeneric("denseRank") })
+setGeneric("dense_rank", function(x) { standardGeneric("dense_rank") })
 
 #' @rdname explode
 #' @export
@@ -872,9 +872,9 @@ setGeneric("ntile", function(x) { standardGeneric("ntile") })
 #' @export
 setGeneric("n_distinct", function(x, ...) { standardGeneric("n_distinct") })
 
-#' @rdname percentRank
+#' @rdname percent_rank
 #' @export
-setGeneric("percentRank", function(x) { standardGeneric("percentRank") })
+setGeneric("percent_rank", function(x) { standardGeneric("percent_rank") })
 
 #' @rdname pmod
 #' @export
@@ -913,9 +913,9 @@ setGeneric("reverse", function(x) { standardGeneric("reverse") })
 #' @export
 setGeneric("rint", function(x, ...) { standardGeneric("rint") })
 
-#' @rdname rowNumber
+#' @rdname row_number
 #' @export
-setGeneric("rowNumber", function(x) { standardGeneric("rowNumber") })
+setGeneric("row_number", function(x) { standardGeneric("row_number") })
 
 #' @rdname rpad
 #' @export
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
@@ -861,8 +861,8 @@ test_that("column functions", {
   c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c)
   c12 <- variance(c)
   c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1)
-  c14 <- cumeDist() + ntile(1)
-  c15 <- denseRank() + percentRank() + rank() + rowNumber()
+  c14 <- cume_dist() + ntile(1)
+  c15 <- dense_rank() + percent_rank() + rank() + row_number()
 
   # Test if base::rank() is exposed
   expect_equal(class(rank())[[1]], "Column")