Skip to content

Commit ba02f6c

Browse files
yanboliang authored and shivaram committed
[SPARK-12025][SPARKR] Rename some window rank function names for SparkR
Change ```cumeDist -> cume_dist, denseRank -> dense_rank, percentRank -> percent_rank, rowNumber -> row_number``` on the SparkR side.

There are two reasons why we should make this change:

* We should follow the [naming convention rule of R](http://www.inside-r.org/node/230645).
* Spark DataFrame has deprecated the old convention (such as ```cumeDist```) and will remove it in Spark 2.0.

It's better to fix this issue before the 1.6 release; otherwise we will have to make a breaking API change.

cc shivaram sun-rui

Author: Yanbo Liang <[email protected]>

Closes #10016 from yanboliang/SPARK-12025.
1 parent a374e20 commit ba02f6c

File tree

4 files changed

+41
-41
lines changed

4 files changed

+41
-41
lines changed

R/pkg/NAMESPACE

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,14 +123,14 @@ exportMethods("%in%",
123123
"count",
124124
"countDistinct",
125125
"crc32",
126-
"cumeDist",
126+
"cume_dist",
127127
"date_add",
128128
"date_format",
129129
"date_sub",
130130
"datediff",
131131
"dayofmonth",
132132
"dayofyear",
133-
"denseRank",
133+
"dense_rank",
134134
"desc",
135135
"endsWith",
136136
"exp",
@@ -188,7 +188,7 @@ exportMethods("%in%",
188188
"next_day",
189189
"ntile",
190190
"otherwise",
191-
"percentRank",
191+
"percent_rank",
192192
"pmod",
193193
"quarter",
194194
"rand",
@@ -200,7 +200,7 @@ exportMethods("%in%",
200200
"rint",
201201
"rlike",
202202
"round",
203-
"rowNumber",
203+
"row_number",
204204
"rpad",
205205
"rtrim",
206206
"second",

R/pkg/R/functions.R

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2146,47 +2146,47 @@ setMethod("ifelse",
21462146

21472147
###################### Window functions######################
21482148

2149-
#' cumeDist
2149+
#' cume_dist
21502150
#'
21512151
#' Window function: returns the cumulative distribution of values within a window partition,
21522152
#' i.e. the fraction of rows that are at or below the current row.
21532153
#'
21542154
#' N = total number of rows in the partition
2155-
#' cumeDist(x) = number of values before (and including) x / N
2155+
#' cume_dist(x) = number of values before (and including) x / N
21562156
#'
21572157
#' This is equivalent to the CUME_DIST function in SQL.
21582158
#'
2159-
#' @rdname cumeDist
2160-
#' @name cumeDist
2159+
#' @rdname cume_dist
2160+
#' @name cume_dist
21612161
#' @family window_funcs
21622162
#' @export
2163-
#' @examples \dontrun{cumeDist()}
2164-
setMethod("cumeDist",
2163+
#' @examples \dontrun{cume_dist()}
2164+
setMethod("cume_dist",
21652165
signature(x = "missing"),
21662166
function() {
2167-
jc <- callJStatic("org.apache.spark.sql.functions", "cumeDist")
2167+
jc <- callJStatic("org.apache.spark.sql.functions", "cume_dist")
21682168
column(jc)
21692169
})
21702170

2171-
#' denseRank
2171+
#' dense_rank
21722172
#'
21732173
#' Window function: returns the rank of rows within a window partition, without any gaps.
2174-
#' The difference between rank and denseRank is that denseRank leaves no gaps in ranking
2175-
#' sequence when there are ties. That is, if you were ranking a competition using denseRank
2174+
#' The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking
2175+
#' sequence when there are ties. That is, if you were ranking a competition using dense_rank
21762176
#' and had three people tie for second place, you would say that all three were in second
21772177
#' place and that the next person came in third.
21782178
#'
21792179
#' This is equivalent to the DENSE_RANK function in SQL.
21802180
#'
2181-
#' @rdname denseRank
2182-
#' @name denseRank
2181+
#' @rdname dense_rank
2182+
#' @name dense_rank
21832183
#' @family window_funcs
21842184
#' @export
2185-
#' @examples \dontrun{denseRank()}
2186-
setMethod("denseRank",
2185+
#' @examples \dontrun{dense_rank()}
2186+
setMethod("dense_rank",
21872187
signature(x = "missing"),
21882188
function() {
2189-
jc <- callJStatic("org.apache.spark.sql.functions", "denseRank")
2189+
jc <- callJStatic("org.apache.spark.sql.functions", "dense_rank")
21902190
column(jc)
21912191
})
21922192

@@ -2264,7 +2264,7 @@ setMethod("ntile",
22642264
column(jc)
22652265
})
22662266

2267-
#' percentRank
2267+
#' percent_rank
22682268
#'
22692269
#' Window function: returns the relative rank (i.e. percentile) of rows within a window partition.
22702270
#'
@@ -2274,15 +2274,15 @@ setMethod("ntile",
22742274
#'
22752275
#' This is equivalent to the PERCENT_RANK function in SQL.
22762276
#'
2277-
#' @rdname percentRank
2278-
#' @name percentRank
2277+
#' @rdname percent_rank
2278+
#' @name percent_rank
22792279
#' @family window_funcs
22802280
#' @export
2281-
#' @examples \dontrun{percentRank()}
2282-
setMethod("percentRank",
2281+
#' @examples \dontrun{percent_rank()}
2282+
setMethod("percent_rank",
22832283
signature(x = "missing"),
22842284
function() {
2285-
jc <- callJStatic("org.apache.spark.sql.functions", "percentRank")
2285+
jc <- callJStatic("org.apache.spark.sql.functions", "percent_rank")
22862286
column(jc)
22872287
})
22882288

@@ -2316,21 +2316,21 @@ setMethod("rank",
23162316
base::rank(x, ...)
23172317
})
23182318

2319-
#' rowNumber
2319+
#' row_number
23202320
#'
23212321
#' Window function: returns a sequential number starting at 1 within a window partition.
23222322
#'
23232323
#' This is equivalent to the ROW_NUMBER function in SQL.
23242324
#'
2325-
#' @rdname rowNumber
2326-
#' @name rowNumber
2325+
#' @rdname row_number
2326+
#' @name row_number
23272327
#' @family window_funcs
23282328
#' @export
2329-
#' @examples \dontrun{rowNumber()}
2330-
setMethod("rowNumber",
2329+
#' @examples \dontrun{row_number()}
2330+
setMethod("row_number",
23312331
signature(x = "missing"),
23322332
function() {
2333-
jc <- callJStatic("org.apache.spark.sql.functions", "rowNumber")
2333+
jc <- callJStatic("org.apache.spark.sql.functions", "row_number")
23342334
column(jc)
23352335
})
23362336

R/pkg/R/generics.R

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -700,9 +700,9 @@ setGeneric("countDistinct", function(x, ...) { standardGeneric("countDistinct")
700700
#' @export
701701
setGeneric("crc32", function(x) { standardGeneric("crc32") })
702702

703-
#' @rdname cumeDist
703+
#' @rdname cume_dist
704704
#' @export
705-
setGeneric("cumeDist", function(x) { standardGeneric("cumeDist") })
705+
setGeneric("cume_dist", function(x) { standardGeneric("cume_dist") })
706706

707707
#' @rdname datediff
708708
#' @export
@@ -728,9 +728,9 @@ setGeneric("dayofmonth", function(x) { standardGeneric("dayofmonth") })
728728
#' @export
729729
setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") })
730730

731-
#' @rdname denseRank
731+
#' @rdname dense_rank
732732
#' @export
733-
setGeneric("denseRank", function(x) { standardGeneric("denseRank") })
733+
setGeneric("dense_rank", function(x) { standardGeneric("dense_rank") })
734734

735735
#' @rdname explode
736736
#' @export
@@ -872,9 +872,9 @@ setGeneric("ntile", function(x) { standardGeneric("ntile") })
872872
#' @export
873873
setGeneric("n_distinct", function(x, ...) { standardGeneric("n_distinct") })
874874

875-
#' @rdname percentRank
875+
#' @rdname percent_rank
876876
#' @export
877-
setGeneric("percentRank", function(x) { standardGeneric("percentRank") })
877+
setGeneric("percent_rank", function(x) { standardGeneric("percent_rank") })
878878

879879
#' @rdname pmod
880880
#' @export
@@ -913,9 +913,9 @@ setGeneric("reverse", function(x) { standardGeneric("reverse") })
913913
#' @export
914914
setGeneric("rint", function(x, ...) { standardGeneric("rint") })
915915

916-
#' @rdname rowNumber
916+
#' @rdname row_number
917917
#' @export
918-
setGeneric("rowNumber", function(x) { standardGeneric("rowNumber") })
918+
setGeneric("row_number", function(x) { standardGeneric("row_number") })
919919

920920
#' @rdname rpad
921921
#' @export

R/pkg/inst/tests/test_sparkSQL.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -861,8 +861,8 @@ test_that("column functions", {
861861
c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c)
862862
c12 <- variance(c)
863863
c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1)
864-
c14 <- cumeDist() + ntile(1)
865-
c15 <- denseRank() + percentRank() + rank() + rowNumber()
864+
c14 <- cume_dist() + ntile(1)
865+
c15 <- dense_rank() + percent_rank() + rank() + row_number()
866866

867867
# Test if base::rank() is exposed
868868
expect_equal(class(rank())[[1]], "Column")

0 commit comments

Comments
 (0)