Commit 6979edf

yanboliang authored and shivaram committed
[SPARK-12115][SPARKR] Change numPartitions() to getNumPartitions() to be consistent with Scala/Python
Change ```numPartitions()``` to ```getNumPartitions()``` to be consistent with Scala/Python. <del>Note: If we can not catch up with the 1.6 release, it will be a breaking change for 1.7 that we also need to explain in the release note.</del> cc sun-rui felixcheung shivaram

Author: Yanbo Liang <[email protected]>

Closes #10123 from yanboliang/spark-12115.
1 parent 895b6c4 commit 6979edf
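
For quick reference, here is a minimal sketch of how the renamed API behaves after this commit, based on the roxygen examples updated in the diff below. These RDD functions are internal in this release (tagged @noRd), so the `SparkR:::` prefix is used to reach them; `sc` comes from `sparkR.init()` as in those examples.

```r
library(SparkR)

sc <- sparkR.init()
rdd <- SparkR:::parallelize(sc, 1:10, 2L)

# New name, consistent with the Scala/Python APIs:
SparkR:::getNumPartitions(rdd)   # 2

# Old name still works, but now emits a deprecation warning and
# simply forwards to getNumPartitions():
SparkR:::numPartitions(rdd)      # 2
```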

File tree (4 files changed: +45, -30 lines):

  R/pkg/R/RDD.R
  R/pkg/R/generics.R
  R/pkg/R/pairRDD.R
  R/pkg/inst/tests/test_rdd.R

R/pkg/R/RDD.R (33 additions, 22 deletions)

```diff
@@ -306,17 +306,28 @@ setMethod("checkpoint",
 #'\dontrun{
 #' sc <- sparkR.init()
 #' rdd <- parallelize(sc, 1:10, 2L)
-#' numPartitions(rdd) # 2L
+#' getNumPartitions(rdd) # 2L
 #'}
-#' @rdname numPartitions
+#' @rdname getNumPartitions
+#' @aliases getNumPartitions,RDD-method
+#' @noRd
+setMethod("getNumPartitions",
+          signature(x = "RDD"),
+          function(x) {
+            callJMethod(getJRDD(x), "getNumPartitions")
+          })
+
+#' Gets the number of partitions of an RDD, the same as getNumPartitions.
+#' But this function has been deprecated, please use getNumPartitions.
+#'
+#' @rdname getNumPartitions
 #' @aliases numPartitions,RDD-method
 #' @noRd
 setMethod("numPartitions",
           signature(x = "RDD"),
           function(x) {
-            jrdd <- getJRDD(x)
-            partitions <- callJMethod(jrdd, "partitions")
-            callJMethod(partitions, "size")
+            .Deprecated("getNumPartitions")
+            getNumPartitions(x)
           })
 
 #' Collect elements of an RDD
@@ -443,7 +454,7 @@ setMethod("countByValue",
           signature(x = "RDD"),
           function(x) {
             ones <- lapply(x, function(item) { list(item, 1L) })
-            collect(reduceByKey(ones, `+`, numPartitions(x)))
+            collect(reduceByKey(ones, `+`, getNumPartitions(x)))
           })
 
 #' Apply a function to all elements
@@ -759,7 +770,7 @@ setMethod("take",
             resList <- list()
             index <- -1
             jrdd <- getJRDD(x)
-            numPartitions <- numPartitions(x)
+            numPartitions <- getNumPartitions(x)
             serializedModeRDD <- getSerializedMode(x)
 
             # TODO(shivaram): Collect more than one partition based on size
@@ -823,7 +834,7 @@ setMethod("first",
 #' @noRd
 setMethod("distinct",
           signature(x = "RDD"),
-          function(x, numPartitions = SparkR:::numPartitions(x)) {
+          function(x, numPartitions = SparkR:::getNumPartitions(x)) {
             identical.mapped <- lapply(x, function(x) { list(x, NULL) })
             reduced <- reduceByKey(identical.mapped,
                                    function(x, y) { x },
@@ -993,8 +1004,8 @@ setMethod("keyBy",
 #'\dontrun{
 #' sc <- sparkR.init()
 #' rdd <- parallelize(sc, list(1, 2, 3, 4, 5, 6, 7), 4L)
-#' numPartitions(rdd) # 4
-#' numPartitions(repartition(rdd, 2L)) # 2
+#' getNumPartitions(rdd) # 4
+#' getNumPartitions(repartition(rdd, 2L)) # 2
 #'}
 #' @rdname repartition
 #' @aliases repartition,RDD
@@ -1014,8 +1025,8 @@ setMethod("repartition",
 #'\dontrun{
 #' sc <- sparkR.init()
 #' rdd <- parallelize(sc, list(1, 2, 3, 4, 5), 3L)
-#' numPartitions(rdd) # 3
-#' numPartitions(coalesce(rdd, 1L)) # 1
+#' getNumPartitions(rdd) # 3
+#' getNumPartitions(coalesce(rdd, 1L)) # 1
 #'}
 #' @rdname coalesce
 #' @aliases coalesce,RDD
@@ -1024,7 +1035,7 @@ setMethod("coalesce",
           signature(x = "RDD", numPartitions = "numeric"),
           function(x, numPartitions, shuffle = FALSE) {
             numPartitions <- numToInt(numPartitions)
-            if (shuffle || numPartitions > SparkR:::numPartitions(x)) {
+            if (shuffle || numPartitions > SparkR:::getNumPartitions(x)) {
               func <- function(partIndex, part) {
                 set.seed(partIndex) # partIndex as seed
                 start <- as.integer(base::sample(numPartitions, 1) - 1)
@@ -1112,7 +1123,7 @@ setMethod("saveAsTextFile",
 #' @noRd
 setMethod("sortBy",
           signature(x = "RDD", func = "function"),
-          function(x, func, ascending = TRUE, numPartitions = SparkR:::numPartitions(x)) {
+          function(x, func, ascending = TRUE, numPartitions = SparkR:::getNumPartitions(x)) {
             values(sortByKey(keyBy(x, func), ascending, numPartitions))
           })
 
@@ -1144,7 +1155,7 @@ takeOrderedElem <- function(x, num, ascending = TRUE) {
   resList <- list()
   index <- -1
   jrdd <- getJRDD(newRdd)
-  numPartitions <- numPartitions(newRdd)
+  numPartitions <- getNumPartitions(newRdd)
   serializedModeRDD <- getSerializedMode(newRdd)
 
   while (TRUE) {
@@ -1368,7 +1379,7 @@ setMethod("setName",
 setMethod("zipWithUniqueId",
           signature(x = "RDD"),
           function(x) {
-            n <- numPartitions(x)
+            n <- getNumPartitions(x)
 
             partitionFunc <- function(partIndex, part) {
               mapply(
@@ -1409,7 +1420,7 @@ setMethod("zipWithUniqueId",
 setMethod("zipWithIndex",
           signature(x = "RDD"),
           function(x) {
-            n <- numPartitions(x)
+            n <- getNumPartitions(x)
             if (n > 1) {
               nums <- collect(lapplyPartition(x,
                               function(part) {
@@ -1521,8 +1532,8 @@ setMethod("unionRDD",
 setMethod("zipRDD",
           signature(x = "RDD", other = "RDD"),
           function(x, other) {
-            n1 <- numPartitions(x)
-            n2 <- numPartitions(other)
+            n1 <- getNumPartitions(x)
+            n2 <- getNumPartitions(other)
             if (n1 != n2) {
               stop("Can only zip RDDs which have the same number of partitions.")
             }
@@ -1588,7 +1599,7 @@ setMethod("cartesian",
 #' @noRd
 setMethod("subtract",
           signature(x = "RDD", other = "RDD"),
-          function(x, other, numPartitions = SparkR:::numPartitions(x)) {
+          function(x, other, numPartitions = SparkR:::getNumPartitions(x)) {
             mapFunction <- function(e) { list(e, NA) }
             rdd1 <- map(x, mapFunction)
             rdd2 <- map(other, mapFunction)
@@ -1620,7 +1631,7 @@ setMethod("subtract",
 #' @noRd
 setMethod("intersection",
           signature(x = "RDD", other = "RDD"),
-          function(x, other, numPartitions = SparkR:::numPartitions(x)) {
+          function(x, other, numPartitions = SparkR:::getNumPartitions(x)) {
             rdd1 <- map(x, function(v) { list(v, NA) })
             rdd2 <- map(other, function(v) { list(v, NA) })
 
@@ -1661,7 +1672,7 @@ setMethod("zipPartitions",
             if (length(rrdds) == 1) {
               return(rrdds[[1]])
             }
-            nPart <- sapply(rrdds, numPartitions)
+            nPart <- sapply(rrdds, getNumPartitions)
             if (length(unique(nPart)) != 1) {
               stop("Can only zipPartitions RDDs which have the same number of partitions.")
             }
```
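
The pattern above, where the old `numPartitions` method body is replaced by a call to base R's `.Deprecated()` followed by a call to the new function, is a common way to stage a rename. A minimal, self-contained sketch with hypothetical names (`oldName`/`newName` are placeholders, not SparkR functions):

```r
# newName() carries the real implementation; oldName() only warns and forwards.
newName <- function(x) {
  length(x)
}

oldName <- function(x) {
  .Deprecated("newName")  # base R: signals a deprecation warning naming the replacement
  newName(x)
}

oldName(1:5)  # returns 5, with a warning pointing callers at newName()
```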

R/pkg/R/generics.R (5 additions, 1 deletion)

```diff
@@ -133,7 +133,11 @@ setGeneric("sumRDD", function(x) { standardGeneric("sumRDD") })
 # @export
 setGeneric("name", function(x) { standardGeneric("name") })
 
-# @rdname numPartitions
+# @rdname getNumPartitions
+# @export
+setGeneric("getNumPartitions", function(x) { standardGeneric("getNumPartitions") })
+
+# @rdname getNumPartitions
 # @export
 setGeneric("numPartitions", function(x) { standardGeneric("numPartitions") })
 
```
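
generics.R only declares the S4 generic; the RDD method attached in RDD.R above is what actually runs when `getNumPartitions(rdd)` is called. A self-contained sketch of that setGeneric/setMethod pairing, using a hypothetical `ToyRDD` class and `getNumParts` generic rather than the real SparkR classes:

```r
library(methods)

# Hypothetical stand-in for RDD, just to make the sketch runnable on its own.
setClass("ToyRDD", representation(parts = "integer"))

# Declare the generic (what generics.R does) ...
setGeneric("getNumParts", function(x) { standardGeneric("getNumParts") })

# ... then attach an implementation for the class (what RDD.R does).
setMethod("getNumParts",
          signature(x = "ToyRDD"),
          function(x) {
            length(x@parts)
          })

getNumParts(new("ToyRDD", parts = 1:4))  # 4
```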

R/pkg/R/pairRDD.R (2 additions, 2 deletions)

```diff
@@ -750,7 +750,7 @@ setMethod("cogroup",
 #' @noRd
 setMethod("sortByKey",
           signature(x = "RDD"),
-          function(x, ascending = TRUE, numPartitions = SparkR:::numPartitions(x)) {
+          function(x, ascending = TRUE, numPartitions = SparkR:::getNumPartitions(x)) {
             rangeBounds <- list()
 
             if (numPartitions > 1) {
@@ -818,7 +818,7 @@ setMethod("sortByKey",
 #' @noRd
 setMethod("subtractByKey",
           signature(x = "RDD", other = "RDD"),
-          function(x, other, numPartitions = SparkR:::numPartitions(x)) {
+          function(x, other, numPartitions = SparkR:::getNumPartitions(x)) {
             filterFunction <- function(elem) {
               iters <- elem[[2]]
               (length(iters[[1]]) > 0) && (length(iters[[2]]) == 0)
```

R/pkg/inst/tests/test_rdd.R (5 additions, 5 deletions)

```diff
@@ -28,8 +28,8 @@ intPairs <- list(list(1L, -1), list(2L, 100), list(2L, 1), list(1L, 200))
 intRdd <- parallelize(sc, intPairs, 2L)
 
 test_that("get number of partitions in RDD", {
-  expect_equal(numPartitions(rdd), 2)
-  expect_equal(numPartitions(intRdd), 2)
+  expect_equal(getNumPartitions(rdd), 2)
+  expect_equal(getNumPartitions(intRdd), 2)
 })
 
 test_that("first on RDD", {
@@ -304,18 +304,18 @@ test_that("repartition/coalesce on RDDs", {
 
   # repartition
   r1 <- repartition(rdd, 2)
-  expect_equal(numPartitions(r1), 2L)
+  expect_equal(getNumPartitions(r1), 2L)
   count <- length(collectPartition(r1, 0L))
   expect_true(count >= 8 && count <= 12)
 
   r2 <- repartition(rdd, 6)
-  expect_equal(numPartitions(r2), 6L)
+  expect_equal(getNumPartitions(r2), 6L)
   count <- length(collectPartition(r2, 0L))
   expect_true(count >= 0 && count <= 4)
 
   # coalesce
   r3 <- coalesce(rdd, 1)
-  expect_equal(numPartitions(r3), 1L)
+  expect_equal(getNumPartitions(r3), 1L)
   count <- length(collectPartition(r3, 0L))
   expect_equal(count, 20)
 })
```
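
The test changes above only switch existing call sites to the new name. If one also wanted to pin down the deprecation behaviour of the old name, a hypothetical extra check along these lines could be added to test_rdd.R (not part of this commit; `rdd` comes from the existing test setup that parallelizes 1:10 into 2 partitions):

```r
test_that("numPartitions is deprecated but still works", {
  # The old name should warn and still return the partition count.
  expect_warning(n <- numPartitions(rdd), "deprecated")
  expect_equal(n, 2)
})
```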
