Skip to content

Commit e7e471c

Browse files
committed
Add spark_partition_id in SparkR
1 parent ce3b98b commit e7e471c

File tree

4 files changed

+27
-0
lines changed

4 files changed

+27
-0
lines changed

R/pkg/NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ exportMethods("%in%",
258258
"skewness",
259259
"sort_array",
260260
"soundex",
261+
"spark_partition_id",
261262
"stddev",
262263
"stddev_pop",
263264
"stddev_samp",

R/pkg/R/functions.R

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1179,6 +1179,27 @@ setMethod("soundex",
11791179
column(jc)
11801180
})
11811181

1182+
#' spark_partition_id
1183+
#'
1184+
#' Return the column for partition ID of the Spark task.
1185+
#' Note that this is indeterministic because it depends on data partitioning and
1186+
#' task scheduling.
1187+
#'
1188+
#' This is equivalent to the SPARK_PARTITION_ID function in SQL.
1189+
#'
1190+
#' @rdname spark_partition_id
1191+
#' @name spark_partition_id
1192+
#' @export
1193+
#' @examples
1194+
#' \dontrun{select(df, spark_partition_id())}
1195+
#' @note spark_partition_id since 2.0.0
1196+
setMethod("spark_partition_id",
1197+
signature(x = "missing"),
1198+
function() {
1199+
jc <- callJStatic("org.apache.spark.sql.functions", "spark_partition_id")
1200+
column(jc)
1201+
})
1202+
11821203
#' @rdname sd
11831204
#' @name stddev
11841205
setMethod("stddev",

R/pkg/R/generics.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1126,6 +1126,10 @@ setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array")
11261126
#' @export
11271127
setGeneric("soundex", function(x) { standardGeneric("soundex") })
11281128

1129+
#' @rdname spark_partition_id
1130+
#' @export
1131+
setGeneric("spark_partition_id", function(x) { standardGeneric("spark_partition_id") })
1132+
11291133
#' @rdname sd
11301134
#' @export
11311135
setGeneric("stddev", function(x) { standardGeneric("stddev") })

R/pkg/inst/tests/testthat/test_sparkSQL.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,6 +1058,7 @@ test_that("column functions", {
10581058
c16 <- is.nan(c) + isnan(c) + isNaN(c)
10591059
c17 <- cov(c, c1) + cov("c", "c1") + covar_samp(c, c1) + covar_samp("c", "c1")
10601060
c18 <- covar_pop(c, c1) + covar_pop("c", "c1")
1061+
c19 <- spark_partition_id()
10611062

10621063
# Test if base::is.nan() is exposed
10631064
expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE))

0 commit comments

Comments
 (0)