Skip to content

Commit 9c36aa2

Browse files
zero323Felix Cheung
authored andcommitted
[SPARK-20585][SPARKR] R generic hint support
## What changes were proposed in this pull request? Adds support for generic hints on `SparkDataFrame` ## How was this patch tested? Unit tests, `check-cran.sh` Author: zero323 <[email protected]> Closes #17851 from zero323/SPARK-20585.
1 parent b8302cc commit 9c36aa2

File tree

4 files changed

+47
-0
lines changed

4 files changed

+47
-0
lines changed

R/pkg/NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ exportMethods("arrange",
123123
"group_by",
124124
"groupBy",
125125
"head",
126+
"hint",
126127
"insertInto",
127128
"intersect",
128129
"isLocal",

R/pkg/R/DataFrame.R

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3715,3 +3715,33 @@ setMethod("rollup",
37153715
sgd <- callJMethod(x@sdf, "rollup", jcol)
37163716
groupedData(sgd)
37173717
})
3718+
3719+
#' hint
3720+
#'
3721+
#' Specifies execution plan hint and return a new SparkDataFrame.
3722+
#'
3723+
#' @param x a SparkDataFrame.
3724+
#' @param name a name of the hint.
3725+
#' @param ... optional parameters for the hint.
3726+
#' @return A SparkDataFrame.
3727+
#' @family SparkDataFrame functions
3728+
#' @aliases hint,SparkDataFrame,character-method
3729+
#' @rdname hint
3730+
#' @name hint
3731+
#' @export
3732+
#' @examples
3733+
#' \dontrun{
3734+
#' df <- createDataFrame(mtcars)
3735+
#' avg_mpg <- mean(groupBy(createDataFrame(mtcars), "cyl"), "mpg")
3736+
#'
3737+
#' head(join(df, hint(avg_mpg, "broadcast"), df$cyl == avg_mpg$cyl))
3738+
#' }
3739+
#' @note hint since 2.2.0
3740+
setMethod("hint",
3741+
signature(x = "SparkDataFrame", name = "character"),
3742+
function(x, name, ...) {
3743+
parameters <- list(...)
3744+
stopifnot(all(sapply(parameters, is.character)))
3745+
jdf <- callJMethod(x@sdf, "hint", name, parameters)
3746+
dataFrame(jdf)
3747+
})

R/pkg/R/generics.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,10 @@ setGeneric("group_by", function(x, ...) { standardGeneric("group_by") })
576576
#' @export
577577
setGeneric("groupBy", function(x, ...) { standardGeneric("groupBy") })
578578

579+
#' @rdname hint
580+
#' @export
581+
setGeneric("hint", function(x, name, ...) { standardGeneric("hint") })
582+
579583
#' @rdname insertInto
580584
#' @export
581585
setGeneric("insertInto", function(x, tableName, ...) { standardGeneric("insertInto") })

R/pkg/inst/tests/testthat/test_sparkSQL.R

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2182,6 +2182,18 @@ test_that("join(), crossJoin() and merge() on a DataFrame", {
21822182

21832183
unlink(jsonPath2)
21842184
unlink(jsonPath3)
2185+
2186+
# Join with broadcast hint
2187+
df1 <- sql("SELECT * FROM range(10e10)")
2188+
df2 <- sql("SELECT * FROM range(10e10)")
2189+
2190+
execution_plan <- capture.output(explain(join(df1, df2, df1$id == df2$id)))
2191+
expect_false(any(grepl("BroadcastHashJoin", execution_plan)))
2192+
2193+
execution_plan_hint <- capture.output(
2194+
explain(join(df1, hint(df2, "broadcast"), df1$id == df2$id))
2195+
)
2196+
expect_true(any(grepl("BroadcastHashJoin", execution_plan_hint)))
21852197
})
21862198

21872199
test_that("toJSON() on DataFrame", {

0 commit comments

Comments
 (0)