@@ -678,14 +678,53 @@ setMethod("storageLevel",
678678 storageLevelToString(callJMethod(x @ sdf , " storageLevel" ))
679679 })
680680
#' Coalesce
#'
#' Returns a new SparkDataFrame that has exactly \code{numPartitions} partitions.
#' This operation results in a narrow dependency, e.g. if you go from 1000 partitions to 100
#' partitions, there will not be a shuffle, instead each of the 100 new partitions will claim 10 of
#' the current partitions. If a larger number of partitions is requested, it will stay at the
#' current number of partitions.
#'
#' However, if you're doing a drastic coalesce on a SparkDataFrame, e.g. to numPartitions = 1,
#' this may result in your computation taking place on fewer nodes than
#' you like (e.g. one node in the case of numPartitions = 1). To avoid this,
#' call \code{repartition}. This will add a shuffle step, but means the
#' current upstream partitions will be executed in parallel (per whatever
#' the current partitioning is).
#'
#' @param x a SparkDataFrame.
#' @param numPartitions the number of partitions to use.
#'
#' @family SparkDataFrame functions
#' @rdname coalesce
#' @name coalesce
#' @aliases coalesce,SparkDataFrame-method
#' @seealso \link{repartition}
#' @export
#' @examples
#'\dontrun{
#' sparkR.session()
#' path <- "path/to/file.json"
#' df <- read.json(path)
#' newDF <- coalesce(df, 1L)
#'}
#' @note coalesce(SparkDataFrame) since 2.1.1
setMethod("coalesce",
          signature(x = "SparkDataFrame"),
          function(x, numPartitions) {
            # Validate before crossing into the JVM; numToInt coerces
            # doubles such as 1 to an integer for the Scala side.
            stopifnot(is.numeric(numPartitions))
            sdf <- callJMethod(x@sdf, "coalesce", numToInt(numPartitions))
            dataFrame(sdf)
          })
719+
681720# ' Repartition
682721# '
683722# ' The following options for repartition are possible:
684723# ' \itemize{
685- # ' \item{1.} {Return a new SparkDataFrame partitioned by
724+ # ' \item{1.} {Return a new SparkDataFrame that has exactly \code{numPartitions}.}
725+ # ' \item{2.} {Return a new SparkDataFrame hash partitioned by
686726# ' the given columns into \code{numPartitions}.}
687- # ' \item{2.} {Return a new SparkDataFrame that has exactly \code{numPartitions}.}
688- # ' \item{3.} {Return a new SparkDataFrame partitioned by the given column(s),
727+ # ' \item{3.} {Return a new SparkDataFrame hash partitioned by the given column(s),
689728# ' using \code{spark.sql.shuffle.partitions} as number of partitions.}
690729# '}
691730# ' @param x a SparkDataFrame.
@@ -697,6 +736,7 @@ setMethod("storageLevel",
697736# ' @rdname repartition
698737# ' @name repartition
699738# ' @aliases repartition,SparkDataFrame-method
739+ # ' @seealso \link{coalesce}
700740# ' @export
701741# ' @examples
702742# '\dontrun{
@@ -1764,6 +1804,10 @@ setClassUnion("numericOrcharacter", c("numeric", "character"))
17641804# ' @note [[ since 1.4.0
17651805setMethod ("[[ ", signature(x = "SparkDataFrame", i = "numericOrcharacter"),
17661806 function (x , i ) {
1807+ if (length(i ) > 1 ) {
1808+ warning(" Subset index has length > 1. Only the first index is used." )
1809+ i <- i [1 ]
1810+ }
17671811 if (is.numeric(i )) {
17681812 cols <- columns(x )
17691813 i <- cols [[i ]]
@@ -1777,6 +1821,10 @@ setMethod("[[", signature(x = "SparkDataFrame", i = "numericOrcharacter"),
17771821# ' @note [[<- since 2.1.1
17781822setMethod(" [[<-" , signature(x = " SparkDataFrame" , i = " numericOrcharacter" ),
17791823 function (x , i , value ) {
1824+ if (length(i ) > 1 ) {
1825+ warning(" Subset index has length > 1. Only the first index is used." )
1826+ i <- i [1 ]
1827+ }
17801828 if (is.numeric(i )) {
17811829 cols <- columns(x )
17821830 i <- cols [[i ]]
0 commit comments