@@ -570,14 +570,16 @@ setMethod("unpersist",
570570
571571# ' Repartition
572572# '
573- # ' There are two different options for repartition
574- # ' Option 1
575- # ' Return a new SparkDataFrame that has exactly numPartitions partitions.
576- # ' Option 2
577- # ' Return a new SparkDataFrame which has as many partitions as the number of unique
578- # ' groups identified by column(s) values which are being specified by the input.
579- # ' If both numPartitions and columns are specified, Option 1 will be chosen.
580- # '
573+ # ' The following options for repartitioning are possible:
574+ # ' \itemize{
575+ # ' \item{"Option 1"} {Return a new SparkDataFrame partitioned by
576+ # ' the given columns into `numPartitions` partitions.}
577+ # ' \item{"Option 2"} {Return a new SparkDataFrame that has exactly `numPartitions` partitions.}
578+ # ' \item{"Option 3"} {Return a new SparkDataFrame partitioned by the given columns,
579+ # ' using `spark.sql.shuffle.partitions` as the number of partitions.}
580+ # ' \item{"Option 4"} {Return a new SparkDataFrame that has the default number of
581+ # ' partitions (200), used when neither `numPartitions` nor columns are given.}
582+ # '}
581583# ' @param x A SparkDataFrame
582584# ' @param numPartitions The number of partitions to use.
583585# ' @param col The column by which the partitioning will be performed.
@@ -595,19 +597,29 @@ setMethod("unpersist",
595597# ' newDF <- repartition(df, 2L)
596598# ' newDF <- repartition(df, numPartitions = 2L)
597599# ' newDF <- repartition(df, col = df$"col1", df$"col2")
600+ # ' newDF <- repartition(df, 3L, col = df$"col1", df$"col2")
598601# '}
599602setMethod ("repartition ",
600603 signature(x = " SparkDataFrame" ),
601604 function (x , numPartitions = NULL , col = NULL , ... ) {
# Picks a branch from which of numPartitions / col were supplied. Every branch
# invokes "repartition" on x@sdf through callJMethod, and the result is wrapped
# by dataFrame() at the end.
602- if (! is.null(numPartitions ) && (class(numPartitions ) == " numeric"
603- || class(numPartitions ) == " integer" )) {
604- sdf <- callJMethod(x @ sdf , " repartition" , numToInt(numPartitions ))
605+ if (! is.null(numPartitions ) && (class(numPartitions ) == " numeric" ||
606+ class(numPartitions ) == " integer" )) {
607+ # both the number of partitions and the partitioning column(s) are specified
608+ if (! is.null(col ) && class(col ) == " Column" ) {
609+ cols <- list (col , ... ) # col plus any further columns passed through ...
610+ jcol <- lapply(cols , function (c ) { c @ jc }) # take the jc slot of each column
611+ sdf <- callJMethod(x @ sdf , " repartition" , numToInt(numPartitions ), jcol )
612+ } else {
613+ # only the number of partitions is used: col is NULL or is not a Column (a non-Column col is ignored without an error)
614+ sdf <- callJMethod(x @ sdf , " repartition" , numToInt(numPartitions ))
615+ }
605616 } else if (! is.null(col ) && class(col ) == " Column" ) {
617+ # only the partitioning column(s) are specified
606618 cols <- list (col , ... )
607619 jcol <- lapply(cols , function (c ) { c @ jc })
608620 sdf <- callJMethod(x @ sdf , " repartition" , jcol )
609621 } else {
610- stop( " Please specify numPartitions or at least one column " )
# Neither a numeric/integer numPartitions nor a Column col was given; this
# branch also catches arguments of an unexpected type. It replaces the stop()
# on the removed line above with a hard-coded fallback of 200 partitions.
# NOTE(review): 200 presumably mirrors Spark's default shuffle partition
# count - confirm, and confirm that accepting wrongly typed arguments without
# an error is intended.
622+ sdf <- callJMethod( x @ sdf , " repartition " , 200L )
611623 }
612624 dataFrame(sdf ) # wrap the value returned by callJMethod via dataFrame()
613625 })
0 commit comments