Skip to content

Commit b1e76dd

Browse files
committed
added documentation on repartitions
1 parent 5807e35 commit b1e76dd

File tree

2 files changed

+12
-0
lines changed

2 files changed

+12
-0
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,12 @@ case class Distinct(child: LogicalPlan) extends UnaryNode {
310310
override def output: Seq[Attribute] = child.output
311311
}
312312

313+
/**
314+
* Returns a new RDD that has exactly `numPartitions` partitions. Differs from
315+
* [[RepartitionByExpression]] as this operator is used directly by DataFrame, because the user
316+
* asked for `coalesce` or `repartition`. [[RepartitionByExpression]] is used when the consumer
317+
* of the output requires some specific ordering or distribution of the data.
318+
*/
313319
case class Repartition(numPartitions: Int, shuffle: Boolean, child: LogicalPlan)
314320
extends UnaryNode {
315321
override def output: Seq[Attribute] = child.output

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/partitioning.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,11 @@ abstract class RedistributeData extends UnaryNode {
3232
case class SortPartitions(sortExpressions: Seq[SortOrder], child: LogicalPlan)
3333
extends RedistributeData
3434

35+
/**
36+
* This operator repartitions data using [[Expression]]s, and receives information about the
37+
* number of partitions during execution. Used when a specific ordering or distribution is
38+
* expected by the consumer of the query result. Use [[Repartition]] for RDD-like
39+
* `coalesce` and `repartition`.
40+
*/
3541
case class RepartitionByExpression(partitionExpressions: Seq[Expression], child: LogicalPlan)
3642
extends RedistributeData

0 commit comments

Comments
 (0)