Response to Matei's feedback

pwendell · pwendell · commit f9da752e034d · 2014-05-11T17:09:57.000-07:00
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -331,8 +331,10 @@ abstract class RDD[T: ClassTag](
       /** Distributes elements evenly across output partitions, starting from a random partition. */
       def distributePartition(index: Int, items: Iterator[T]): Iterator[(Int, T)] = {
         var position = (new Random(index)).nextInt(numPartitions)
-        items.map{ t =>
-          position = position + 1 % numPartitions
+        items.map { t =>
+          // Note that the hash code of the key will just be the key itself. The HashPartitioner
+          // will mod it with the total number of partitions.
+          position = position + 1
           (position, t)
         }
       }
diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
@@ -219,7 +219,7 @@ class RDDSuite extends FunSuite with SharedSparkContext {
     def testSplitPartitions(input: Seq[Int], initialPartitions: Int, finalPartitions: Int) {
       val data = sc.parallelize(input, initialPartitions)
       val repartitioned = data.repartition(finalPartitions)
-      assert(repartitioned.partitions.size == finalPartitions)
+      assert(repartitioned.partitions.size === finalPartitions)
       val partitions = repartitioned.glom().collect()
       // assert all elements are present
       assert(repartitioned.collect().sortWith(_ > _).toSeq === input.toSeq.sortWith(_ > _).toSeq)

Original file line number	Diff line number	Diff line change
`@@ -331,8 +331,10 @@ abstract class RDD[T: ClassTag](`
`331`	`331`	`/** Distributes elements evenly across output partitions, starting from a random partition. */`
`332`	`332`	`def distributePartition(index: Int, items: Iterator[T]): Iterator[(Int, T)] = {`
`333`	`333`	`var position = (new Random(index)).nextInt(numPartitions)`
`334`		`- items.map{ t =>`
`335`		`- position = position + 1 % numPartitions`
	`334`	`+ items.map { t =>`
	`335`	`+ // Note that the hash code of the key will just be the key itself. The HashPartitioner`
	`336`	`+ // will mod it with the total number of partitions.`
	`337`	`+ position = position + 1`
`336`	`338`	`(position, t)`
`337`	`339`	`}`
`338`	`340`	`}`