[SPARK-4964] add foreachPartitionWithIndex, to avoid doing equivalent map + empty foreach boilerplate

koeninger · koeninger · commit e09045b6088f · 2014-12-26T16:07:38.000-06:00
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -787,6 +787,20 @@ abstract class RDD[T: ClassTag](
     sc.runJob(this, (iter: Iterator[T]) => cleanF(iter))
   }
 
+  /**
+   * Applies `f` to each partition of this RDD, passing the index of the partition
+   * together with its iterator. `f` is invoked only for its side effects.
+   */
+  def foreachPartitionWithIndex(f: (Int, Iterator[T]) => Unit): Unit = {
+    val func: (Int, Iterator[T]) => Iterator[Unit] = (index, iter) => {
+      f(index, iter)
+      Iterator.empty
+    }
+    // The job function ignores its (always empty) input; all work happens inside `func`.
+    sc.runJob(
+      mapPartitionsWithIndex(func, preservesPartitioning = true), (_: Iterator[Unit]) => ())
+  }
+
   /**
    * Return an array that contains all of the elements in this RDD.
    */
diff --git a/external/kafka/src/main/scala/org/apache/spark/rdd/kafka/KafkaRDD.scala b/external/kafka/src/main/scala/org/apache/spark/rdd/kafka/KafkaRDD.scala
@@ -128,11 +128,19 @@ class KafkaRDD[
               .dropWhile(_.offset < requestOffset)
           }
           if (!iter.hasNext) {
+            assert(requestOffset == part.untilOffset,
+              s"ran out of messages before reaching ending offset ${part.untilOffset} " +
+                s"for topic ${part.topic} partition ${part.partition} start ${part.fromOffset}." +
+                " This should not happen, and indicates that messages may have been lost")
             finished = true
             null.asInstanceOf[R]
           } else {
             val item = iter.next
             if (item.offset >= part.untilOffset) {
+              assert(item.offset == part.untilOffset,
+                s"got ${item.offset} > ending offset ${part.untilOffset} " +
+                  s"for topic ${part.topic} partition ${part.partition} start ${part.fromOffset}." +
+                  " This should not happen, and indicates a message may have been skipped")
               finished = true
               null.asInstanceOf[R]
             } else {