Commit 53846f5

Possibly fixed flakiness
1 parent 96e95ab

File tree: 3 files changed, +45 −31 lines

streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala

Lines changed: 2 additions & 1 deletion

@@ -220,7 +220,8 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging {
     logInfo("Batches pending processing (" + pendingTimes.size + " batches): " +
       pendingTimes.mkString(", "))
     // Reschedule jobs for these times
-    val timesToReschedule = (pendingTimes ++ downTimes).distinct.sorted(Time.ordering)
+    val timesToReschedule = (pendingTimes ++ downTimes).filter { _ != restartTime }
+      .distinct.sorted(Time.ordering)
     logInfo("Batches to reschedule (" + timesToReschedule.size + " batches): " +
       timesToReschedule.mkString(", "))
     timesToReschedule.foreach { time =>
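
Why this helps with flakiness (a reading of the commit, not stated in it explicitly): after recovery, restartTime is the first batch the restarted timer generates on its own, so leaving it in timesToReschedule could run that batch twice and make completed-batch counts in tests nondeterministic. A minimal sketch of the filter with hypothetical times; only Time and Time.ordering come from the code above:

import org.apache.spark.streaming.Time

// Hypothetical recovery state, for illustration only.
val restartTime = Time(4000)                               // first batch after restart
val pendingTimes = Seq(Time(2000), Time(3000), Time(4000))
val downTimes = Seq(Time(3000), Time(4000))

// Old behavior: restartTime survives the dedup and is rescheduled here,
// then generated again by the restarted timer.
val oldTimes = (pendingTimes ++ downTimes).distinct.sorted(Time.ordering)
// oldTimes == Seq(Time(2000), Time(3000), Time(4000))

// New behavior: restartTime is dropped before dedup and sort.
val newTimes = (pendingTimes ++ downTimes).filter { _ != restartTime }
  .distinct.sorted(Time.ordering)
// newTimes == Seq(Time(2000), Time(3000))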

streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala

Lines changed: 37 additions & 30 deletions

@@ -68,13 +68,9 @@ trait DStreamCheckpointTester { self: SparkFunSuite =>
     require(StreamingContext.getActive().isEmpty,
       "Cannot run test with already active streaming context")

-    // Current code assumes that:
-    // number of inputs = number of outputs = number of batches to be run
+    // Current code assumes that number of batches to be run = number of inputs
     val totalNumBatches = input.size
-    val nextNumBatches = totalNumBatches - numBatchesBeforeRestart
-    val initialNumExpectedOutputs = numBatchesBeforeRestart
-    val nextNumExpectedOutputs = expectedOutput.size - initialNumExpectedOutputs + 1
-    // because the last batch will be processed again
+    val batchDurationMillis = batchDuration.milliseconds

     // Setup the stream computation
     val checkpointDir = Utils.createTempDir(this.getClass.getSimpleName()).toString
@@ -92,20 +88,20 @@
     ssc.checkpoint(checkpointDir)

     // Do the computation for initial number of batches, create checkpoint file and quit
-    generateAndAssertOutput[V](ssc, batchDuration, checkpointDir, numBatchesBeforeRestart,
-      expectedOutput.take(numBatchesBeforeRestart), stopSparkContextAfterTest)
-
+    val beforeRestartOutput = generateOutput[V](ssc,
+      Time(batchDurationMillis * numBatchesBeforeRestart), checkpointDir, stopSparkContextAfterTest)
+    assertOutput(beforeRestartOutput, expectedOutput, beforeRestart = true)
     // Restart and complete the computation from checkpoint file
-    // scalastyle:off println
-    print(
+    logInfo(
       "\n-------------------------------------------\n" +
       " Restarting stream computation " +
       "\n-------------------------------------------\n"
     )
-    // scalastyle:on println
+
     val restartedSsc = new StreamingContext(checkpointDir)
-    generateAndAssertOutput[V](restartedSsc, batchDuration, checkpointDir, nextNumBatches,
-      expectedOutput.takeRight(nextNumExpectedOutputs), stopSparkContextAfterTest)
+    val afterRestartOutput = generateOutput[V](restartedSsc,
+      Time(batchDurationMillis * totalNumBatches), checkpointDir, stopSparkContextAfterTest)
+    assertOutput(afterRestartOutput, expectedOutput, beforeRestart = false)
   }

   protected def createContextForCheckpointOperation(batchDuration: Duration): StreamingContext = {
@@ -114,32 +110,30 @@
     new StreamingContext(SparkContext.getOrCreate(conf), batchDuration)
   }

-  private def generateAndAssertOutput[V: ClassTag](
+  private def generateOutput[V: ClassTag](
       ssc: StreamingContext,
-      batchDuration: Duration,
+      targetBatchTime: Time,
       checkpointDir: String,
-      numBatchesToRun: Int,
-      expectedOutput: Seq[Seq[V]],
       stopSparkContext: Boolean
-    ) {
+    ): Seq[Seq[V]] = {
     try {
+      val batchDuration = ssc.graph.batchDuration
       val batchCounter = new BatchCounter(ssc)
       ssc.start()
-      val numBatches = expectedOutput.size
       val clock = ssc.scheduler.clock.asInstanceOf[ManualClock]
-      // scalastyle:off println
+      val currentTime = clock.getTimeMillis()
+
       logInfo("Manual clock before advancing = " + clock.getTimeMillis())
-      clock.advance((batchDuration * numBatches).milliseconds)
+      clock.setTime(targetBatchTime.milliseconds)
       logInfo("Manual clock after advancing = " + clock.getTimeMillis())
-      // scalastyle:on println

       val outputStream = ssc.graph.getOutputStreams().filter { dstream =>
         dstream.isInstanceOf[TestOutputStreamWithPartitions[V]]
       }.head.asInstanceOf[TestOutputStreamWithPartitions[V]]

       eventually(timeout(10 seconds)) {
         ssc.awaitTerminationOrTimeout(10)
-        assert(batchCounter.getNumCompletedBatches === numBatchesToRun)
+        assert(batchCounter.getLastCompletedBatchTime === targetBatchTime)
       }

       eventually(timeout(10 seconds)) {
@@ -150,17 +144,30 @@
         // are written to make sure that both of them have been written.
         assert(checkpointFilesOfLatestTime.size === 2)
       }
+      outputStream.output.map(_.flatten)

-      val output = outputStream.output.map(_.flatten)
-      val setComparison = output.zip(expectedOutput).forall { case (o, e) => o.toSet === e.toSet }
-      assert(setComparison, s"set comparison failed\n" +
-        s"Expected output (${expectedOutput.size} items):\n${expectedOutput.mkString("\n")}\n" +
-        s"Generated output (${output.size} items): ${output.mkString("\n")}"
-      )
     } finally {
       ssc.stop(stopSparkContext = stopSparkContext)
     }
   }
+
+  private def assertOutput[V: ClassTag](
+      output: Seq[Seq[V]],
+      expectedOutput: Seq[Seq[V]],
+      beforeRestart: Boolean): Unit = {
+    val expectedPartialOutput = if (beforeRestart) {
+      expectedOutput.take(output.size)
+    } else {
+      expectedOutput.takeRight(output.size)
+    }
+    val setComparison = output.zip(expectedPartialOutput).forall {
+      case (o, e) => o.toSet === e.toSet
+    }
+    assert(setComparison, s"set comparison failed\n" +
+      s"Expected output items:\n${expectedPartialOutput.mkString("\n")}\n" +
+      s"Generated output items: ${output.mkString("\n")}"
+    )
+  }
 }

 /**
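
Taken together, the suite change replaces a relative, count-based protocol (advance the clock by N batch durations, assert N completed batches) with an absolute, time-based one (set the clock to a target batch time, assert that exactly that batch completed). The count was fragile because the batch at the restart boundary can be processed again from the checkpoint. A condensed sketch of the new pattern; BatchCounter is the helper extended below, runUntilBatch is a hypothetical name, and both ssc.scheduler and ManualClock assume the package-private access the real suite has:

import org.apache.spark.streaming.{StreamingContext, Time}
import org.apache.spark.util.ManualClock

def runUntilBatch(ssc: StreamingContext, targetBatchTime: Time): Unit = {
  val batchCounter = new BatchCounter(ssc)
  ssc.start()
  val clock = ssc.scheduler.clock.asInstanceOf[ManualClock]
  // setTime is absolute: it lands on the same target no matter how many
  // batches already ran before a restart, whereas advance() is relative.
  clock.setTime(targetBatchTime.milliseconds)
  // Wait on the batch *time*, not the batch *count*, so a replayed
  // boundary batch cannot skew the condition.
  while (batchCounter.getLastCompletedBatchTime != targetBatchTime) {
    Thread.sleep(10)
  }
}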

streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala

Lines changed: 6 additions & 0 deletions

@@ -142,6 +142,7 @@ class BatchCounter(ssc: StreamingContext) {
   // All access to this state should be guarded by `BatchCounter.this.synchronized`
   private var numCompletedBatches = 0
   private var numStartedBatches = 0
+  private var lastCompletedBatchTime: Time = null

   private val listener = new StreamingListener {
     override def onBatchStarted(batchStarted: StreamingListenerBatchStarted): Unit =
@@ -152,6 +153,7 @@
     override def onBatchCompleted(batchCompleted: StreamingListenerBatchCompleted): Unit =
       BatchCounter.this.synchronized {
         numCompletedBatches += 1
+        lastCompletedBatchTime = batchCompleted.batchInfo.batchTime
         BatchCounter.this.notifyAll()
       }
   }
@@ -165,6 +167,10 @@
     numStartedBatches
   }

+  def getLastCompletedBatchTime: Time = this.synchronized {
+    lastCompletedBatchTime
+  }
+
   /**
    * Wait until `expectedNumCompletedBatches` batches are completed, or timeout. Return true if
    * `expectedNumCompletedBatches` batches are completed. Otherwise, return false to indicate it's
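
The listener mechanics behind these additions are small enough to sketch standalone. This condensed tracker keeps only the new batch-time bookkeeping; LastBatchTracker is a hypothetical name, while the API calls (addStreamingListener, onBatchCompleted, batchInfo.batchTime) are the public Spark Streaming ones used in the diff:

import org.apache.spark.streaming.{StreamingContext, Time}
import org.apache.spark.streaming.scheduler.{StreamingListener, StreamingListenerBatchCompleted}

class LastBatchTracker(ssc: StreamingContext) {
  // Guarded by LastBatchTracker.this.synchronized, as in BatchCounter.
  private var lastCompletedBatchTime: Time = null

  ssc.addStreamingListener(new StreamingListener {
    override def onBatchCompleted(batchCompleted: StreamingListenerBatchCompleted): Unit =
      LastBatchTracker.this.synchronized {
        lastCompletedBatchTime = batchCompleted.batchInfo.batchTime
        // Wake threads blocked in a synchronized wait for progress.
        LastBatchTracker.this.notifyAll()
      }
  })

  def getLastCompletedBatchTime: Time = this.synchronized {
    lastCompletedBatchTime
  }
}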
