
Commit cbe4d76

Preserve attemptId behavior and deprecate it:

- Introduce new `attemptNumber` and `taskAttemptId` methods to avoid ambiguity.
- Change `attemptNumber` to return Int instead of Long, since it was being treated as an Int elsewhere.
- Add more Javadocs.
- Add Mima excludes for the new methods.
1 parent b2dffa3 commit cbe4d76
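The core of the change is splitting the old overloaded attemptId into two distinct concepts. A hedged migration sketch in Scala (the ctx value is assumed to be obtained inside a running task; nothing here is from the commit itself):

import org.apache.spark.TaskContext

// Sketch only: TaskContext.get() returns the context of the currently
// running task (and null outside of one).
val ctx: TaskContext = TaskContext.get()

// Deprecated accessor: kept for compatibility, forwards to taskAttemptId.
val legacy: Long = ctx.attemptId

// Disambiguated accessors introduced by this commit:
val retryCount: Int = ctx.attemptNumber // 0 on the first attempt, 1 on the first retry, ...
val uniqueId: Long = ctx.taskAttemptId  // unique per attempt within the SparkContext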


20 files changed (+75, -49 lines)


core/src/main/java/org/apache/spark/TaskContext.java

Lines changed: 16 additions & 6 deletions
@@ -62,7 +62,7 @@ static void unset() {
    */
   public abstract boolean isInterrupted();
 
-  /** @deprecated: use isRunningLocally() */
+  /** @deprecated use {@link #isRunningLocally()} */
   @Deprecated
   public abstract boolean runningLocally();
 
@@ -87,28 +87,38 @@ static void unset() {
    * is for HadoopRDD to register a callback to close the input stream.
    * Will be called in any situation - success, failure, or cancellation.
    *
-   * @deprecated: use addTaskCompletionListener
+   * @deprecated use {@link #addTaskCompletionListener(scala.Function1)}
    *
    * @param f Callback function.
    */
   @Deprecated
   public abstract void addOnCompleteCallback(final Function0<Unit> f);
 
+  /**
+   * The ID of the stage that this task belongs to.
+   */
   public abstract int stageId();
 
+  /**
+   * The ID of the RDD partition that is computed by this task.
+   */
   public abstract int partitionId();
 
   /**
-   * An ID recording how many times this task has been attempted. The first task attempt will be
-   * assigned attemptId = 0, and subsequent attempts will have increasing IDs.
+   * How many times this task has been attempted. The first task attempt will be assigned
+   * attemptNumber = 0, and subsequent attempts will have increasing attempt numbers.
    */
+  public abstract int attemptNumber();
+
+  /** @deprecated use {@link #taskAttemptId()}; it was renamed to avoid ambiguity. */
+  @Deprecated
   public abstract long attemptId();
 
   /**
    * An ID that is unique to this task attempt (within the same SparkContext, no two task attempts
-   * will share the same task ID).
+   * will share the same attempt ID). This is roughly equivalent to Hadoop's TaskAttemptID.
    */
-  public abstract long taskId();
+  public abstract long taskAttemptId();
 
   /** ::DeveloperApi:: */
   @DeveloperApi
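To watch the two values diverge, a small job can fail a task once so that its retry reports attemptNumber = 1 with a fresh taskAttemptId. A hedged sketch (the master string, partition count, and failure injection are all illustrative; "local[2, 3]" permits up to 3 task failures so the retry actually runs):

import org.apache.spark.{SparkConf, SparkContext, TaskContext}

val sc = new SparkContext(
  new SparkConf().setMaster("local[2, 3]").setAppName("attempt-demo"))

sc.parallelize(1 to 2, 2).foreach { _ =>
  val ctx = TaskContext.get()
  println(s"partition=${ctx.partitionId} attemptNumber=${ctx.attemptNumber} " +
    s"taskAttemptId=${ctx.taskAttemptId}")
  // Fail partition 0 exactly once; the scheduler's retry prints attemptNumber = 1.
  if (ctx.partitionId == 0 && ctx.attemptNumber == 0) {
    throw new RuntimeException("simulated failure")
  }
}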

core/src/main/scala/org/apache/spark/TaskContextImpl.scala

Lines changed: 5 additions & 2 deletions
@@ -25,13 +25,16 @@ import scala.collection.mutable.ArrayBuffer
 private[spark] class TaskContextImpl(
     val stageId: Int,
     val partitionId: Int,
-    val taskId: Long,
-    val attemptId: Long,
+    override val taskAttemptId: Long,
+    override val attemptNumber: Int,
     val runningLocally: Boolean = false,
     val taskMetrics: TaskMetrics = TaskMetrics.empty)
   extends TaskContext
   with Logging {
 
+  // For backwards-compatibility; this method is now deprecated as of 1.3.0.
+  override def attemptId: Long = taskAttemptId
+
   // List of callback functions to execute when the task completes.
   @transient private val onCompleteCallbacks = new ArrayBuffer[TaskCompletionListener]
core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala

Lines changed: 1 addition & 1 deletion
@@ -71,7 +71,7 @@ private[spark] class CoarseGrainedExecutorBackend(
       val ser = env.closureSerializer.newInstance()
       val taskDesc = ser.deserialize[TaskDescription](data.value)
       logInfo("Got assigned task " + taskDesc.taskId)
-      executor.launchTask(this, taskId = taskDesc.taskId, attemptId = taskDesc.attemptId,
+      executor.launchTask(this, taskId = taskDesc.taskId, attemptNumber = taskDesc.attemptNumber,
         taskDesc.name, taskDesc.serializedTask)
     }

core/src/main/scala/org/apache/spark/executor/Executor.scala

Lines changed: 5 additions & 5 deletions
@@ -110,11 +110,11 @@ private[spark] class Executor(
   def launchTask(
       context: ExecutorBackend,
       taskId: Long,
-      attemptId: Long,
+      attemptNumber: Int,
       taskName: String,
       serializedTask: ByteBuffer) {
-    val tr =
-      new TaskRunner(context, taskId = taskId, attemptId = attemptId, taskName, serializedTask)
+    val tr = new TaskRunner(context, taskId = taskId, attemptNumber = attemptNumber, taskName,
+      serializedTask)
     runningTasks.put(taskId, tr)
     threadPool.execute(tr)
   }
@@ -141,7 +141,7 @@ private[spark] class Executor(
   class TaskRunner(
       execBackend: ExecutorBackend,
       val taskId: Long,
-      val attemptId: Long,
+      val attemptNumber: Int,
       taskName: String,
       serializedTask: ByteBuffer)
     extends Runnable {
@@ -189,7 +189,7 @@ private[spark] class Executor(
 
         // Run the actual task and measure its runtime.
         taskStart = System.currentTimeMillis()
-        val value = task.run(taskId = taskId, attemptId = attemptId)
+        val value = task.run(taskAttemptId = taskId, attemptNumber = attemptNumber)
         val taskFinish = System.currentTimeMillis()
 
         // If the task has been killed, let's fail it.
core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala

Lines changed: 5 additions & 4 deletions
@@ -77,13 +77,14 @@ private[spark] class MesosExecutorBackend
 
   override def launchTask(d: ExecutorDriver, taskInfo: TaskInfo) {
     val taskId = taskInfo.getTaskId.getValue.toLong
-    val attemptIdPlusData = taskInfo.getData.asReadOnlyByteBuffer()
-    val attemptId = attemptIdPlusData.getLong // Updates the position by 8 bytes
-    val data = attemptIdPlusData.slice() // Subsequence starting at the current position
+    val attemptNumberPlusData = taskInfo.getData.asReadOnlyByteBuffer()
+    val attemptNumber = attemptNumberPlusData.getInt // Updates the position by 4 bytes
+    val data = attemptNumberPlusData.slice() // Subsequence starting at the current position
     if (executor == null) {
       logError("Received launchTask but executor was null")
     } else {
-      executor.launchTask(this, taskId = taskId, attemptId = attemptId, taskInfo.getName, data)
+      executor.launchTask(this, taskId = taskId, attemptNumber = attemptNumber, taskInfo.getName,
+        data)
     }
   }
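The backend depends on a small framing convention: the scheduler prepends the attempt number to the serialized task bytes, and the change above shrinks that header from 8 bytes (Long) to 4 (Int). A standalone sketch of the framing (the payload and value are made up):

import java.nio.ByteBuffer

val payload = "serialized task".getBytes("UTF-8")
val framed = ByteBuffer.allocate(4 + payload.length)
framed.putInt(7)  // attemptNumber header: an Int now, a Long before this commit
framed.put(payload)
framed.flip()

// Executor side, mirroring launchTask above:
val ro = framed.asReadOnlyBuffer()
val attemptNumber = ro.getInt // advances the position by 4 bytes
val data = ro.slice()         // remaining bytes: the serialized task
assert(attemptNumber == 7 && data.remaining == payload.length)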

core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala

Lines changed: 3 additions & 2 deletions
@@ -95,7 +95,8 @@ private[spark] object CheckpointRDD extends Logging {
 
     val finalOutputName = splitIdToFile(ctx.partitionId)
     val finalOutputPath = new Path(outputDir, finalOutputName)
-    val tempOutputPath = new Path(outputDir, "." + finalOutputName + "-attempt-" + ctx.attemptId)
+    val tempOutputPath =
+      new Path(outputDir, "." + finalOutputName + "-attempt-" + ctx.attemptNumber)
 
     if (fs.exists(tempOutputPath)) {
       throw new IOException("Checkpoint failed: temporary path " +
@@ -119,7 +120,7 @@ private[spark] object CheckpointRDD extends Logging {
       logInfo("Deleting tempOutputPath " + tempOutputPath)
       fs.delete(tempOutputPath, false)
       throw new IOException("Checkpoint failed: failed to save output of task: "
-        + ctx.attemptId + " and final output path does not exist")
+        + ctx.attemptNumber + " and final output path does not exist")
     } else {
       // Some other copy of this task must've finished before us and renamed it
       logInfo("Final output path " + finalOutputPath + " already exists; not overwriting it")

core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala

Lines changed: 1 addition & 1 deletion
@@ -229,7 +229,7 @@ class HadoopRDD[K, V](
       var reader: RecordReader[K, V] = null
       val inputFormat = getInputFormat(jobConf)
       HadoopRDD.addLocalConfiguration(new SimpleDateFormat("yyyyMMddHHmm").format(createTime),
-        context.stageId, theSplit.index, context.attemptId.toInt, jobConf)
+        context.stageId, theSplit.index, context.attemptNumber, jobConf)
       reader = inputFormat.getRecordReader(split.inputSplit.value, jobConf, Reporter.NULL)
 
       // Register an on-task-completion callback to close the input stream.

core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala

Lines changed: 1 addition & 4 deletions
@@ -971,12 +971,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
 
     val writeShard = (context: TaskContext, iter: Iterator[(K,V)]) => {
       val config = wrappedConf.value
-      // Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it
-      // around by taking a mod. We expect that no task will be attempted 2 billion times.
-      val attemptNumber = (context.attemptId % Int.MaxValue).toInt
       /* "reduce task" <split #> <attempt # = spark task #> */
       val attemptId = newTaskAttemptID(jobtrackerID, stageId, isMap = false, context.partitionId,
-        attemptNumber)
+        context.attemptNumber)
       val hadoopContext = newTaskAttemptContext(config, attemptId)
       val format = outfmt.newInstance
       format match {
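The deleted comment explains the deleted workaround: Hadoop's TaskAttemptID wants a 32-bit attempt number, while the old attemptId was a Long. A standalone sketch of the fold that is no longer needed now that attemptNumber is already an Int:

// The removed workaround, isolated: fold a Long id into a non-negative Int.
def foldToInt(attemptId: Long): Int = (attemptId % Int.MaxValue).toInt

assert(foldToInt(0L) == 0)
assert(foldToInt(Int.MaxValue.toLong + 5) == 5)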

core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala

Lines changed: 2 additions & 2 deletions
@@ -634,8 +634,8 @@ class DAGScheduler(
       try {
         val rdd = job.finalStage.rdd
         val split = rdd.partitions(job.partitions(0))
-        val taskContext = new TaskContextImpl(job.finalStage.id, job.partitions(0), taskId = 0,
-          attemptId = 0, runningLocally = true)
+        val taskContext = new TaskContextImpl(job.finalStage.id, job.partitions(0), taskAttemptId = 0,
+          attemptNumber = 0, runningLocally = true)
         TaskContextHelper.setTaskContext(taskContext)
         try {
           val result = job.func(taskContext, rdd.iterator(split, taskContext))

core/src/main/scala/org/apache/spark/scheduler/Task.scala

Lines changed: 10 additions & 3 deletions
@@ -44,9 +44,16 @@ import org.apache.spark.util.Utils
  */
 private[spark] abstract class Task[T](val stageId: Int, var partitionId: Int) extends Serializable {
 
-  final def run(taskId: Long, attemptId: Long): T = {
-    context = new TaskContextImpl(stageId = stageId, partitionId = partitionId, taskId = taskId,
-      attemptId = attemptId, runningLocally = false)
+  /**
+   * Called by Executor to run this task.
+   *
+   * @param taskAttemptId an identifier for this task attempt that is unique within a SparkContext.
+   * @param attemptNumber how many times this task has been attempted (0 for the first attempt)
+   * @return the result of the task
+   */
+  final def run(taskAttemptId: Long, attemptNumber: Int): T = {
+    context = new TaskContextImpl(stageId = stageId, partitionId = partitionId,
+      taskAttemptId = taskAttemptId, attemptNumber = attemptNumber, runningLocally = false)
     TaskContextHelper.setTaskContext(context)
     context.taskMetrics.hostname = Utils.localHostName()
     taskThread = Thread.currentThread()
