
Commit 4faf5f9

Author: Davies Liu (committed)

Merge branch 'master' of github.com:apache/spark into gen_defer

Conflicts:
    sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegen.scala

2 parents: f6139e6 + 9cdd867

File tree: 106 files changed, +3180 -1227 lines


core/src/main/resources/org/apache/spark/ui/static/historypage-template.html

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@
       <tbody>
       {{#applications}}
         <tr>
-          <td class="rowGroupColumn"><a href="/history/{{id}}/{{num}}/jobs/">{{id}}</a></td>
+          <td class="rowGroupColumn"><span title="{{id}}"><a href="/history/{{id}}/{{num}}/jobs/">{{id}}</a></span></td>
           <td class="rowGroupColumn">{{name}}</td>
           {{#attempts}}
             <td class="attemptIDSpan"><a href="/history/{{id}}/{{attemptId}}/jobs/">{{attemptId}}</a></td>
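Note: the title="{{id}}" attribute added here is what the new "appid-numeric-pre" sorter in historypage.js (next file) extracts from the rendered cell, so the App ID column can be sorted numerically rather than lexicographically.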

core/src/main/resources/org/apache/spark/ui/static/historypage.js

Lines changed: 32 additions & 1 deletion
@@ -37,6 +37,22 @@ function formatDuration(milliseconds) {
   return hours.toFixed(1) + " h";
 }

+function makeIdNumeric(id) {
+  var strs = id.split("_");
+  if (strs.length < 3) {
+    return id;
+  }
+  var appSeqNum = strs[2];
+  var resl = strs[0] + "_" + strs[1] + "_";
+  var diff = 10 - appSeqNum.length;
+  while (diff > 0) {
+    resl += "0"; // padding 0 before the app sequence number to make sure it has 10 characters
+    diff--;
+  }
+  resl += appSeqNum;
+  return resl;
+}
+
 function formatDate(date) {
   return date.split(".")[0].replace("T", " ");
 }

@@ -62,6 +78,21 @@ jQuery.extend( jQuery.fn.dataTableExt.oSort, {
   }
 } );

+jQuery.extend( jQuery.fn.dataTableExt.oSort, {
+  "appid-numeric-pre": function ( a ) {
+    var x = a.match(/title="*(-?[0-9a-zA-Z\-\_]+)/)[1];
+    return makeIdNumeric(x);
+  },
+
+  "appid-numeric-asc": function ( a, b ) {
+    return ((a < b) ? -1 : ((a > b) ? 1 : 0));
+  },
+
+  "appid-numeric-desc": function ( a, b ) {
+    return ((a < b) ? 1 : ((a > b) ? -1 : 0));
+  }
+} );
+
 $(document).ajaxStop($.unblockUI);
 $(document).ajaxStart(function(){
   $.blockUI({ message: '<h3>Loading history summary...</h3>'});

@@ -109,7 +140,7 @@ $(document).ready(function() {
   var selector = "#history-summary-table";
   var conf = {
     "columns": [
-      {name: 'first'},
+      {name: 'first', type: "appid-numeric"},
       {name: 'second'},
       {name: 'third'},
       {name: 'fourth'},
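To see why the padding matters: application IDs whose sequence numbers differ in digit count (e.g. "..._9" vs "..._10") sort incorrectly as plain strings. A minimal Scala port of the same normalization (illustrative only; the committed code is the JavaScript above):

    // Illustrative Scala sketch of makeIdNumeric: left-pad the application
    // sequence number to 10 characters so lexicographic order agrees with
    // numeric order.
    def makeIdNumeric(id: String): String = {
      val parts = id.split("_")
      if (parts.length < 3) id
      else {
        val seq = parts(2)
        parts(0) + "_" + parts(1) + "_" + ("0" * math.max(0, 10 - seq.length)) + seq
      }
    }

    // makeIdNumeric("application_1452049845000_9")  // ..._0000000009
    // makeIdNumeric("application_1452049845000_10") // ..._0000000010
    // "...0000000009" < "...0000000010" as strings, matching numeric order.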

core/src/main/scala/org/apache/spark/SparkConf.scala

Lines changed: 25 additions & 0 deletions
@@ -503,6 +503,31 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging {
         set("spark.executor.instances", value)
       }
     }
+
+    if (contains("spark.master") && get("spark.master").startsWith("yarn-")) {
+      val warning = s"spark.master ${get("spark.master")} is deprecated in Spark 2.0+, please " +
+        "instead use \"yarn\" with specified deploy mode."
+
+      get("spark.master") match {
+        case "yarn-cluster" =>
+          logWarning(warning)
+          set("spark.master", "yarn")
+          set("spark.submit.deployMode", "cluster")
+        case "yarn-client" =>
+          logWarning(warning)
+          set("spark.master", "yarn")
+          set("spark.submit.deployMode", "client")
+        case _ => // Any other unexpected master will be checked when creating scheduler backend.
+      }
+    }
+
+    if (contains("spark.submit.deployMode")) {
+      get("spark.submit.deployMode") match {
+        case "cluster" | "client" =>
+        case e => throw new SparkException("spark.submit.deployMode can only be \"cluster\" or " +
+          "\"client\".")
+      }
+    }
   }

   /**
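The net effect of the first block is that the deprecated fused master strings are rewritten into a (master, deploy mode) pair before anything else reads them. A standalone Scala sketch of that mapping (the helper name is hypothetical, not part of the commit):

    // Sketch of the translation added above (helper name is hypothetical):
    def normalizeYarnMaster(master: String): (String, Option[String]) = master match {
      case "yarn-cluster" => ("yarn", Some("cluster"))  // deprecated spelling
      case "yarn-client"  => ("yarn", Some("client"))   // deprecated spelling
      case other          => (other, None)              // validated later by the scheduler backend
    }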

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 14 additions & 25 deletions
@@ -237,6 +237,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
   def jars: Seq[String] = _jars
   def files: Seq[String] = _files
   def master: String = _conf.get("spark.master")
+  def deployMode: String = _conf.getOption("spark.submit.deployMode").getOrElse("client")
   def appName: String = _conf.get("spark.app.name")

   private[spark] def isEventLogEnabled: Boolean = _conf.getBoolean("spark.eventLog.enabled", false)

@@ -297,9 +298,6 @@
   val sparkUser = Utils.getCurrentUserName()

   private[spark] def schedulerBackend: SchedulerBackend = _schedulerBackend
-  private[spark] def schedulerBackend_=(sb: SchedulerBackend): Unit = {
-    _schedulerBackend = sb
-  }

   private[spark] def taskScheduler: TaskScheduler = _taskScheduler
   private[spark] def taskScheduler_=(ts: TaskScheduler): Unit = {

@@ -322,8 +320,6 @@
   def applicationId: String = _applicationId
   def applicationAttemptId: Option[String] = _applicationAttemptId

-  def metricsSystem: MetricsSystem = if (_env != null) _env.metricsSystem else null
-
   private[spark] def eventLogger: Option[EventLoggingListener] = _eventLogger

   private[spark] def executorAllocationManager: Option[ExecutorAllocationManager] =

@@ -380,10 +376,8 @@
     }

     // System property spark.yarn.app.id must be set if user code ran by AM on a YARN cluster
-    // yarn-standalone is deprecated, but still supported
-    if ((master == "yarn-cluster" || master == "yarn-standalone") &&
-        !_conf.contains("spark.yarn.app.id")) {
-      throw new SparkException("Detected yarn-cluster mode, but isn't running on a cluster. " +
+    if (master == "yarn" && deployMode == "cluster" && !_conf.contains("spark.yarn.app.id")) {
+      throw new SparkException("Detected yarn cluster mode, but isn't running on a cluster. " +
        "Deployment to YARN is not supported directly by SparkContext. Please use spark-submit.")
     }

@@ -419,7 +413,7 @@
      }
    }

-    if (master == "yarn-client") System.setProperty("SPARK_YARN_MODE", "true")
+    if (master == "yarn" && deployMode == "client") System.setProperty("SPARK_YARN_MODE", "true")

    // "_jobProgressListener" should be set up before creating SparkEnv because when creating
    // "SparkEnv", some messages will be posted to "listenerBus" and we should not miss them.

@@ -496,7 +490,7 @@
      HeartbeatReceiver.ENDPOINT_NAME, new HeartbeatReceiver(this))

    // Create and start the scheduler
-    val (sched, ts) = SparkContext.createTaskScheduler(this, master)
+    val (sched, ts) = SparkContext.createTaskScheduler(this, master, deployMode)
    _schedulerBackend = sched
    _taskScheduler = ts
    _dagScheduler = new DAGScheduler(this)

@@ -514,9 +508,9 @@

    // The metrics system for Driver need to be set spark.app.id to app ID.
    // So it should start after we get app ID from the task scheduler and set spark.app.id.
-    metricsSystem.start()
+    _env.metricsSystem.start()
    // Attach the driver metrics servlet handler to the web ui after the metrics system is started.
-    metricsSystem.getServletHandlers.foreach(handler => ui.foreach(_.attachHandler(handler)))
+    _env.metricsSystem.getServletHandlers.foreach(handler => ui.foreach(_.attachHandler(handler)))

    _eventLogger =
      if (isEventLogEnabled) {

@@ -1595,10 +1589,8 @@
      key = uri.getScheme match {
        // A JAR file which exists only on the driver node
        case null | "file" =>
-          // yarn-standalone is deprecated, but still supported
-          if (SparkHadoopUtil.get.isYarnMode() &&
-              (master == "yarn-standalone" || master == "yarn-cluster")) {
-            // In order for this to work in yarn-cluster mode the user must specify the
+          if (master == "yarn" && deployMode == "cluster") {
+            // In order for this to work in yarn cluster mode the user must specify the
            // --addJars option to the client to upload the file into the distributed cache
            // of the AM to make it show up in the current working directory.
            val fileName = new Path(uri.getPath).getName()

@@ -2324,7 +2316,8 @@ object SparkContext extends Logging {
   */
  private def createTaskScheduler(
      sc: SparkContext,
-      master: String): (SchedulerBackend, TaskScheduler) = {
+      master: String,
+      deployMode: String): (SchedulerBackend, TaskScheduler) = {
    import SparkMasterRegex._

    // When running locally, don't try to re-execute tasks on failure.

@@ -2386,11 +2379,7 @@
        }
        (backend, scheduler)

-      case "yarn-standalone" | "yarn-cluster" =>
-        if (master == "yarn-standalone") {
-          logWarning(
-            "\"yarn-standalone\" is deprecated as of Spark 1.0. Use \"yarn-cluster\" instead.")
-        }
+      case "yarn" if deployMode == "cluster" =>
        val scheduler = try {
          val clazz = Utils.classForName("org.apache.spark.scheduler.cluster.YarnClusterScheduler")
          val cons = clazz.getConstructor(classOf[SparkContext])

@@ -2415,7 +2404,7 @@
        scheduler.initialize(backend)
        (backend, scheduler)

-      case "yarn-client" =>
+      case "yarn" if deployMode == "client" =>
        val scheduler = try {
          val clazz = Utils.classForName("org.apache.spark.scheduler.cluster.YarnScheduler")
          val cons = clazz.getConstructor(classOf[SparkContext])

@@ -2456,7 +2445,7 @@
      case zkUrl if zkUrl.startsWith("zk://") =>
        logWarning("Master URL for a multi-master Mesos cluster managed by ZooKeeper should be " +
          "in the form mesos://zk://host:port. Current Master URL will stop working in Spark 2.0.")
-        createTaskScheduler(sc, "mesos://" + zkUrl)
+        createTaskScheduler(sc, "mesos://" + zkUrl, deployMode)

      case _ =>
        throw new SparkException("Could not parse Master URL: '" + master + "'")
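With deployMode threaded through, createTaskScheduler selects the YARN scheduler class from the (master, deployMode) pair instead of parsing a fused string. A reduced sketch of that dispatch (the helper name is hypothetical; the class names are the ones loaded reflectively above):

    // Hypothetical helper distilling the yarn cases of createTaskScheduler:
    def yarnSchedulerClassName(master: String, deployMode: String): String =
      (master, deployMode) match {
        case ("yarn", "cluster") => "org.apache.spark.scheduler.cluster.YarnClusterScheduler"
        case ("yarn", "client")  => "org.apache.spark.scheduler.cluster.YarnScheduler"
        case _ =>
          throw new IllegalArgumentException(s"Could not parse Master URL: '$master'")
      }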

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 12 additions & 9 deletions
@@ -226,11 +226,17 @@ object SparkSubmit {

    // Set the cluster manager
    val clusterManager: Int = args.master match {
-      case m if m.startsWith("yarn") => YARN
+      case "yarn" => YARN
+      case "yarn-client" | "yarn-cluster" =>
+        printWarning(s"Master ${args.master} is deprecated since 2.0." +
+          " Please use master \"yarn\" with specified deploy mode instead.")
+        YARN
      case m if m.startsWith("spark") => STANDALONE
      case m if m.startsWith("mesos") => MESOS
      case m if m.startsWith("local") => LOCAL
-      case _ => printErrorAndExit("Master must start with yarn, spark, mesos, or local"); -1
+      case _ =>
+        printErrorAndExit("Master must either be yarn or start with spark, mesos, local")
+        -1
    }

    // Set the deploy mode; default is client mode

@@ -240,23 +246,20 @@
      case _ => printErrorAndExit("Deploy mode must be either client or cluster"); -1
    }

-    // Because "yarn-cluster" and "yarn-client" encapsulate both the master
-    // and deploy mode, we have some logic to infer the master and deploy mode
+    // Because the deprecated way of specifying "yarn-cluster" and "yarn-client" encapsulate both
+    // the master and deploy mode, we have some logic to infer the master and deploy mode
    // from each other if only one is specified, or exit early if they are at odds.
    if (clusterManager == YARN) {
-      if (args.master == "yarn-standalone") {
-        printWarning("\"yarn-standalone\" is deprecated. Use \"yarn-cluster\" instead.")
-        args.master = "yarn-cluster"
-      }
      (args.master, args.deployMode) match {
        case ("yarn-cluster", null) =>
          deployMode = CLUSTER
+          args.master = "yarn"
        case ("yarn-cluster", "client") =>
          printErrorAndExit("Client deploy mode is not compatible with master \"yarn-cluster\"")
        case ("yarn-client", "cluster") =>
          printErrorAndExit("Cluster deploy mode is not compatible with master \"yarn-client\"")
        case (_, mode) =>
-          args.master = "yarn-" + Option(mode).getOrElse("client")
+          args.master = "yarn"
      }

      // Make sure YARN is included in our build if we're trying to use it
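In user-facing terms, the canonical invocation is now, for example:

    spark-submit --master yarn --deploy-mode cluster <app-jar>

while the old fused spellings (--master yarn-cluster, --master yarn-client) still resolve to the same master/deploy-mode pair but print a deprecation warning. These command lines are illustrative; only the --master and --deploy-mode handling comes from this diff.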

core/src/main/scala/org/apache/spark/status/api/v1/AllRDDResource.scala

Lines changed: 2 additions & 2 deletions
@@ -28,7 +28,7 @@ private[v1] class AllRDDResource(ui: SparkUI) {

  @GET
  def rddList(): Seq[RDDStorageInfo] = {
-    val storageStatusList = ui.storageListener.storageStatusList
+    val storageStatusList = ui.storageListener.activeStorageStatusList
    val rddInfos = ui.storageListener.rddInfoList
    rddInfos.map{rddInfo =>
      AllRDDResource.getRDDStorageInfo(rddInfo.id, rddInfo, storageStatusList,

@@ -44,7 +44,7 @@ private[spark] object AllRDDResource {
      rddId: Int,
      listener: StorageListener,
      includeDetails: Boolean): Option[RDDStorageInfo] = {
-    val storageStatusList = listener.storageStatusList
+    val storageStatusList = listener.activeStorageStatusList
    listener.rddInfoList.find { _.id == rddId }.map { rddInfo =>
      getRDDStorageInfo(rddId, rddInfo, storageStatusList, includeDetails)
    }

core/src/main/scala/org/apache/spark/status/api/v1/ExecutorListResource.scala

Lines changed: 2 additions & 2 deletions
@@ -31,9 +31,9 @@ private[v1] class ExecutorListResource(ui: SparkUI) {
    listener.synchronized {
      // The follow codes should be protected by `listener` to make sure no executors will be
      // removed before we query their status. See SPARK-12784.
-      val storageStatusList = listener.storageStatusList
+      val storageStatusList = listener.activeStorageStatusList
      (0 until storageStatusList.size).map { statusId =>
-        ExecutorsPage.getExecInfo(listener, statusId)
+        ExecutorsPage.getExecInfo(listener, statusId, isActive = true)
      }
    }
  }

core/src/main/scala/org/apache/spark/status/api/v1/api.scala

Lines changed: 1 addition & 0 deletions
@@ -54,6 +54,7 @@ class ExecutorStageSummary private[spark](
 class ExecutorSummary private[spark](
    val id: String,
    val hostPort: String,
+    val isActive: Boolean,
    val rddBlocks: Int,
    val memoryUsed: Long,
    val diskUsed: Long,

core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala

Lines changed: 1 addition & 0 deletions
@@ -203,6 +203,7 @@ private[spark] class DiskBlockObjectWriter(
    numRecordsWritten += 1
    writeMetrics.incRecordsWritten(1)

+    // TODO: call updateBytesWritten() less frequently.
    if (numRecordsWritten % 32 == 0) {
      updateBytesWritten()
    }

core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala

Lines changed: 14 additions & 3 deletions
@@ -19,6 +19,7 @@ package org.apache.spark.storage

 import scala.collection.mutable

+import org.apache.spark.SparkConf
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.scheduler._

@@ -29,14 +30,20 @@ import org.apache.spark.scheduler._
 * This class is thread-safe (unlike JobProgressListener)
 */
 @DeveloperApi
-class StorageStatusListener extends SparkListener {
+class StorageStatusListener(conf: SparkConf) extends SparkListener {
  // This maintains only blocks that are cached (i.e. storage level is not StorageLevel.NONE)
  private[storage] val executorIdToStorageStatus = mutable.Map[String, StorageStatus]()
+  private[storage] val deadExecutorStorageStatus = new mutable.ListBuffer[StorageStatus]()
+  private[this] val retainedDeadExecutors = conf.getInt("spark.ui.retainedDeadExecutors", 100)

  def storageStatusList: Seq[StorageStatus] = synchronized {
    executorIdToStorageStatus.values.toSeq
  }

+  def deadStorageStatusList: Seq[StorageStatus] = synchronized {
+    deadExecutorStorageStatus.toSeq
+  }
+
  /** Update storage status list to reflect updated block statuses */
  private def updateStorageStatus(execId: String, updatedBlocks: Seq[(BlockId, BlockStatus)]) {
    executorIdToStorageStatus.get(execId).foreach { storageStatus =>

@@ -87,8 +94,12 @@ class StorageStatusListener extends SparkListener {
  override def onBlockManagerRemoved(blockManagerRemoved: SparkListenerBlockManagerRemoved) {
    synchronized {
      val executorId = blockManagerRemoved.blockManagerId.executorId
-      executorIdToStorageStatus.remove(executorId)
+      executorIdToStorageStatus.remove(executorId).foreach { status =>
+        deadExecutorStorageStatus += status
+      }
+      if (deadExecutorStorageStatus.size > retainedDeadExecutors) {
+        deadExecutorStorageStatus.trimStart(1)
+      }
    }
  }
-
 }
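The retention logic is a simple bounded FIFO: a removed executor's storage status is appended to deadExecutorStorageStatus, and once the buffer exceeds the cap (spark.ui.retainedDeadExecutors, default 100) the oldest entry is dropped. A standalone sketch of the same pattern (illustrative; the class name is not part of the commit):

    import scala.collection.mutable

    // Bounded FIFO buffer mirroring the dead-executor retention above
    // (illustrative sketch; class name is hypothetical).
    class BoundedStatusBuffer[A](capacity: Int) {
      private val buf = mutable.ListBuffer.empty[A]

      def add(status: A): Unit = synchronized {
        buf += status
        if (buf.size > capacity) {
          buf.trimStart(1) // evict the oldest retained entry
        }
      }

      def snapshot: Seq[A] = synchronized(buf.toList)
    }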
