Commit 457166e

Merge remote-tracking branch 'apache/master' into SPARK-5886

2 parents: f8b30f4 + 6930e96

61 files changed: +929 additions, -671 deletions


core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala

Lines changed: 1 addition & 1 deletion

@@ -91,7 +91,7 @@ private[deploy] class ApplicationInfo(
     }
   }
 
-  private[master] val requestedCores = desc.maxCores.getOrElse(defaultCores)
+  private val requestedCores = desc.maxCores.getOrElse(defaultCores)
 
   private[master] def coresLeft: Int = requestedCores - coresGranted
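The only substantive change is the access qualifier: `requestedCores` drops from `private[master]` to `private` because the Master web UI (see MasterPage.scala below) now displays granted cores rather than requested cores, so nothing outside `ApplicationInfo` reads the field anymore. As a reminder of how Scala's scoped modifiers differ, here is a standalone sketch (illustrative only, not Spark code):

    package org.apache.spark.deploy.master

    class Example {
      // Visible only inside Example itself.
      private val classPrivate = 1

      // Also visible to everything under the org.apache.spark.deploy.master package,
      // e.g. the Master and its web UI pages.
      private[master] val packagePrivate = 2
    }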

core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala

Lines changed: 6 additions & 26 deletions

@@ -75,16 +75,12 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") {
     val workers = state.workers.sortBy(_.id)
     val workerTable = UIUtils.listingTable(workerHeaders, workerRow, workers)
 
-    val activeAppHeaders = Seq("Application ID", "Name", "Cores in Use",
-      "Cores Requested", "Memory per Node", "Submitted Time", "User", "State", "Duration")
+    val appHeaders = Seq("Application ID", "Name", "Cores", "Memory per Node", "Submitted Time",
+      "User", "State", "Duration")
     val activeApps = state.activeApps.sortBy(_.startTime).reverse
-    val activeAppsTable = UIUtils.listingTable(activeAppHeaders, activeAppRow, activeApps)
-
-    val completedAppHeaders = Seq("Application ID", "Name", "Cores Requested", "Memory per Node",
-      "Submitted Time", "User", "State", "Duration")
+    val activeAppsTable = UIUtils.listingTable(appHeaders, appRow, activeApps)
     val completedApps = state.completedApps.sortBy(_.endTime).reverse
-    val completedAppsTable = UIUtils.listingTable(completedAppHeaders, completeAppRow,
-      completedApps)
+    val completedAppsTable = UIUtils.listingTable(appHeaders, appRow, completedApps)
 
     val driverHeaders = Seq("Submission ID", "Submitted Time", "Worker", "State", "Cores",
       "Memory", "Main Class")

@@ -191,7 +187,7 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") {
     </tr>
   }
 
-  private def appRow(app: ApplicationInfo, active: Boolean): Seq[Node] = {
+  private def appRow(app: ApplicationInfo): Seq[Node] = {
     val killLink = if (parent.killEnabled &&
       (app.state == ApplicationState.RUNNING || app.state == ApplicationState.WAITING)) {
       val killLinkUri = s"app/kill?id=${app.id}&terminate=true"

@@ -201,7 +197,6 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") {
         (<a href={killLinkUri} onclick={confirm}>kill</a>)
       </span>
     }
-
     <tr>
       <td>
         <a href={"app?appId=" + app.id}>{app.id}</a>

@@ -210,15 +205,8 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") {
       <td>
         <a href={app.desc.appUiUrl}>{app.desc.name}</a>
       </td>
-      {
-        if (active) {
-          <td>
-            {app.coresGranted}
-          </td>
-        }
-      }
       <td>
-        {if (app.requestedCores == Int.MaxValue) "*" else app.requestedCores}
+        {app.coresGranted}
       </td>
       <td sorttable_customkey={app.desc.memoryPerSlave.toString}>
         {Utils.megabytesToString(app.desc.memoryPerSlave)}

@@ -230,14 +218,6 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") {
     </tr>
   }
 
-  private def activeAppRow(app: ApplicationInfo): Seq[Node] = {
-    appRow(app, active = true)
-  }
-
-  private def completeAppRow(app: ApplicationInfo): Seq[Node] = {
-    appRow(app, active = false)
-  }
-
   private def driverRow(driver: DriverInfo): Seq[Node] = {
     val killLink = if (parent.killEnabled &&
       (driver.state == DriverState.RUNNING ||

core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala

Lines changed: 5 additions & 3 deletions

@@ -81,9 +81,11 @@ class TaskInfo(
 
   def status: String = {
     if (running) {
-      "RUNNING"
-    } else if (gettingResult) {
-      "GET RESULT"
+      if (gettingResult) {
+        "GET RESULT"
+      } else {
+        "RUNNING"
+      }
     } else if (failed) {
       "FAILED"
     } else if (successful) {
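A task that is fetching its result still counts as running (its finish time is not yet set), so with the old ordering the `gettingResult` branch was effectively unreachable and such tasks were reported as RUNNING. The fix checks the narrower state inside the `running` branch. A minimal sketch of the corrected precedence, with plain boolean parameters standing in for the real `TaskInfo` flags:

    // Sketch only: the real TaskInfo derives these flags from finishTime, gettingResultTime, etc.
    def status(running: Boolean, gettingResult: Boolean, failed: Boolean, successful: Boolean): String =
      if (running) {
        // A result-fetching task is still "running", so test the more specific state first.
        if (gettingResult) "GET RESULT" else "RUNNING"
      } else if (failed) {
        "FAILED"
      } else if (successful) {
        "SUCCESS"
      } else {
        "UNKNOWN"
      }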

core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala

Lines changed: 4 additions & 0 deletions

@@ -112,6 +112,7 @@ class FileShuffleBlockManager(conf: SparkConf)
       private val shuffleState = shuffleStates(shuffleId)
       private var fileGroup: ShuffleFileGroup = null
 
+      val openStartTime = System.nanoTime
       val writers: Array[BlockObjectWriter] = if (consolidateShuffleFiles) {
         fileGroup = getUnusedFileGroup()
         Array.tabulate[BlockObjectWriter](numBuckets) { bucketId =>

@@ -135,6 +136,9 @@ class FileShuffleBlockManager(conf: SparkConf)
           blockManager.getDiskWriter(blockId, blockFile, serializer, bufferSize, writeMetrics)
         }
       }
+      // Creating the file to write to and creating a disk writer both involve interacting with
+      // the disk, so should be included in the shuffle write time.
+      writeMetrics.incShuffleWriteTime(System.nanoTime - openStartTime)
 
       override def releaseWriters(success: Boolean) {
         if (consolidateShuffleFiles) {
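The change brackets the writer-creation loop with `System.nanoTime` so that the cost of creating shuffle files and their disk writers is charged to the shuffle write time metric (the same pattern appears again in ExternalSorter.scala below). Reduced to its essentials, the pattern looks like this (a sketch, assuming only a callback that accumulates nanoseconds the way `incShuffleWriteTime` does):

    // Measure how long some disk-touching setup takes and fold it into a write-time metric.
    def timeOpens[T](addShuffleWriteTimeNanos: Long => Unit)(openWriters: => T): T = {
      val openStartTime = System.nanoTime
      val writers = openWriters                       // e.g. creating files and disk writers
      addShuffleWriteTimeNanos(System.nanoTime - openStartTime)
      writers
    }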

core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala

Lines changed: 3 additions & 0 deletions

@@ -63,6 +63,9 @@ private[spark] class SortShuffleWriter[K, V, C](
       sorter.insertAll(records)
     }
 
+    // Don't bother including the time to open the merged output file in the shuffle write time,
+    // because it just opens a single file, so is typically too fast to measure accurately
+    // (see SPARK-3570).
     val outputFile = shuffleBlockManager.getDataFile(dep.shuffleId, mapId)
     val blockId = shuffleBlockManager.consolidateId(dep.shuffleId, mapId)
     val partitionLengths = sorter.writePartitionedFile(blockId, context, outputFile)

core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala

Lines changed: 18 additions & 7 deletions

@@ -269,11 +269,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
           </td> +: getFormattedTimeQuantiles(serializationTimes)
 
         val gettingResultTimes = validTasks.map { case TaskUIData(info, _, _) =>
-          if (info.gettingResultTime > 0) {
-            (info.finishTime - info.gettingResultTime).toDouble
-          } else {
-            0.0
-          }
+          getGettingResultTime(info).toDouble
         }
         val gettingResultQuantiles =
           <td>

@@ -464,7 +460,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
      val gcTime = metrics.map(_.jvmGCTime).getOrElse(0L)
      val taskDeserializationTime = metrics.map(_.executorDeserializeTime).getOrElse(0L)
      val serializationTime = metrics.map(_.resultSerializationTime).getOrElse(0L)
-     val gettingResultTime = info.gettingResultTime
+     val gettingResultTime = getGettingResultTime(info)
 
      val maybeAccumulators = info.accumulables
      val accumulatorsReadable = maybeAccumulators.map{acc => s"${acc.name}: ${acc.update.get}"}

@@ -627,6 +623,19 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
     <td>{errorSummary}{details}</td>
   }
 
+  private def getGettingResultTime(info: TaskInfo): Long = {
+    if (info.gettingResultTime > 0) {
+      if (info.finishTime > 0) {
+        info.finishTime - info.gettingResultTime
+      } else {
+        // The task is still fetching the result.
+        System.currentTimeMillis - info.gettingResultTime
+      }
+    } else {
+      0L
+    }
+  }
+
   private def getSchedulerDelay(info: TaskInfo, metrics: TaskMetrics): Long = {
     val totalExecutionTime =
       if (info.gettingResult) {

@@ -638,6 +647,8 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
       }
     val executorOverhead = (metrics.executorDeserializeTime +
       metrics.resultSerializationTime)
-    math.max(0, totalExecutionTime - metrics.executorRunTime - executorOverhead)
+    math.max(
+      0,
+      totalExecutionTime - metrics.executorRunTime - executorOverhead - getGettingResultTime(info))
   }
 }
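Two things change here: `getGettingResultTime` now returns a live value (measured against the current time) for tasks that are still fetching their result, instead of the negative value the old `finishTime - gettingResultTime` expression produced for in-flight tasks, and the scheduler delay subtracts that result-fetch time so it is no longer misattributed to scheduling. The arithmetic, in a simplified standalone sketch (parameter names are illustrative, not the exact StagePage fields):

    // Scheduler delay is the wall-clock time left over after accounting for everything the
    // executor actually did: running the task, (de)serialization, and fetching the result.
    def schedulerDelay(
        totalExecutionTime: Long,
        executorRunTime: Long,
        deserializeTime: Long,
        serializeTime: Long,
        gettingResultTime: Long): Long = {
      val executorOverhead = deserializeTime + serializeTime
      math.max(0L, totalExecutionTime - executorRunTime - executorOverhead - gettingResultTime)
    }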

core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala

Lines changed: 5 additions & 0 deletions

@@ -352,13 +352,18 @@ private[spark] class ExternalSorter[K, V, C](
     // Create our file writers if we haven't done so yet
     if (partitionWriters == null) {
       curWriteMetrics = new ShuffleWriteMetrics()
+      val openStartTime = System.nanoTime
       partitionWriters = Array.fill(numPartitions) {
         // Because these files may be read during shuffle, their compression must be controlled by
         // spark.shuffle.compress instead of spark.shuffle.spill.compress, so we need to use
         // createTempShuffleBlock here; see SPARK-3426 for more context.
         val (blockId, file) = diskBlockManager.createTempShuffleBlock()
         blockManager.getDiskWriter(blockId, file, ser, fileBufferSize, curWriteMetrics).open()
       }
+      // Creating the file to write to and creating a disk writer both involve interacting with
+      // the disk, and can take a long time in aggregate when we open many files, so should be
+      // included in the shuffle write time.
+      curWriteMetrics.incShuffleWriteTime(System.nanoTime - openStartTime)
     }
 
     // No need to sort stuff, just write each element out

docs/running-on-yarn.md

Lines changed: 1 addition & 1 deletion

@@ -274,6 +274,6 @@ If you need a reference to the proper location to put log files in the YARN so t
 # Important notes
 
 - Whether core requests are honored in scheduling decisions depends on which scheduler is in use and how it is configured.
-- The local directories used by Spark executors will be the local directories configured for YARN (Hadoop YARN config `yarn.nodemanager.local-dirs`). If the user specifies `spark.local.dir`, it will be ignored.
+- In `yarn-cluster` mode, the local directories used by the Spark executors and the Spark driver will be the local directories configured for YARN (Hadoop YARN config `yarn.nodemanager.local-dirs`). If the user specifies `spark.local.dir`, it will be ignored. In `yarn-client` mode, the Spark executors will use the local directories configured for YARN while the Spark driver will use those defined in `spark.local.dir`. This is because the Spark driver does not run on the YARN cluster in `yarn-client` mode, only the Spark executors do.
 - The `--files` and `--archives` options support specifying file names with the # similar to Hadoop. For example you can specify: `--files localtest.txt#appSees.txt` and this will upload the file you have locally named localtest.txt into HDFS but this will be linked to by the name `appSees.txt`, and your application should use the name as `appSees.txt` to reference it when running on YARN.
 - The `--jars` option allows the `SparkContext.addJar` function to work if you are using it with local files and running in `yarn-cluster` mode. It does not need to be used if you are using it with HDFS, HTTP, HTTPS, or FTP files.
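The distinction matters, for instance, when the driver machine has different scratch space than the YARN nodes. An illustrative snippet (not part of the documentation change; the master string, paths, and app name are made up for the example):

    import org.apache.spark.{SparkConf, SparkContext}

    // In yarn-client mode the driver runs outside the YARN cluster, so it honors spark.local.dir,
    // while the executors on the cluster keep using YARN's yarn.nodemanager.local-dirs.
    val conf = new SparkConf()
      .setAppName("local-dirs-example")
      .setMaster("yarn-client")
      .set("spark.local.dir", "/mnt/fast-disk/spark-driver-scratch")  // driver-side only here
    val sc = new SparkContext(conf)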

repl/pom.xml

Lines changed: 5 additions & 0 deletions

@@ -84,6 +84,11 @@
       <artifactId>scalacheck_${scala.binary.version}</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
 
     <!-- Explicit listing of transitive deps that are shaded. Otherwise, odd compiler crashes. -->
     <dependency>
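`mockito-all` is added with test scope, presumably to support tests for the updated `ExecutorClassLoader` below. A generic sketch of what the dependency enables (hypothetical test code, not the actual suite):

    import java.net.HttpURLConnection
    import org.mockito.Mockito.{mock, when}

    // Stub an HTTP connection so failure paths (e.g. the class server returning 404)
    // can be exercised without standing up a real server.
    val connection = mock(classOf[HttpURLConnection])
    when(connection.getResponseCode).thenReturn(HttpURLConnection.HTTP_NOT_FOUND)
    assert(connection.getResponseCode == 404)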

repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala

Lines changed: 67 additions & 18 deletions

@@ -17,9 +17,10 @@
 
 package org.apache.spark.repl
 
-import java.io.{ByteArrayOutputStream, InputStream, FileNotFoundException}
-import java.net.{URI, URL, URLEncoder}
-import java.util.concurrent.{Executors, ExecutorService}
+import java.io.{IOException, ByteArrayOutputStream, InputStream}
+import java.net.{HttpURLConnection, URI, URL, URLEncoder}
+
+import scala.util.control.NonFatal
 
 import org.apache.hadoop.fs.{FileSystem, Path}
 

@@ -43,6 +44,9 @@ class ExecutorClassLoader(conf: SparkConf, classUri: String, parent: ClassLoader
 
   val parentLoader = new ParentClassLoader(parent)
 
+  // Allows HTTP connect and read timeouts to be controlled for testing / debugging purposes
+  private[repl] var httpUrlConnectionTimeoutMillis: Int = -1
+
   // Hadoop FileSystem object for our URI, if it isn't using HTTP
   var fileSystem: FileSystem = {
     if (Set("http", "https", "ftp").contains(uri.getScheme)) {

@@ -71,37 +75,82 @@ class ExecutorClassLoader(conf: SparkConf, classUri: String, parent: ClassLoader
     }
   }
 
+  private def getClassFileInputStreamFromHttpServer(pathInDirectory: String): InputStream = {
+    val url = if (SparkEnv.get.securityManager.isAuthenticationEnabled()) {
+      val uri = new URI(classUri + "/" + urlEncode(pathInDirectory))
+      val newuri = Utils.constructURIForAuthentication(uri, SparkEnv.get.securityManager)
+      newuri.toURL
+    } else {
+      new URL(classUri + "/" + urlEncode(pathInDirectory))
+    }
+    val connection: HttpURLConnection = Utils.setupSecureURLConnection(url.openConnection(),
+      SparkEnv.get.securityManager).asInstanceOf[HttpURLConnection]
+    // Set the connection timeouts (for testing purposes)
+    if (httpUrlConnectionTimeoutMillis != -1) {
+      connection.setConnectTimeout(httpUrlConnectionTimeoutMillis)
+      connection.setReadTimeout(httpUrlConnectionTimeoutMillis)
+    }
+    connection.connect()
+    try {
+      if (connection.getResponseCode != 200) {
+        // Close the error stream so that the connection is eligible for re-use
+        try {
+          connection.getErrorStream.close()
+        } catch {
+          case ioe: IOException =>
+            logError("Exception while closing error stream", ioe)
+        }
+        throw new ClassNotFoundException(s"Class file not found at URL $url")
+      } else {
+        connection.getInputStream
+      }
+    } catch {
+      case NonFatal(e) if !e.isInstanceOf[ClassNotFoundException] =>
+        connection.disconnect()
+        throw e
+    }
+  }
+
+  private def getClassFileInputStreamFromFileSystem(pathInDirectory: String): InputStream = {
+    val path = new Path(directory, pathInDirectory)
+    if (fileSystem.exists(path)) {
+      fileSystem.open(path)
+    } else {
+      throw new ClassNotFoundException(s"Class file not found at path $path")
+    }
+  }
+
   def findClassLocally(name: String): Option[Class[_]] = {
+    val pathInDirectory = name.replace('.', '/') + ".class"
+    var inputStream: InputStream = null
     try {
-      val pathInDirectory = name.replace('.', '/') + ".class"
-      val inputStream = {
+      inputStream = {
         if (fileSystem != null) {
-          fileSystem.open(new Path(directory, pathInDirectory))
+          getClassFileInputStreamFromFileSystem(pathInDirectory)
         } else {
-          val url = if (SparkEnv.get.securityManager.isAuthenticationEnabled()) {
-            val uri = new URI(classUri + "/" + urlEncode(pathInDirectory))
-            val newuri = Utils.constructURIForAuthentication(uri, SparkEnv.get.securityManager)
-            newuri.toURL
-          } else {
-            new URL(classUri + "/" + urlEncode(pathInDirectory))
-          }
-
-          Utils.setupSecureURLConnection(url.openConnection(), SparkEnv.get.securityManager)
-            .getInputStream
+          getClassFileInputStreamFromHttpServer(pathInDirectory)
         }
       }
       val bytes = readAndTransformClass(name, inputStream)
-      inputStream.close()
       Some(defineClass(name, bytes, 0, bytes.length))
     } catch {
-      case e: FileNotFoundException =>
+      case e: ClassNotFoundException =>
        // We did not find the class
        logDebug(s"Did not load class $name from REPL class server at $uri", e)
        None
      case e: Exception =>
        // Something bad happened while checking if the class exists
        logError(s"Failed to check existence of class $name on REPL class server at $uri", e)
        None
+    } finally {
+      if (inputStream != null) {
+        try {
+          inputStream.close()
+        } catch {
+          case e: Exception =>
+            logError("Exception while closing inputStream", e)
+        }
+      }
     }
   }
 
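The refactor splits class fetching into an HDFS/local-filesystem path and an HTTP path, converts "class not found" into `ClassNotFoundException` in both, and moves stream cleanup into a `finally` block so `findClassLocally` no longer leaks the input stream when `readAndTransformClass` or `defineClass` throws. The HTTP path also closes the error stream on non-200 responses so the keep-alive connection stays eligible for reuse, and calls `disconnect()` on unexpected errors. That connection-handling pattern, reduced to a standalone sketch that uses only the JDK:

    import java.io.{IOException, InputStream}
    import java.net.{HttpURLConnection, URL}
    import scala.util.control.NonFatal

    // Treat a non-200 response as "class not found"; close the error stream so the underlying
    // keep-alive connection stays reusable; tear the connection down on any other failure.
    def openClassStream(url: URL): InputStream = {
      val connection = url.openConnection().asInstanceOf[HttpURLConnection]
      connection.connect()
      try {
        if (connection.getResponseCode != HttpURLConnection.HTTP_OK) {
          try {
            connection.getErrorStream.close()
          } catch {
            case _: IOException => // the real code logs this; ignored in the sketch
          }
          throw new ClassNotFoundException(s"Class file not found at URL $url")
        } else {
          connection.getInputStream
        }
      } catch {
        case NonFatal(e) if !e.isInstanceOf[ClassNotFoundException] =>
          connection.disconnect()
          throw e
      }
    }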
