Commit 8d7ce07

Author: Andrew Or

Merge branch 'master' of github.com:apache/spark into rest

Conflicts:
    core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

2 parents: 6f0c597 + ca66159

117 files changed: 4,919 additions and 467 deletions


assembly/pom.xml

Lines changed: 4 additions & 4 deletions
@@ -39,7 +39,7 @@
     <deb.pkg.name>spark</deb.pkg.name>
     <deb.install.path>/usr/share/spark</deb.install.path>
     <deb.user>root</deb.user>
-    <deb.bin.filemode>744</deb.bin.filemode>
+    <deb.bin.filemode>755</deb.bin.filemode>
   </properties>
 
   <dependencies>
@@ -280,7 +280,7 @@
           <user>${deb.user}</user>
           <group>${deb.user}</group>
           <prefix>${deb.install.path}/conf</prefix>
-          <filemode>744</filemode>
+          <filemode>${deb.bin.filemode}</filemode>
         </mapper>
       </data>
       <data>
@@ -302,7 +302,7 @@
           <user>${deb.user}</user>
           <group>${deb.user}</group>
           <prefix>${deb.install.path}/sbin</prefix>
-          <filemode>744</filemode>
+          <filemode>${deb.bin.filemode}</filemode>
         </mapper>
       </data>
       <data>
@@ -313,7 +313,7 @@
           <user>${deb.user}</user>
           <group>${deb.user}</group>
           <prefix>${deb.install.path}/python</prefix>
-          <filemode>744</filemode>
+          <filemode>${deb.bin.filemode}</filemode>
         </mapper>
       </data>
     </dataSet>
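
The default mode moves from 744 to 755 so that group and other users can execute the packaged scripts, not just the owner (744 is rwxr--r--, 755 is rwxr-xr-x), and the per-entry <filemode> values now reference the single ${deb.bin.filemode} property instead of hard-coding 744. A small Scala sketch of what the two octal modes expand to; the symbolic strings are plain POSIX notation, not anything taken from the build:

    import java.nio.file.attribute.PosixFilePermissions

    // 744 = rwxr--r--  (only the owning user may execute)
    // 755 = rwxr-xr-x  (group and others may execute as well)
    val mode744 = PosixFilePermissions.fromString("rwxr--r--")
    val mode755 = PosixFilePermissions.fromString("rwxr-xr-x")
    println(mode744.size)  // 5 permission bits set
    println(mode755.size)  // 7 bits set: the extras are group and others execute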

bin/spark-shell.cmd

File mode changed: 100755 → 100644

bin/spark-submit2.cmd

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ set ORIG_ARGS=%*
 rem Reset the values of all variables used
 set SPARK_SUBMIT_DEPLOY_MODE=client
 
-if not defined %SPARK_CONF_DIR% (
+if [%SPARK_CONF_DIR%] == [] (
   set SPARK_CONF_DIR=%SPARK_HOME%\conf
 )
 set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_CONF_DIR%\spark-defaults.conf
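
The old test, if not defined %SPARK_CONF_DIR%, expands the variable before the defined check runs, so the check never looks at the variable itself; comparing [%SPARK_CONF_DIR%] against [] behaves correctly whether the variable is unset or empty. For reference, a Scala sketch of the same fallback logic (the variable names mirror the batch file; this is an illustration, not code from the commit):

    // use SPARK_CONF_DIR when set and non-empty, otherwise default to <SPARK_HOME>\conf
    val sparkConfDir: String =
      sys.env.get("SPARK_CONF_DIR").filter(_.nonEmpty)
        .getOrElse(sys.env.getOrElse("SPARK_HOME", ".") + "\\conf")
    val sparkSubmitPropertiesFile = sparkConfDir + "\\spark-defaults.conf"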

core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala

Lines changed: 7 additions & 8 deletions
@@ -76,15 +76,15 @@ private[spark] class ExecutorAllocationManager(
   private val maxNumExecutors = conf.getInt("spark.dynamicAllocation.maxExecutors",
     Integer.MAX_VALUE)
 
-  // How long there must be backlogged tasks for before an addition is triggered
+  // How long there must be backlogged tasks for before an addition is triggered (seconds)
   private val schedulerBacklogTimeout = conf.getLong(
-    "spark.dynamicAllocation.schedulerBacklogTimeout", 60)
+    "spark.dynamicAllocation.schedulerBacklogTimeout", 5)
 
   // Same as above, but used only after `schedulerBacklogTimeout` is exceeded
   private val sustainedSchedulerBacklogTimeout = conf.getLong(
     "spark.dynamicAllocation.sustainedSchedulerBacklogTimeout", schedulerBacklogTimeout)
 
-  // How long an executor must be idle for before it is removed
+  // How long an executor must be idle for before it is removed (seconds)
   private val executorIdleTimeout = conf.getLong(
     "spark.dynamicAllocation.executorIdleTimeout", 600)
 
@@ -486,8 +486,8 @@ private[spark] class ExecutorAllocationManager(
       }
     }
 
-    override def onBlockManagerAdded(blockManagerAdded: SparkListenerBlockManagerAdded): Unit = {
-      val executorId = blockManagerAdded.blockManagerId.executorId
+    override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): Unit = {
+      val executorId = executorAdded.executorId
       if (executorId != SparkContext.DRIVER_IDENTIFIER) {
         // This guards against the race condition in which the `SparkListenerTaskStart`
         // event is posted before the `SparkListenerBlockManagerAdded` event, which is
@@ -498,9 +498,8 @@ private[spark] class ExecutorAllocationManager(
       }
     }
 
-    override def onBlockManagerRemoved(
-        blockManagerRemoved: SparkListenerBlockManagerRemoved): Unit = {
-      allocationManager.onExecutorRemoved(blockManagerRemoved.blockManagerId.executorId)
+    override def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved): Unit = {
+      allocationManager.onExecutorRemoved(executorRemoved.executorId)
     }
 
     /**
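
With this change the allocation manager learns about executors from the executor-level listener events rather than from block manager registration. A minimal sketch of the same pattern, assuming the SparkListenerExecutorAdded/SparkListenerExecutorRemoved events used in the diff; ExecutorTracker is a made-up example class, not part of Spark:

    import scala.collection.mutable
    import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorAdded, SparkListenerExecutorRemoved}

    // keep a set of live executor ids, updated from the two executor-level events
    class ExecutorTracker extends SparkListener {
      private val liveExecutors = mutable.HashSet[String]()

      override def onExecutorAdded(event: SparkListenerExecutorAdded): Unit =
        liveExecutors.synchronized { liveExecutors += event.executorId }

      override def onExecutorRemoved(event: SparkListenerExecutorRemoved): Unit =
        liveExecutors.synchronized { liveExecutors -= event.executorId }

      def numLiveExecutors: Int = liveExecutors.synchronized { liveExecutors.size }
    }

Registered via sc.addSparkListener(new ExecutorTracker), such a listener sees the same add/remove events the allocation manager now consumes.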

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 18 additions & 2 deletions
@@ -288,7 +288,12 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
   // the bound port to the cluster manager properly
   ui.foreach(_.bind())
 
-  /** A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. */
+  /**
+   * A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse.
+   *
+   * '''Note:''' As it will be reused in all Hadoop RDDs, it's better not to modify it unless you
+   * plan to set some global configurations for all Hadoop RDDs.
+   */
   val hadoopConfiguration = SparkHadoopUtil.get.newConfiguration(conf)
 
   // Add each JAR given through the constructor
@@ -694,7 +699,10 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
    * necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable),
    * using the older MapReduce API (`org.apache.hadoop.mapred`).
    *
-   * @param conf JobConf for setting up the dataset
+   * @param conf JobConf for setting up the dataset. Note: This will be put into a Broadcast.
+   *             Therefore if you plan to reuse this conf to create multiple RDDs, you need to make
+   *             sure you won't modify the conf. A safe approach is always creating a new conf for
+   *             a new RDD.
    * @param inputFormatClass Class of the InputFormat
    * @param keyClass Class of the keys
    * @param valueClass Class of the values
@@ -830,6 +838,14 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
    * Get an RDD for a given Hadoop file with an arbitrary new API InputFormat
    * and extra configuration options to pass to the input format.
    *
+   * @param conf Configuration for setting up the dataset. Note: This will be put into a Broadcast.
+   *             Therefore if you plan to reuse this conf to create multiple RDDs, you need to make
+   *             sure you won't modify the conf. A safe approach is always creating a new conf for
+   *             a new RDD.
+   * @param fClass Class of the InputFormat
+   * @param kClass Class of the keys
+   * @param vClass Class of the values
+   *
    * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
    * operation will create many references to the same object.
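
The added notes boil down to one rule: the Configuration or JobConf passed in is broadcast once, so mutating it afterwards can leak settings into other RDDs built from the same object. A sketch of the "new conf for a new RDD" approach the note recommends, assuming an existing SparkContext sc and illustrative paths and keys:

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.io.{LongWritable, Text}
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat

    // clone the shared hadoopConfiguration instead of mutating it in place
    val confA = new Configuration(sc.hadoopConfiguration)
    confA.set("textinputformat.record.delimiter", "\u0000")  // per-RDD setting, illustrative
    val rddA = sc.newAPIHadoopFile("/data/a", classOf[TextInputFormat],
      classOf[LongWritable], classOf[Text], confA)

    // a separate conf for the next RDD, so neither sees the other's settings
    val confB = new Configuration(sc.hadoopConfiguration)
    val rddB = sc.newAPIHadoopFile("/data/b", classOf[TextInputFormat],
      classOf[LongWritable], classOf[Text], confB)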

core/src/main/scala/org/apache/spark/TestUtils.scala

Lines changed: 27 additions & 7 deletions
@@ -43,11 +43,20 @@ private[spark] object TestUtils {
    * Note: if this is used during class loader tests, class names should be unique
    * in order to avoid interference between tests.
    */
-  def createJarWithClasses(classNames: Seq[String], value: String = ""): URL = {
+  def createJarWithClasses(
+      classNames: Seq[String],
+      toStringValue: String = "",
+      classNamesWithBase: Seq[(String, String)] = Seq(),
+      classpathUrls: Seq[URL] = Seq()): URL = {
     val tempDir = Utils.createTempDir()
-    val files = for (name <- classNames) yield createCompiledClass(name, tempDir, value)
+    val files1 = for (name <- classNames) yield {
+      createCompiledClass(name, tempDir, toStringValue, classpathUrls = classpathUrls)
+    }
+    val files2 = for ((childName, baseName) <- classNamesWithBase) yield {
+      createCompiledClass(childName, tempDir, toStringValue, baseName, classpathUrls)
+    }
     val jarFile = new File(tempDir, "testJar-%s.jar".format(System.currentTimeMillis()))
-    createJar(files, jarFile)
+    createJar(files1 ++ files2, jarFile)
   }
 
 
@@ -85,15 +94,26 @@ private[spark] object TestUtils {
   }
 
   /** Creates a compiled class with the given name. Class file will be placed in destDir. */
-  def createCompiledClass(className: String, destDir: File, value: String = ""): File = {
+  def createCompiledClass(
+      className: String,
+      destDir: File,
+      toStringValue: String = "",
+      baseClass: String = null,
+      classpathUrls: Seq[URL] = Seq()): File = {
     val compiler = ToolProvider.getSystemJavaCompiler
+    val extendsText = Option(baseClass).map { c => s" extends ${c}" }.getOrElse("")
     val sourceFile = new JavaSourceFromString(className,
-      "public class " + className + " implements java.io.Serializable {" +
-      " @Override public String toString() { return \"" + value + "\"; }}")
+      "public class " + className + extendsText + " implements java.io.Serializable {" +
+      " @Override public String toString() { return \"" + toStringValue + "\"; }}")
 
     // Calling this outputs a class file in pwd. It's easier to just rename the file than
     // build a custom FileManager that controls the output location.
-    compiler.getTask(null, null, null, null, null, Seq(sourceFile)).call()
+    val options = if (classpathUrls.nonEmpty) {
+      Seq("-classpath", classpathUrls.map { _.getFile }.mkString(File.pathSeparator))
+    } else {
+      Seq()
+    }
+    compiler.getTask(null, null, null, options, null, Seq(sourceFile)).call()
 
     val fileName = className + ".class"
     val result = new File(fileName)
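
A hypothetical use of the extended helpers in a Spark test (the class names are made up, and TestUtils is private[spark], so this only applies inside Spark's own test code): first compile a jar holding a base class, then compile a child class against it.

    // a jar containing only the base class
    val baseJar = TestUtils.createJarWithClasses(classNames = Seq("FakeBase"))

    // a jar whose class extends FakeBase, compiled with the first jar on the classpath
    val childJar = TestUtils.createJarWithClasses(
      classNames = Seq(),
      classNamesWithBase = Seq(("FakeChild", "FakeBase")),
      classpathUrls = Seq(baseJar))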

core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala

Lines changed: 28 additions & 0 deletions
@@ -373,6 +373,15 @@ class JavaSparkContext(val sc: SparkContext)
    * other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable,
    * etc).
    *
+   * @param conf JobConf for setting up the dataset. Note: This will be put into a Broadcast.
+   *             Therefore if you plan to reuse this conf to create multiple RDDs, you need to make
+   *             sure you won't modify the conf. A safe approach is always creating a new conf for
+   *             a new RDD.
+   * @param inputFormatClass Class of the InputFormat
+   * @param keyClass Class of the keys
+   * @param valueClass Class of the values
+   * @param minPartitions Minimum number of Hadoop Splits to generate.
+   *
    * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
@@ -395,6 +404,14 @@ class JavaSparkContext(val sc: SparkContext)
    * Get an RDD for a Hadoop-readable dataset from a Hadooop JobConf giving its InputFormat and any
    * other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable,
    *
+   * @param conf JobConf for setting up the dataset. Note: This will be put into a Broadcast.
+   *             Therefore if you plan to reuse this conf to create multiple RDDs, you need to make
+   *             sure you won't modify the conf. A safe approach is always creating a new conf for
+   *             a new RDD.
+   * @param inputFormatClass Class of the InputFormat
+   * @param keyClass Class of the keys
+   * @param valueClass Class of the values
+   *
    * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
@@ -476,6 +493,14 @@ class JavaSparkContext(val sc: SparkContext)
    * Get an RDD for a given Hadoop file with an arbitrary new API InputFormat
    * and extra configuration options to pass to the input format.
    *
+   * @param conf Configuration for setting up the dataset. Note: This will be put into a Broadcast.
+   *             Therefore if you plan to reuse this conf to create multiple RDDs, you need to make
+   *             sure you won't modify the conf. A safe approach is always creating a new conf for
+   *             a new RDD.
+   * @param fClass Class of the InputFormat
+   * @param kClass Class of the keys
+   * @param vClass Class of the values
+   *
    * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
    * If you plan to directly cache Hadoop writable objects, you should first copy them using
@@ -675,6 +700,9 @@ class JavaSparkContext(val sc: SparkContext)
 
   /**
    * Returns the Hadoop configuration used for the Hadoop code (e.g. file systems) we reuse.
+   *
+   * '''Note:''' As it will be reused in all Hadoop RDDs, it's better not to modify it unless you
+   * plan to set some global configurations for all Hadoop RDDs.
    */
   def hadoopConfiguration(): Configuration = {
     sc.hadoopConfiguration

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 13 additions & 8 deletions
@@ -18,17 +18,16 @@
 package org.apache.spark.deploy
 
 import java.io.{File, PrintStream}
-import java.lang.reflect.{Modifier, InvocationTargetException}
+import java.lang.reflect.{InvocationTargetException, Modifier}
 import java.net.URL
 
 import scala.collection.mutable.{ArrayBuffer, HashMap, Map}
 
 import org.apache.hadoop.fs.Path
-
 import org.apache.ivy.Ivy
 import org.apache.ivy.core.LogOptions
-import org.apache.ivy.core.module.descriptor.{DefaultExcludeRule, DefaultDependencyDescriptor, DefaultModuleDescriptor}
-import org.apache.ivy.core.module.id.{ModuleId, ArtifactId, ModuleRevisionId}
+import org.apache.ivy.core.module.descriptor._
+import org.apache.ivy.core.module.id.{ArtifactId, ModuleId, ModuleRevisionId}
 import org.apache.ivy.core.report.ResolveReport
 import org.apache.ivy.core.resolve.ResolveOptions
 import org.apache.ivy.core.retrieve.RetrieveOptions
@@ -37,7 +36,7 @@ import org.apache.ivy.plugins.matcher.GlobPatternMatcher
 import org.apache.ivy.plugins.resolver.{ChainResolver, IBiblioResolver}
 
 import org.apache.spark.deploy.rest._
-import org.apache.spark.executor.ExecutorURLClassLoader
+import org.apache.spark.executor._
 import org.apache.spark.util.Utils
 
 /**
@@ -467,8 +466,14 @@
       printStream.println("\n")
     }
 
-    val loader = new ExecutorURLClassLoader(new Array[URL](0),
-      Thread.currentThread.getContextClassLoader)
+    val loader =
+      if (sysProps.getOrElse("spark.files.userClassPathFirst", "false").toBoolean) {
+        new ChildExecutorURLClassLoader(new Array[URL](0),
+          Thread.currentThread.getContextClassLoader)
+      } else {
+        new ExecutorURLClassLoader(new Array[URL](0),
+          Thread.currentThread.getContextClassLoader)
+      }
     Thread.currentThread.setContextClassLoader(loader)
 
     for (jar <- childClasspath) {
@@ -512,7 +517,7 @@
     }
   }
 
-  private def addJarToClasspath(localJar: String, loader: ExecutorURLClassLoader) {
+  private def addJarToClasspath(localJar: String, loader: MutableURLClassLoader) {
     val uri = Utils.resolveURI(localJar)
     uri.getScheme match {
       case "file" | "local" =>

core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala

Lines changed: 14 additions & 8 deletions
@@ -173,9 +173,10 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis
     val logInfos = statusList
       .filter { entry =>
         try {
-          val modTime = getModificationTime(entry)
-          newLastModifiedTime = math.max(newLastModifiedTime, modTime)
-          modTime >= lastModifiedTime
+          getModificationTime(entry).map { time =>
+            newLastModifiedTime = math.max(newLastModifiedTime, time)
+            time >= lastModifiedTime
+          }.getOrElse(false)
         } catch {
           case e: AccessControlException =>
             // Do not use "logInfo" since these messages can get pretty noisy if printed on
@@ -251,7 +252,7 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis
         appListener.appName.getOrElse(NOT_STARTED),
         appListener.startTime.getOrElse(-1L),
         appListener.endTime.getOrElse(-1L),
-        getModificationTime(eventLog),
+        getModificationTime(eventLog).get,
         appListener.sparkUser.getOrElse(NOT_STARTED),
         isApplicationCompleted(eventLog))
     } finally {
@@ -310,11 +311,16 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis
    */
   private def isLegacyLogDirectory(entry: FileStatus): Boolean = entry.isDir()
 
-  private def getModificationTime(fsEntry: FileStatus): Long = {
-    if (fsEntry.isDir) {
-      fs.listStatus(fsEntry.getPath).map(_.getModificationTime()).max
+  /**
+   * Returns the modification time of the given event log. If the status points at an empty
+   * directory, `None` is returned, indicating that there isn't an event log at that location.
+   */
+  private def getModificationTime(fsEntry: FileStatus): Option[Long] = {
+    if (isLegacyLogDirectory(fsEntry)) {
+      val statusList = fs.listStatus(fsEntry.getPath)
+      if (!statusList.isEmpty) Some(statusList.map(_.getModificationTime()).max) else None
     } else {
-      fsEntry.getModificationTime()
+      Some(fsEntry.getModificationTime())
     }
   }
 
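
getModificationTime now returns Option[Long] so that an empty legacy log directory, which has no usable timestamp, simply drops out of the scan; that is what the map(...).getOrElse(false) in the filter above expresses. A self-contained sketch of that shape, with a stand-in modTime in place of the real helper:

    // stand-in: None models an entry with no usable timestamp (e.g. an empty directory)
    def modTime(entry: (String, Option[Long])): Option[Long] = entry._2

    val lastModifiedTime = 100L
    val entries = Seq(("empty-dir", None), ("old-log", Some(50L)), ("new-log", Some(150L)))

    // keep only entries whose time is defined and at least lastModifiedTime
    val kept = entries.filter { e =>
      modTime(e).map(_ >= lastModifiedTime).getOrElse(false)
    }
    // kept == Seq(("new-log", Some(150L)))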

core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala

Lines changed: 7 additions & 0 deletions
@@ -43,6 +43,7 @@ private[spark] class ExecutorRunner(
     val worker: ActorRef,
     val workerId: String,
     val host: String,
+    val webUiPort: Int,
     val sparkHome: File,
     val executorDir: File,
     val workerUrl: String,
@@ -134,6 +135,12 @@ private[spark] class ExecutorRunner(
       // In case we are running this from within the Spark Shell, avoid creating a "scala"
       // parent process for the executor command
      builder.environment.put("SPARK_LAUNCH_WITH_SCALA", "0")
+
+      // Add webUI log urls
+      val baseUrl = s"http://$host:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType="
+      builder.environment.put("SPARK_LOG_URL_STDERR", s"${baseUrl}stderr")
+      builder.environment.put("SPARK_LOG_URL_STDOUT", s"${baseUrl}stdout")
+
       process = builder.start()
       val header = "Spark Executor Command: %s\n%s\n\n".format(
         command.mkString("\"", "\" \"", "\""), "=" * 40)
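
The two environment variables point each executor at its own stdout/stderr pages on the worker web UI. A small sketch of the URL shape being exported; every value below is a made-up placeholder, not something taken from the commit:

    val host = "worker-1.example.com"   // hypothetical worker host
    val webUiPort = 8081                // hypothetical worker UI port
    val appId = "app-20150206-0001"     // hypothetical application id
    val execId = 7                      // hypothetical executor id

    val baseUrl = s"http://$host:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType="
    println(baseUrl + "stderr")  // value placed in SPARK_LOG_URL_STDERR
    println(baseUrl + "stdout")  // value placed in SPARK_LOG_URL_STDOUT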
