@@ -541,6 +541,7 @@ private[master] class Master(
 
   /**
    * Schedule executors to be launched on the workers.
+   * Returns an array containing number of cores assigned to each worker.
    *
    * There are two modes of launching executors. The first attempts to spread out an application's
    * executors on as many workers as possible, while the second does the opposite (i.e. launch them
@@ -551,59 +552,100 @@ private[master] class Master(
    * multiple executors from the same application may be launched on the same worker if the worker
    * has enough cores and memory. Otherwise, each executor grabs all the cores available on the
    * worker by default, in which case only one executor may be launched on each worker.
+   *
+   * It is important to allocate coresPerExecutor on each worker at a time (instead of 1 core
+   * at a time). Consider the following example: cluster has 4 workers with 16 cores each.
+   * User requests 3 executors (spark.cores.max = 48, spark.executor.cores = 16). If 1 core is
+   * allocated at a time, 12 cores from each worker would be assigned to each executor.
+   * Since 12 < 16, no executors would launch [SPARK-8881].
    */
-  private def startExecutorsOnWorkers(): Unit = {
-    // Right now this is a very simple FIFO scheduler. We keep trying to fit in the first app
-    // in the queue, then the second app, etc.
-    if (spreadOutApps) {
-      // Try to spread out each app among all the workers, until it has all its cores
-      for (app <- waitingApps if app.coresLeft > 0) {
-        val usableWorkers = workers.toArray.filter(_.state == WorkerState.ALIVE)
-          .filter(worker => worker.memoryFree >= app.desc.memoryPerExecutorMB &&
-            worker.coresFree >= app.desc.coresPerExecutor.getOrElse(1))
-          .sortBy(_.coresFree).reverse
-        val numUsable = usableWorkers.length
-        val assigned = new Array[Int](numUsable) // Number of cores to give on each node
-        var toAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)
-        var pos = 0
-        while (toAssign > 0) {
-          if (usableWorkers(pos).coresFree - assigned(pos) > 0) {
-            toAssign -= 1
-            assigned(pos) += 1
+  private def scheduleExecutorsOnWorkers(
+      app: ApplicationInfo,
+      usableWorkers: Array[WorkerInfo],
+      spreadOutApps: Boolean): Array[Int] = {
+    // If the number of cores per executor is not specified, then we can just schedule
+    // 1 core at a time since we expect a single executor to be launched on each worker
+    val coresPerExecutor = app.desc.coresPerExecutor.getOrElse(1)
+    val memoryPerExecutor = app.desc.memoryPerExecutorMB
+    val numUsable = usableWorkers.length
+    val assignedCores = new Array[Int](numUsable) // Number of cores to give to each worker
+    val assignedMemory = new Array[Int](numUsable) // Amount of memory to give to each worker
+    var coresToAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)
+    var freeWorkers = (0 until numUsable).toIndexedSeq
+
+    def canLaunchExecutor(pos: Int): Boolean = {
+      usableWorkers(pos).coresFree - assignedCores(pos) >= coresPerExecutor &&
+      usableWorkers(pos).memoryFree - assignedMemory(pos) >= memoryPerExecutor
+    }
+
+    while (coresToAssign >= coresPerExecutor && freeWorkers.nonEmpty) {
+      freeWorkers = freeWorkers.filter(canLaunchExecutor)
+      freeWorkers.foreach { pos =>
+        var keepScheduling = true
+        while (keepScheduling && canLaunchExecutor(pos) && coresToAssign >= coresPerExecutor) {
+          coresToAssign -= coresPerExecutor
+          assignedCores(pos) += coresPerExecutor
+          // If cores per executor is not set, we are assigning 1 core at a time
+          // without actually meaning to launch 1 executor for each core assigned
+          if (app.desc.coresPerExecutor.isDefined) {
+            assignedMemory(pos) += memoryPerExecutor
+          }
+
+          // Spreading out an application means spreading out its executors across as
+          // many workers as possible. If we are not spreading out, then we should keep
+          // scheduling executors on this worker until we use all of its resources.
+          // Otherwise, just move on to the next worker.
+          if (spreadOutApps) {
+            keepScheduling = false
           }
-          pos = (pos + 1) % numUsable
-        }
-        // Now that we've decided how many cores to give on each node, let's actually give them
-        for (pos <- 0 until numUsable if assigned(pos) > 0) {
-          allocateWorkerResourceToExecutors(app, assigned(pos), usableWorkers(pos))
         }
       }
-    } else {
-      // Pack each app into as few workers as possible until we've assigned all its cores
-      for (worker <- workers if worker.coresFree > 0 && worker.state == WorkerState.ALIVE) {
-        for (app <- waitingApps if app.coresLeft > 0) {
-          allocateWorkerResourceToExecutors(app, app.coresLeft, worker)
-        }
+    }
+    assignedCores
+  }
+
+  /**
+   * Schedule and launch executors on workers
+   */
+  private def startExecutorsOnWorkers(): Unit = {
+    // Right now this is a very simple FIFO scheduler. We keep trying to fit in the first app
+    // in the queue, then the second app, etc.
+    for (app <- waitingApps if app.coresLeft > 0) {
+      val coresPerExecutor: Option[Int] = app.desc.coresPerExecutor
+      // Filter out workers that don't have enough resources to launch an executor
+      val usableWorkers = workers.toArray.filter(_.state == WorkerState.ALIVE)
+        .filter(worker => worker.memoryFree >= app.desc.memoryPerExecutorMB &&
+          worker.coresFree >= coresPerExecutor.getOrElse(1))
+        .sortBy(_.coresFree).reverse
+      val assignedCores = scheduleExecutorsOnWorkers(app, usableWorkers, spreadOutApps)
+
+      // Now that we've decided how many cores to allocate on each worker, let's allocate them
+      for (pos <- 0 until usableWorkers.length if assignedCores(pos) > 0) {
+        allocateWorkerResourceToExecutors(
+          app, assignedCores(pos), coresPerExecutor, usableWorkers(pos))
       }
     }
   }
 
   /**
    * Allocate a worker's resources to one or more executors.
    * @param app the info of the application which the executors belong to
-   * @param coresToAllocate cores on this worker to be allocated to this application
+   * @param assignedCores number of cores on this worker for this application
+   * @param coresPerExecutor number of cores per executor
    * @param worker the worker info
    */
   private def allocateWorkerResourceToExecutors(
       app: ApplicationInfo,
-      coresToAllocate: Int,
+      assignedCores: Int,
+      coresPerExecutor: Option[Int],
       worker: WorkerInfo): Unit = {
-    val memoryPerExecutor = app.desc.memoryPerExecutorMB
-    val coresPerExecutor = app.desc.coresPerExecutor.getOrElse(coresToAllocate)
-    var coresLeft = coresToAllocate
-    while (coresLeft >= coresPerExecutor && worker.memoryFree >= memoryPerExecutor) {
-      val exec = app.addExecutor(worker, coresPerExecutor)
-      coresLeft -= coresPerExecutor
+    // If the number of cores per executor is specified, we divide the cores assigned
+    // to this worker evenly among the executors with no remainder.
+    // Otherwise, we launch a single executor that grabs all the assignedCores on this worker.
+    val numExecutors = coresPerExecutor.map { assignedCores / _ }.getOrElse(1)
+    val coresToAssign = coresPerExecutor.getOrElse(assignedCores)
+    for (i <- 1 to numExecutors) {
+      val exec = app.addExecutor(worker, coresToAssign)
       launchExecutor(worker, exec)
       app.state = ApplicationState.RUNNING
     }
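
To make the scenario in the new scaladoc concrete, here is a small standalone Scala sketch (the `Spark8881Sketch` object and `assign` helper are illustrative only, not part of Master.scala) that simulates the round-robin assignment with a configurable step size. With 4 workers of 16 free cores each, spark.cores.max = 48 and spark.executor.cores = 16, a 1-core step leaves every worker with 12 assigned cores, too few for a single 16-core executor, while a 16-core step fills three workers exactly.

```scala
object Spark8881Sketch {
  // Round-robin `coresMax` cores across workers, `step` cores at a time.
  def assign(workerCores: Array[Int], coresMax: Int, step: Int): Array[Int] = {
    val assigned = Array.fill(workerCores.length)(0)
    var toAssign = math.min(coresMax, workerCores.sum)
    var pos = 0
    while (toAssign >= step &&
        assigned.zip(workerCores).exists { case (a, c) => c - a >= step }) {
      if (workerCores(pos) - assigned(pos) >= step) {
        assigned(pos) += step
        toAssign -= step
      }
      pos = (pos + 1) % workerCores.length
    }
    assigned
  }

  def main(args: Array[String]): Unit = {
    val workers = Array(16, 16, 16, 16) // 4 ALIVE workers, 16 free cores each
    // spark.cores.max = 48, spark.executor.cores = 16
    // 1 core at a time: 12 cores end up on each worker, and 12 < 16 means
    // no worker's share is large enough to launch a 16-core executor.
    println(assign(workers, coresMax = 48, step = 1).mkString(", "))  // 12, 12, 12, 12
    // coresPerExecutor at a time: three workers each get a full executor's worth.
    println(assign(workers, coresMax = 48, step = 16).mkString(", ")) // 16, 16, 16, 0
  }
}
```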
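For the allocation half of the change, the sketch below mirrors the arithmetic in the rewritten allocateWorkerResourceToExecutors, assuming the Option semantics shown in the diff; the `AllocationSketch` object, `executorsFor` helper, and the sample values are hypothetical and only illustrate the two branches.

```scala
object AllocationSketch {
  // How many executors launch on one worker, and with how many cores each.
  def executorsFor(assignedCores: Int, coresPerExecutor: Option[Int]): Seq[Int] = {
    // With spark.executor.cores set, the assigned cores divide evenly into executors
    // (scheduleExecutorsOnWorkers only hands out multiples of coresPerExecutor).
    // Without it, a single executor grabs every core assigned on this worker.
    val numExecutors = coresPerExecutor.map(assignedCores / _).getOrElse(1)
    val coresPerLaunch = coresPerExecutor.getOrElse(assignedCores)
    Seq.fill(numExecutors)(coresPerLaunch)
  }

  def main(args: Array[String]): Unit = {
    println(executorsFor(32, Some(16))) // List(16, 16): two 16-core executors
    println(executorsFor(32, None))     // List(32): one executor takes all 32 cores
  }
}
```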