27 commits
b34ec0c
make master support multiple executors per worker
CodingCat May 4, 2014
a5d629a
java doc
CodingCat Jan 27, 2015
a26096d
stylistic fix
CodingCat Jan 27, 2015
e5efabb
more java docs and consolidate canUse function
CodingCat Jan 27, 2015
ec7d421
test commit
CodingCat Jan 27, 2015
5b81466
remove outdated comments
CodingCat Jan 27, 2015
19d3da7
address the comments
CodingCat Feb 22, 2015
0b64fea
fix compilation issue
CodingCat Feb 22, 2015
35c462c
address Andrew's comments
CodingCat Feb 22, 2015
387f4ec
bug fix
CodingCat Feb 23, 2015
f64a28d
typo fix
CodingCat Feb 23, 2015
878402c
change the launching executor code
CodingCat Feb 23, 2015
497ec2c
address andrew's comments
CodingCat Mar 27, 2015
2c2bcc5
fix wrong usage info
CodingCat Mar 27, 2015
ff011e2
start multiple executors on the worker by rewriting startExecutor logic
CodingCat Apr 5, 2015
4cf61f1
improve the code and docs
CodingCat Apr 5, 2015
63b3df9
change the description of the parameter in the submit script
CodingCat Apr 5, 2015
f595bd6
recover some unintentional changes
CodingCat Apr 5, 2015
d9c1685
remove unused var
CodingCat Apr 5, 2015
f035423
stylistic fix
CodingCat Apr 5, 2015
12a1b32
change the semantic of coresPerExecutor to exact core number
CodingCat Apr 9, 2015
2eeff77
stylistic fixes
CodingCat Apr 10, 2015
45967b4
remove unused method
CodingCat Apr 10, 2015
b8ca561
revert a change
CodingCat Apr 10, 2015
940cb42
avoid unnecessary allocation
CodingCat Apr 10, 2015
fbeb7e5
address the comments
CodingCat Apr 14, 2015
6dee808
change filter predicate
CodingCat Apr 14, 2015
core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala
@@ -22,26 +22,27 @@ import java.net.URI
private[spark] class ApplicationDescription(
val name: String,
val maxCores: Option[Int],
val memoryPerSlave: Int,
val memoryPerExecutorMB: Int,
val command: Command,
var appUiUrl: String,
val eventLogDir: Option[URI] = None,
// short name of compression codec used when writing event logs, if any (e.g. lzf)
val eventLogCodec: Option[String] = None)
val eventLogCodec: Option[String] = None,
val coresPerExecutor: Option[Int] = None)
extends Serializable {

val user = System.getProperty("user.name", "<unknown>")

def copy(
name: String = name,
maxCores: Option[Int] = maxCores,
memoryPerSlave: Int = memoryPerSlave,
memoryPerExecutorMB: Int = memoryPerExecutorMB,
command: Command = command,
appUiUrl: String = appUiUrl,
eventLogDir: Option[URI] = eventLogDir,
eventLogCodec: Option[String] = eventLogCodec): ApplicationDescription =
new ApplicationDescription(
name, maxCores, memoryPerSlave, command, appUiUrl, eventLogDir, eventLogCodec)
name, maxCores, memoryPerExecutorMB, command, appUiUrl, eventLogDir, eventLogCodec)

override def toString: String = "ApplicationDescription(" + name + ")"
}
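
The rename and the new `coresPerExecutor` field are easiest to see at a call site. A minimal sketch (hypothetical values; both classes are `private[spark]`, so this only compiles inside Spark itself):

```scala
import org.apache.spark.deploy.{ApplicationDescription, Command}

// Hypothetical executor command; the real one is assembled in
// SparkDeploySchedulerBackend (see its diff further down).
val command = Command(
  mainClass = "org.apache.spark.executor.CoarseGrainedExecutorBackend",
  arguments = Seq.empty, environment = Map.empty,
  classPathEntries = Seq.empty, libraryPathEntries = Seq.empty,
  javaOpts = Seq.empty)

val desc = new ApplicationDescription(
  name = "demo-app",
  maxCores = Some(8),          // app-wide cap, unchanged
  memoryPerExecutorMB = 1024,  // renamed from memoryPerSlave
  command = command,
  appUiUrl = "",
  coresPerExecutor = Some(2))  // new: a fixed size for every executor
```

Leaving `coresPerExecutor` as `None` preserves the old behavior: one executor per worker that grabs all of the worker's available cores.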
core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala
@@ -46,7 +46,7 @@ private[deploy] object JsonProtocol {
("name" -> obj.desc.name) ~
("cores" -> obj.desc.maxCores) ~
("user" -> obj.desc.user) ~
("memoryperslave" -> obj.desc.memoryPerSlave) ~
("memoryperslave" -> obj.desc.memoryPerExecutorMB) ~
("submitdate" -> obj.submitDate.toString) ~
("state" -> obj.state.toString) ~
("duration" -> obj.duration)
@@ -55,7 +55,7 @@
def writeApplicationDescription(obj: ApplicationDescription): JObject = {
("name" -> obj.name) ~
("cores" -> obj.maxCores) ~
("memoryperslave" -> obj.memoryPerSlave) ~
("memoryperslave" -> obj.memoryPerExecutorMB) ~
("user" -> obj.user) ~
("command" -> obj.command.toString)
}
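Note that only the Scala-side field is renamed; the JSON key stays `memoryperslave`, presumably so existing consumers of the master's JSON endpoint keep working. A quick json4s illustration (hypothetical values):

```scala
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

// The field is memoryPerExecutorMB in Scala, but the wire format is unchanged.
val json = ("name" -> "demo-app") ~ ("memoryperslave" -> 1024)
println(compact(render(json)))  // {"name":"demo-app","memoryperslave":1024}
```
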
2 changes: 2 additions & 0 deletions core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -406,6 +406,8 @@ object SparkSubmit {
OptionAssigner(args.jars, YARN, CLUSTER, clOption = "--addJars"),

// Other options
OptionAssigner(args.executorCores, STANDALONE, ALL_DEPLOY_MODES,
sysProp = "spark.executor.cores"),
OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN, ALL_DEPLOY_MODES,
sysProp = "spark.executor.memory"),
OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES,
core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -482,10 +482,13 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
| Spark standalone and Mesos only:
| --total-executor-cores NUM Total cores for all executors.
|
| Spark standalone and YARN only:
| --executor-cores NUM Number of cores per executor. (Default: 1 in YARN mode,
| or all available cores on the worker in standalone mode)
|
| YARN-only:
| --driver-cores NUM Number of cores used by the driver, only in cluster mode
| (Default: 1).
| --executor-cores NUM Number of cores per executor (Default: 1).
| --queue QUEUE_NAME The YARN queue to submit to (Default: "default").
| --num-executors NUM Number of executors to launch (Default: 2).
| --archives ARCHIVES Comma separated list of archives to be extracted into the
core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala
@@ -75,9 +75,11 @@ private[deploy] class ApplicationInfo(
}
}

private[master] def addExecutor(worker: WorkerInfo, cores: Int, useID: Option[Int] = None):
ExecutorDesc = {
val exec = new ExecutorDesc(newExecutorId(useID), this, worker, cores, desc.memoryPerSlave)
private[master] def addExecutor(
worker: WorkerInfo,
cores: Int,
useID: Option[Int] = None): ExecutorDesc = {
val exec = new ExecutorDesc(newExecutorId(useID), this, worker, cores, desc.memoryPerExecutorMB)
executors(exec.id) = exec
coresGranted += cores
exec
117 changes: 61 additions & 56 deletions core/src/main/scala/org/apache/spark/deploy/master/Master.scala
@@ -524,52 +524,28 @@ private[master] class Master(
}

/**
* Can an app use the given worker? True if the worker has enough memory and we haven't already
* launched an executor for the app on it (right now the standalone backend doesn't like having
* two executors on the same worker).
*/
private def canUse(app: ApplicationInfo, worker: WorkerInfo): Boolean = {
worker.memoryFree >= app.desc.memoryPerSlave && !worker.hasExecutor(app)
}

/**
* Schedule the currently available resources among waiting apps. This method will be called
* every time a new app joins or resource availability changes.
* Schedule executors to be launched on the workers.
*
* There are two modes of launching executors. The first attempts to spread out an application's
* executors on as many workers as possible, while the second does the opposite (i.e. launch them
* on as few workers as possible). The former is usually better for data locality purposes and is
* the default.
*
* The number of cores assigned to each executor is configurable. When this is explicitly set,
* multiple executors from the same application may be launched on the same worker if the worker
* has enough cores and memory. Otherwise, each executor grabs all the cores available on the
* worker by default, in which case only one executor may be launched on each worker.
*/
private def schedule() {
if (state != RecoveryState.ALIVE) { return }

// First schedule drivers, they take strict precedence over applications
// Randomization helps balance drivers
val shuffledAliveWorkers = Random.shuffle(workers.toSeq.filter(_.state == WorkerState.ALIVE))
val numWorkersAlive = shuffledAliveWorkers.size
var curPos = 0

for (driver <- waitingDrivers.toList) { // iterate over a copy of waitingDrivers
// We assign workers to each waiting driver in a round-robin fashion. For each driver, we
// start from the last worker that was assigned a driver, and continue onwards until we have
// explored all alive workers.
var launched = false
var numWorkersVisited = 0
while (numWorkersVisited < numWorkersAlive && !launched) {
val worker = shuffledAliveWorkers(curPos)
numWorkersVisited += 1
if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
launchDriver(worker, driver)
waitingDrivers -= driver
launched = true
}
curPos = (curPos + 1) % numWorkersAlive
}
}

private def startExecutorsOnWorkers(): Unit = {
// Right now this is a very simple FIFO scheduler. We keep trying to fit in the first app
// in the queue, then the second app, etc.
if (spreadOutApps) {
// Try to spread out each app among all the nodes, until it has all its cores
// Try to spread out each app among all the workers, until it has all its cores
for (app <- waitingApps if app.coresLeft > 0) {
val usableWorkers = workers.toArray.filter(_.state == WorkerState.ALIVE)
.filter(canUse(app, _)).sortBy(_.coresFree).reverse
.filter(worker => worker.memoryFree >= app.desc.memoryPerExecutorMB &&
worker.coresFree >= app.desc.coresPerExecutor.getOrElse(1))
.sortBy(_.coresFree).reverse
val numUsable = usableWorkers.length
val assigned = new Array[Int](numUsable) // Number of cores to give on each node
var toAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)
@@ -582,32 +558,61 @@
pos = (pos + 1) % numUsable
}
// Now that we've decided how many cores to give on each node, let's actually give them
for (pos <- 0 until numUsable) {
if (assigned(pos) > 0) {
val exec = app.addExecutor(usableWorkers(pos), assigned(pos))
launchExecutor(usableWorkers(pos), exec)
app.state = ApplicationState.RUNNING
}
for (pos <- 0 until numUsable if assigned(pos) > 0) {
allocateWorkerResourceToExecutors(app, assigned(pos), usableWorkers(pos))
}
}
} else {
// Pack each app into as few nodes as possible until we've assigned all its cores
// Pack each app into as few workers as possible until we've assigned all its cores
for (worker <- workers if worker.coresFree > 0 && worker.state == WorkerState.ALIVE) {
for (app <- waitingApps if app.coresLeft > 0) {
if (canUse(app, worker)) {
val coresToUse = math.min(worker.coresFree, app.coresLeft)
if (coresToUse > 0) {
val exec = app.addExecutor(worker, coresToUse)
launchExecutor(worker, exec)
app.state = ApplicationState.RUNNING
}
}
allocateWorkerResourceToExecutors(app, app.coresLeft, worker)
}
}
}
}

/**
* Allocate a worker's resources to one or more executors.
* @param app the info of the application which the executors belong to
* @param coresToAllocate cores on this worker to be allocated to this application
* @param worker the worker info
*/
private def allocateWorkerResourceToExecutors(
app: ApplicationInfo,
coresToAllocate: Int,
worker: WorkerInfo): Unit = {
val memoryPerExecutor = app.desc.memoryPerExecutorMB
val coresPerExecutor = app.desc.coresPerExecutor.getOrElse(coresToAllocate)
var coresLeft = coresToAllocate
while (coresLeft >= coresPerExecutor && worker.memoryFree >= memoryPerExecutor) {
val exec = app.addExecutor(worker, coresPerExecutor)
coresLeft -= coresPerExecutor
launchExecutor(worker, exec)
app.state = ApplicationState.RUNNING
}
}

/**
* Schedule the currently available resources among waiting apps. This method will be called
* every time a new app joins or resource availability changes.
*/
private def schedule(): Unit = {
if (state != RecoveryState.ALIVE) { return }
// Drivers take strict precedence over executors
val shuffledWorkers = Random.shuffle(workers) // Randomization helps balance drivers
for (worker <- shuffledWorkers if worker.state == WorkerState.ALIVE) {
for (driver <- waitingDrivers) {
if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
launchDriver(worker, driver)
waitingDrivers -= driver
}
}
}
startExecutorsOnWorkers()
}

private def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc) {
private def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc): Unit = {
logInfo("Launching executor " + exec.fullId + " on worker " + worker.id)
worker.addExecutor(exec)
worker.actor ! LaunchExecutor(masterUrl,
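To make the two phases concrete, here is a self-contained toy model of the logic above — a sketch, not Spark code; the real implementation also mutates `WorkerInfo`/`ApplicationInfo` state and re-runs on every resource change:

```scala
// Toy model of spread-out scheduling followed by per-worker allocation.
case class FreeWorker(var coresFree: Int, var memoryFree: Int)

// Phase 1: spread an app's remaining cores round-robin across usable workers.
def spreadOut(coresLeft: Int, workers: Seq[FreeWorker]): Array[Int] = {
  val assigned = new Array[Int](workers.length)
  var toAssign = math.min(coresLeft, workers.map(_.coresFree).sum)
  var pos = 0
  while (toAssign > 0) {
    if (workers(pos).coresFree - assigned(pos) > 0) {
      toAssign -= 1
      assigned(pos) += 1
    }
    pos = (pos + 1) % workers.length
  }
  assigned
}

// Phase 2: carve one worker's share into fixed-size executors, stopping when
// either cores or memory run out (mirrors allocateWorkerResourceToExecutors).
def allocate(coresToAllocate: Int, coresPerExecutor: Int,
             memoryPerExecutorMB: Int, worker: FreeWorker): Int = {
  var coresLeft = coresToAllocate
  var launched = 0
  while (coresLeft >= coresPerExecutor && worker.memoryFree >= memoryPerExecutorMB) {
    coresLeft -= coresPerExecutor
    worker.coresFree -= coresPerExecutor
    worker.memoryFree -= memoryPerExecutorMB
    launched += 1
  }
  launched
}

// An app asking for 10 cores as 2-core / 1024 MB executors, on two workers:
val workers = Seq(FreeWorker(coresFree = 8, memoryFree = 4096),
                  FreeWorker(coresFree = 4, memoryFree = 1536))
val shares = spreadOut(10, workers)            // Array(6, 4)
val launched = shares.zip(workers).map { case (share, w) =>
  allocate(share, 2, 1024, w)                  // 2 cores, 1024 MB per executor
}
// launched == Array(3, 1): the second worker has cores for two executors but
// memory for only one, so 2 of the requested 10 cores stay unassigned for now.
```

The memory check is what allows several executors per worker without over-committing: each launch reserves `memoryPerExecutorMB`, so the loop stops as soon as the next executor would not fit.
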
core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala
@@ -94,7 +94,7 @@ private[ui] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app")
</li>
<li>
<strong>Executor Memory:</strong>
{Utils.megabytesToString(app.desc.memoryPerSlave)}
{Utils.megabytesToString(app.desc.memoryPerExecutorMB)}
</li>
<li><strong>Submit Date:</strong> {app.submitDate}</li>
<li><strong>State:</strong> {app.state}</li>
core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala
@@ -208,8 +208,8 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") {
<td>
{app.coresGranted}
</td>
<td sorttable_customkey={app.desc.memoryPerSlave.toString}>
{Utils.megabytesToString(app.desc.memoryPerSlave)}
<td sorttable_customkey={app.desc.memoryPerExecutorMB.toString}>
{Utils.megabytesToString(app.desc.memoryPerExecutorMB)}
</td>
<td>{UIUtils.formatDate(app.submitDate)}</td>
<td>{app.desc.user}</td>
core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
@@ -82,12 +82,11 @@ private[spark] class SparkDeploySchedulerBackend(
val command = Command("org.apache.spark.executor.CoarseGrainedExecutorBackend",
args, sc.executorEnvs, classPathEntries ++ testingClassPath, libraryPathEntries, javaOpts)
val appUIAddress = sc.ui.map(_.appUIAddress).getOrElse("")
val appDesc = new ApplicationDescription(sc.appName, maxCores, sc.executorMemory, command,
appUIAddress, sc.eventLogDir, sc.eventLogCodec)

val coresPerExecutor = conf.getOption("spark.executor.cores").map(_.toInt)
val appDesc = new ApplicationDescription(sc.appName, maxCores, sc.executorMemory,
command, appUIAddress, sc.eventLogDir, sc.eventLogCodec, coresPerExecutor)
client = new AppClient(sc.env.actorSystem, masters, appDesc, this, conf)
client.start()

waitForRegistration()
}

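The setting is read as an `Option`, so leaving it unset keeps the pre-patch behavior of one all-cores executor per worker. A quick sketch of the lookup used above:

```scala
import org.apache.spark.SparkConf

val conf = new SparkConf().set("spark.executor.cores", "2")
val coresPerExecutor = conf.getOption("spark.executor.cores").map(_.toInt)
assert(coresPerExecutor == Some(2))  // unset => None => old behavior
```
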
11 changes: 11 additions & 0 deletions docs/configuration.md
@@ -713,6 +713,17 @@ Apart from these, the following properties are also available, and may be useful
this duration will be cleared as well.
</td>
</tr>
<tr>
<td><code>spark.executor.cores</code></td>
<td>1 in YARN mode, all the available cores on the worker in standalone mode.</td>
<td>
The number of cores to use on each executor. For YARN and standalone mode only.

In standalone mode, setting this parameter allows an application to run multiple executors on
the same worker, provided that there are enough cores on that worker. Otherwise, only one
executor per application will run on each worker.
</td>
</tr>
<tr>
<td><code>spark.default.parallelism</code></td>
<td>
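
Putting the pieces together, a hedged end-to-end example (hypothetical master URL and sizes): with 2-core / 1 GB executors and an 8-core application cap, the master may place up to four executors for the app, including more than one on the same worker when it has spare cores and memory.

```scala
import org.apache.spark.{SparkConf, SparkContext}

val conf = new SparkConf()
  .setMaster("spark://master:7077")   // hypothetical standalone master
  .setAppName("multi-executor-demo")
  .set("spark.executor.cores", "2")   // the new setting introduced here
  .set("spark.cores.max", "8")
  .set("spark.executor.memory", "1g")
val sc = new SparkContext(conf)
```

The same request can be made from the submit script with `--executor-cores 2 --total-executor-cores 8 --executor-memory 1g`.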