[SPARK-20628][CORE][K8S] Start to improve Spark decommissioning & preemption support #26440
Changes from all commits to `CoarseGrainedExecutorBackend`:
```diff
@@ -43,7 +43,7 @@ import org.apache.spark.rpc._
 import org.apache.spark.scheduler.{ExecutorLossReason, TaskDescription}
 import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._
 import org.apache.spark.serializer.SerializerInstance
-import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, ThreadUtils, Utils}
+import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, SignalUtils, ThreadUtils, Utils}

 private[spark] class CoarseGrainedExecutorBackend(
     override val rpcEnv: RpcEnv,
```
```diff
@@ -64,6 +64,7 @@ private[spark] class CoarseGrainedExecutorBackend(
   private[this] val stopping = new AtomicBoolean(false)
   var executor: Executor = null
+  @volatile private var decommissioned = false
   @volatile var driver: Option[RpcEndpointRef] = None

   // If this CoarseGrainedExecutorBackend is changed to support multiple threads, then this may need
```
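The new `decommissioned` flag is written from the signal-handler thread (see `decommissionSelf` below) but read on the RPC receive path, so `@volatile` is needed for the write to be reliably visible across threads. A minimal standalone illustration of the hazard (not Spark code; the names here are invented for the demo):

```scala
object VolatileFlagDemo {
  // Without @volatile, the worker thread may never observe the update.
  @volatile private var stop = false

  def main(args: Array[String]): Unit = {
    val worker = new Thread(() => {
      while (!stop) {} // spins until the write from main becomes visible
      println("Worker observed the stop flag.")
    })
    worker.start()
    Thread.sleep(100)
    stop = true // written by the main thread; @volatile publishes it
    worker.join()
  }
}
```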
```diff
@@ -80,6 +81,9 @@ private[spark] class CoarseGrainedExecutorBackend(
   private[executor] val taskResources = new mutable.HashMap[Long, Map[String, ResourceInformation]]

   override def onStart(): Unit = {
+    logInfo("Registering PWR handler.")
+    SignalUtils.register("PWR")(decommissionSelf)
+
     logInfo("Connecting to driver: " + driverUrl)
     try {
       _resources = parseOrFindResources(resourcesFileOpt)
```
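For readers unfamiliar with `SignalUtils`, here is a rough sketch of what registering a `SIGPWR` handler amounts to on the JVM. It is built directly on `sun.misc.Signal` (which Spark's `SignalUtils` wraps) and is a simplification, not the actual Spark implementation; the real utility also chains multiple actions per signal and logs registration failures:

```scala
import sun.misc.{Signal, SignalHandler}

object PwrHandlerSketch {
  // Run `action` when the named signal arrives; if it reports the signal as
  // unhandled (returns false), restore the previous handler and re-raise.
  def register(signalName: String)(action: => Boolean): Unit = {
    var previous: SignalHandler = null
    previous = Signal.handle(new Signal(signalName), new SignalHandler {
      override def handle(sig: Signal): Unit = {
        if (!action) {
          Signal.handle(sig, previous)
          Signal.raise(sig)
        }
      }
    })
  }

  def main(args: Array[String]): Unit = {
    // SIGPWR exists on Linux only; this throws IllegalArgumentException elsewhere.
    register("PWR") {
      println("Received SIGPWR, starting decommissioning.")
      true // we handled the signal
    }
    Thread.sleep(60000) // keep the JVM alive so `kill -PWR <pid>` can be tried
  }
}
```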
```diff
@@ -160,6 +164,16 @@ private[spark] class CoarseGrainedExecutorBackend(
       if (executor == null) {
         exitExecutor(1, "Received LaunchTask command but executor was null")
       } else {
+        if (decommissioned) {
+          logError("Asked to launch a task while decommissioned.")
+          driver match {
+            case Some(endpoint) =>
```
Review thread on the `case Some(endpoint) =>` line (since resolved):

Contributor: I think that instead of doing this here, it should be done in […]. (That also means this block can go away and you can just keep the log message in […].)

Contributor (Author): So what about when we are scaling down after the driver reference is created?

Contributor: https://docs.google.com/document/d/1xVO1b6KAwdUhjEJBolVPl9C6sLj7oOveErwDSYdT-pE/edit?disco=AAAAI73a0FM
For this there can be a DecommissionTracker (along the same lines as BlacklistTracker). The DecommissionTracker is filled when the driver is informed of decommissioning on a host. In a comment on the design doc I have tried to elaborate the flow of populating it.

Contributor (Author): So @itskals, on Kubernetes and in standalone the driver won't know first (and those are the only parts implemented in this PR). Certainly, when we go to implement YARN support we could try to short-circuit talking to the executor; however, to enable later things like migrations I'd still want to send the message. This could make a difference when we implement YARN support, and I've added a note in the design doc so that we don't skip it then.
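To make the DecommissionTracker suggestion concrete, here is a hypothetical sketch; nothing below exists in this PR, and every name in it is invented for illustration, loosely modeled on how BlacklistTracker tracks excluded nodes:

```scala
import scala.collection.mutable

// Hypothetical driver-side tracker, sketched from the review suggestion.
class DecommissionTracker {
  // Hosts the cluster manager has reported as decommissioning.
  private val decommissioningHosts = mutable.HashSet.empty[String]

  // Called when the driver is informed that a host is going away
  // (e.g. a YARN preemption notice).
  def addDecommissioningHost(host: String): Unit = synchronized {
    decommissioningHosts += host
  }

  // The scheduler would consult this before offering tasks on a host.
  def isDecommissioning(host: String): Boolean = synchronized {
    decommissioningHosts.contains(host)
  }
}
```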
The diff continues below the thread:

```diff
+              logInfo("Sending DecommissionExecutor to driver.")
+              endpoint.send(DecommissionExecutor(executorId))
+            case _ =>
+              logError("No registered driver to send Decommission to.")
+          }
+        }
         val taskDesc = TaskDescription.decode(data.value)
         logInfo("Got assigned task " + taskDesc.taskId)
         taskResources(taskDesc.taskId) = taskDesc.resources
```
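Driver-side handling of `DecommissionExecutor` is outside this hunk. As a rough, self-contained sketch of what receiving the message implies (the real handler lives in the scheduler backend; the names and types below are simplified stand-ins, not Spark's actual API):

```scala
import scala.collection.mutable

// Simplified stand-ins for Spark's RPC message types; only the shape of
// DecommissionExecutor mirrors the message used in this PR.
sealed trait ClusterMessage
case class DecommissionExecutor(executorId: String) extends ClusterMessage

class SchedulerBackendSketch {
  private val executorsPendingDecommission = mutable.HashSet.empty[String]

  def receive(msg: ClusterMessage): Unit = msg match {
    case DecommissionExecutor(id) =>
      // Stop offering new tasks on this executor; running tasks may finish.
      executorsPendingDecommission += id
  }

  // Resource offers would consult this before launching tasks.
  def canLaunchTasksOn(executorId: String): Boolean =
    !executorsPendingDecommission.contains(executorId)
}
```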
```diff
@@ -242,6 +256,29 @@
     System.exit(code)
   }

+  private def decommissionSelf(): Boolean = {
+    logInfo("Decommissioning self w/sync")
+    try {
+      decommissioned = true
+      // Tell master we are decommissioned so it stops trying to schedule us
+      if (driver.nonEmpty) {
+        driver.get.askSync[Boolean](DecommissionExecutor(executorId))
```
Review thread on the `askSync` line:

Contributor: Instead of decommissioning the executor, can we have entire-node decommission?

Contributor (Author): Same as the previous comment: in standalone only, sure, but in YARN/K8s we could see individual executors decommission.
The diff continues below the thread:

```diff
+      } else {
+        logError("No driver to message decommissioning.")
+      }
+      if (executor != null) {
+        executor.decommission()
+      }
+      logInfo("Done decommissioning self.")
+      // Return true since we are handling a signal
+      true
+    } catch {
+      case e: Exception =>
+        logError(s"Error ${e} during attempt to decommission self")
+        false
+    }
+  }
 }

 private[spark] object CoarseGrainedExecutorBackend extends Logging {
```
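With the handler registered in `onStart`, decommissioning can be triggered externally by delivering `SIGPWR` to the executor JVM, e.g. `kill -PWR <pid>` on Linux. A small illustrative wrapper (discovering the executor PID is left to the operator; the object name is invented):

```scala
import scala.sys.process._

// Deliver SIGPWR to a running executor JVM by PID (Linux-only signal).
object TriggerDecommission {
  def main(args: Array[String]): Unit = {
    require(args.length == 1, "usage: TriggerDecommission <executor-pid>")
    val exitCode = Seq("kill", "-PWR", args(0)).!
    if (exitCode != 0) {
      sys.error(s"kill exited with $exitCode; is PID ${args(0)} alive?")
    }
  }
}
```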