Skip to content

Commit a46497e

Browse files
committed
[SPARK-3984] [SPARK-3983] Fix incorrect scheduler delay and display task deserialization time in UI
This commit fixes the scheduler delay in the UI (which previously included things that are not scheduler delay, like time to deserialize the task and serialize the result), and also adds information about time to deserialize tasks to the optional additional metrics. Time to deserialize the task can be large relative to task time for short jobs, and understanding when it is high can help developers realize that they should try to reduce closure size (e.g, by including less data in the task description). cc shivaram etrain Author: Kay Ousterhout <[email protected]> Closes #2832 from kayousterhout/SPARK-3983 and squashes the following commits: 0c1398e [Kay Ousterhout] Fixed ordering 531575d [Kay Ousterhout] Removed executor launch time 1f13afe [Kay Ousterhout] Minor spacing fixes 335be4b [Kay Ousterhout] Made metrics hideable 5bc3cba [Kay Ousterhout] [SPARK-3984] [SPARK-3983] Improve UI task metrics.
1 parent 4c42986 commit a46497e

File tree

4 files changed

+36
-3
lines changed

4 files changed

+36
-3
lines changed

core/src/main/scala/org/apache/spark/executor/Executor.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ private[spark] class Executor(
161161
}
162162

163163
override def run() {
164-
val startTime = System.currentTimeMillis()
164+
val deserializeStartTime = System.currentTimeMillis()
165165
Thread.currentThread.setContextClassLoader(replClassLoader)
166166
val ser = SparkEnv.get.closureSerializer.newInstance()
167167
logInfo(s"Running $taskName (TID $taskId)")
@@ -206,7 +206,7 @@ private[spark] class Executor(
206206
val afterSerialization = System.currentTimeMillis()
207207

208208
for (m <- task.metrics) {
209-
m.executorDeserializeTime = taskStart - startTime
209+
m.executorDeserializeTime = taskStart - deserializeStartTime
210210
m.executorRunTime = taskFinish - taskStart
211211
m.jvmGCTime = gcTime - startGCTime
212212
m.resultSerializationTime = afterSerialization - beforeSerialization

core/src/main/scala/org/apache/spark/ui/ToolTips.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ private[spark] object ToolTips {
2424
scheduler delay is large, consider decreasing the size of tasks or decreasing the size
2525
of task results."""
2626

27+
val TASK_DESERIALIZATION_TIME =
28+
"""Time spent deserializating the task closure on the executor."""
29+
2730
val INPUT = "Bytes read from Hadoop or from Spark storage."
2831

2932
val SHUFFLE_WRITE = "Bytes written to disk in order to be read by a shuffle in a future stage."

core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,13 @@ private[ui] class StagePage(parent: JobProgressTab) extends WebUIPage("stage") {
112112
<span class="additional-metric-title">Scheduler Delay</span>
113113
</span>
114114
</li>
115+
<li>
116+
<span data-toggle="tooltip"
117+
title={ToolTips.TASK_DESERIALIZATION_TIME} data-placement="right">
118+
<input type="checkbox" name={TaskDetailsClassNames.TASK_DESERIALIZATION_TIME}/>
119+
<span class="additional-metric-title">Task Deserialization Time</span>
120+
</span>
121+
</li>
115122
<li>
116123
<span data-toggle="tooltip"
117124
title={ToolTips.GC_TIME} data-placement="right">
@@ -147,6 +154,7 @@ private[ui] class StagePage(parent: JobProgressTab) extends WebUIPage("stage") {
147154
("Index", ""), ("ID", ""), ("Attempt", ""), ("Status", ""), ("Locality Level", ""),
148155
("Executor ID / Host", ""), ("Launch Time", ""), ("Duration", ""),
149156
("Scheduler Delay", TaskDetailsClassNames.SCHEDULER_DELAY),
157+
("Task Deserialization Time", TaskDetailsClassNames.TASK_DESERIALIZATION_TIME),
150158
("GC Time", TaskDetailsClassNames.GC_TIME),
151159
("Result Serialization Time", TaskDetailsClassNames.RESULT_SERIALIZATION_TIME),
152160
("Getting Result Time", TaskDetailsClassNames.GETTING_RESULT_TIME)) ++
@@ -179,6 +187,17 @@ private[ui] class StagePage(parent: JobProgressTab) extends WebUIPage("stage") {
179187
}
180188
}
181189

190+
val deserializationTimes = validTasks.map { case TaskUIData(_, metrics, _) =>
191+
metrics.get.executorDeserializeTime.toDouble
192+
}
193+
val deserializationQuantiles =
194+
<td>
195+
<span data-toggle="tooltip" title={ToolTips.TASK_DESERIALIZATION_TIME}
196+
data-placement="right">
197+
Task Deserialization Time
198+
</span>
199+
</td> +: getFormattedTimeQuantiles(deserializationTimes)
200+
182201
val serviceTimes = validTasks.map { case TaskUIData(_, metrics, _) =>
183202
metrics.get.executorRunTime.toDouble
184203
}
@@ -266,6 +285,9 @@ private[ui] class StagePage(parent: JobProgressTab) extends WebUIPage("stage") {
266285
val listings: Seq[Seq[Node]] = Seq(
267286
<tr>{serviceQuantiles}</tr>,
268287
<tr class={TaskDetailsClassNames.SCHEDULER_DELAY}>{schedulerDelayQuantiles}</tr>,
288+
<tr class={TaskDetailsClassNames.TASK_DESERIALIZATION_TIME}>
289+
{deserializationQuantiles}
290+
</tr>
269291
<tr class={TaskDetailsClassNames.GC_TIME}>{gcQuantiles}</tr>,
270292
<tr class={TaskDetailsClassNames.RESULT_SERIALIZATION_TIME}>
271293
{serializationQuantiles}
@@ -314,6 +336,7 @@ private[ui] class StagePage(parent: JobProgressTab) extends WebUIPage("stage") {
314336
else metrics.map(m => UIUtils.formatDuration(m.executorRunTime)).getOrElse("")
315337
val schedulerDelay = metrics.map(getSchedulerDelay(info, _)).getOrElse(0L)
316338
val gcTime = metrics.map(_.jvmGCTime).getOrElse(0L)
339+
val taskDeserializationTime = metrics.map(_.executorDeserializeTime).getOrElse(0L)
317340
val serializationTime = metrics.map(_.resultSerializationTime).getOrElse(0L)
318341
val gettingResultTime = info.gettingResultTime
319342

@@ -367,6 +390,10 @@ private[ui] class StagePage(parent: JobProgressTab) extends WebUIPage("stage") {
367390
class={TaskDetailsClassNames.SCHEDULER_DELAY}>
368391
{UIUtils.formatDuration(schedulerDelay.toLong)}
369392
</td>
393+
<td sorttable_customkey={taskDeserializationTime.toString}
394+
class={TaskDetailsClassNames.TASK_DESERIALIZATION_TIME}>
395+
{UIUtils.formatDuration(taskDeserializationTime.toLong)}
396+
</td>
370397
<td sorttable_customkey={gcTime.toString} class={TaskDetailsClassNames.GC_TIME}>
371398
{if (gcTime > 0) UIUtils.formatDuration(gcTime) else ""}
372399
</td>
@@ -424,6 +451,8 @@ private[ui] class StagePage(parent: JobProgressTab) extends WebUIPage("stage") {
424451
(info.finishTime - info.launchTime)
425452
}
426453
}
427-
totalExecutionTime - metrics.executorRunTime
454+
val executorOverhead = (metrics.executorDeserializeTime +
455+
metrics.resultSerializationTime)
456+
totalExecutionTime - metrics.executorRunTime - executorOverhead
428457
}
429458
}

core/src/main/scala/org/apache/spark/ui/jobs/TaskDetailsClassNames.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ package org.apache.spark.ui.jobs
2424
private object TaskDetailsClassNames {
2525
val SCHEDULER_DELAY = "scheduler_delay"
2626
val GC_TIME = "gc_time"
27+
val TASK_DESERIALIZATION_TIME = "deserialization_time"
2728
val RESULT_SERIALIZATION_TIME = "serialization_time"
2829
val GETTING_RESULT_TIME = "getting_result_time"
2930
}

0 commit comments

Comments
 (0)