[SPARK-46383] Reduce Driver Heap Usage by Reducing the Lifespan of TaskInfo.accumulables()
#44321
Changes from all commits
SparkListenerSuite.scala

```diff
@@ -18,6 +18,7 @@
 package org.apache.spark.scheduler

 import java.io.{Externalizable, ObjectInput, ObjectOutput}
+import java.util.{Collections, IdentityHashMap}
 import java.util.concurrent.Semaphore

 import scala.collection.mutable
@@ -289,6 +290,19 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Matchers
     stageInfo.rddInfos.forall(_.numPartitions == 4) should be {true}
   }

+  test("SPARK-46383: Track TaskInfo objects") {
+    // Test that the same TaskInfo object is sent to the `DAGScheduler` in the `onTaskStart` and
+    // `onTaskEnd` events.
+    val conf = new SparkConf().set(DROP_TASK_INFO_ACCUMULABLES_ON_TASK_COMPLETION, true)
+    sc = new SparkContext("local", "SparkListenerSuite", conf)
+    val listener = new SaveActiveTaskInfos
+    sc.addSparkListener(listener)
+    val rdd1 = sc.parallelize(1 to 100, 4)
+    sc.runJob(rdd1, (items: Iterator[Int]) => items.size, Seq(0, 1))
+    sc.listenerBus.waitUntilEmpty()
+    listener.taskInfos.size should be { 0 }
```
Contributor: I am not sure I follow this test. What is it trying to do?

Contributor (Author): This test asserts that the same `TaskInfo` object is sent to the `DAGScheduler` in the `onTaskStart` and `onTaskEnd` events.

Contributor: Isn't that simply an implementation detail? (For example, the resubmission case would break it.) I don't see a harm in keeping it, but want to make sure I am not missing something here.

Contributor (Author): I don't mind dropping it. I was just trying to assert one of the ways SparkListeners could be used. The test is more of a general test to ensure that we preserve the behavior of SparkListeners.

Contributor: Functionally, that (the right task info is in the event) should be covered already (in use of …
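The property this thread is debating (reference identity of `TaskInfo` across the start and end events) can be stated as a standalone listener. The sketch below is illustrative only, not code from this PR; `SameReferenceChecker` and its field names are hypothetical:

```scala
import scala.collection.mutable

import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd, SparkListenerTaskStart, TaskInfo}

// Records, per task id, whether onTaskEnd delivered the exact same TaskInfo
// reference (compared with `eq`) that onTaskStart did. An equals()-based
// comparison could not tell a copied TaskInfo apart from the original.
class SameReferenceChecker extends SparkListener {
  private val started = mutable.Map[Long, TaskInfo]()
  val mismatchedTaskIds = mutable.Buffer[Long]()

  override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = {
    started(taskStart.taskInfo.taskId) = taskStart.taskInfo
  }

  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
    val sameRef = started.get(taskEnd.taskInfo.taskId).exists(_ eq taskEnd.taskInfo)
    if (!sameRef) {
      mismatchedTaskIds += taskEnd.taskInfo.taskId
    }
  }
}
```

As the thread notes, this is an implementation detail rather than a contract, so a resubmitted task could legitimately produce a mismatch.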
```diff
+  }
+
   test("local metrics") {
     sc = new SparkContext("local", "SparkListenerSuite")
     val listener = new SaveStageAndTaskInfo
@@ -643,6 +657,27 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Matchers
   }
 }

+/**
+ * A simple listener that tracks task infos for all active tasks.
+ */
+private class SaveActiveTaskInfos extends SparkListener {
+  // Use a set based on IdentityHashMap instead of a HashSet to track unique references of
+  // TaskInfo objects.
+  val taskInfos = Collections.newSetFromMap[TaskInfo](new IdentityHashMap)
+
+  override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = {
+    val info = taskStart.taskInfo
+    if (info != null) {
+      taskInfos.add(info)
+    }
+  }
+
+  override def onTaskEnd(task: SparkListenerTaskEnd): Unit = {
+    val info = task.taskInfo
+    taskInfos.remove(info)
+  }
+}
+
 /**
  * A simple listener that saves the task indices for all task events.
  */
```
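The `SaveActiveTaskInfos` helper above deliberately uses an identity-based set. A self-contained sketch of the difference between identity and equality semantics (the `Item` class is a made-up example, not Spark code):

```scala
import java.util.{Collections, IdentityHashMap}

object IdentitySetDemo extends App {
  // Two Item(1) instances are equal by equals()/hashCode but are distinct references.
  case class Item(id: Int)

  val a = Item(1)
  val b = Item(1)

  // Equality-based set: a and b collapse into one element.
  val hashSet = new java.util.HashSet[Item]()
  hashSet.add(a)
  hashSet.add(b)
  println(hashSet.size()) // 1

  // Identity-based set: both survive, because membership uses reference equality.
  val identitySet = Collections.newSetFromMap[Item](new IdentityHashMap)
  identitySet.add(a)
  identitySet.add(b)
  println(identitySet.size()) // 2

  // remove() only succeeds with the exact reference that was inserted.
  identitySet.remove(Item(1)) // no-op: equal, but a different reference
  println(identitySet.size()) // still 2
  identitySet.remove(a)
  println(identitySet.size()) // 1
}
```

That last property is what makes the test meaningful: if `onTaskEnd` carried an equal-but-copied `TaskInfo`, `taskInfos.remove(info)` would be a no-op and the set would not drain to zero.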
Contributor: Reading this comment, the partition is already completed, probably by another `TaskSetManager`, and we just need to reset the task info here?
Contributor (Author): I think this branch is handling a rare corner case where the same `TaskSetManager` can mark the same task as both succeeded and failed. There is some detailed prior discussion of this in https://issues.apache.org/jira/browse/SPARK-37300.
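Given that corner case, listener-side bookkeeping keyed on task completion should tolerate a duplicate end event for the same task. A small sketch of one defensive pattern, assuming only the public listener API (this is not code from the PR, and `DuplicateTolerantTracker` is a hypothetical name):

```scala
import scala.collection.mutable

import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd, SparkListenerTaskStart}

// Tracks active task ids and counts duplicate end events instead of letting
// them corrupt the bookkeeping: Set.remove returns false when the id is
// already gone, which is how a second end event for the same task shows up.
class DuplicateTolerantTracker extends SparkListener {
  private val active = mutable.Set[Long]()
  var duplicateEnds = 0

  override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = {
    active += taskStart.taskInfo.taskId
  }

  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
    if (!active.remove(taskEnd.taskInfo.taskId)) {
      duplicateEnds += 1
    }
  }
}
```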