Skip to content

Commit 5bafdc4

Browse files
committed
[SPARK-18991][CORE] Change ContextCleaner.referenceBuffer to use ConcurrentHashMap to make it faster
## What changes were proposed in this pull request?

The time complexity of `ConcurrentHashMap`'s `remove` is O(1), whereas `ConcurrentLinkedQueue`'s `remove(Object)` has to scan the queue and is O(n). Changing the type of `ContextCleaner.referenceBuffer` from `ConcurrentLinkedQueue` to a set backed by `ConcurrentHashMap` will make the removal much faster.

## How was this patch tested?

Jenkins

Author: Shixiong Zhu <[email protected]>

Closes #16390 from zsxwing/SPARK-18991.

(cherry picked from commit a848f0b)
Signed-off-by: Shixiong Zhu <[email protected]>
1 parent 1857acc commit 5bafdc4

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

core/src/main/scala/org/apache/spark/ContextCleaner.scala

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,8 @@
1818
package org.apache.spark
1919

2020
import java.lang.ref.{ReferenceQueue, WeakReference}
21-
import java.util.concurrent.{ConcurrentLinkedQueue, ScheduledExecutorService, TimeUnit}
21+
import java.util.Collections
22+
import java.util.concurrent.{ConcurrentHashMap, ConcurrentLinkedQueue, ScheduledExecutorService, TimeUnit}
2223

2324
import scala.collection.JavaConverters._
2425

@@ -58,7 +59,12 @@ private class CleanupTaskWeakReference(
5859
*/
5960
private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
6061

61-
private val referenceBuffer = new ConcurrentLinkedQueue[CleanupTaskWeakReference]()
62+
/**
63+
* A buffer to ensure that `CleanupTaskWeakReference`s are not garbage collected as long as they
64+
* have not been handled by the reference queue.
65+
*/
66+
private val referenceBuffer =
67+
Collections.newSetFromMap[CleanupTaskWeakReference](new ConcurrentHashMap)
6268

6369
private val referenceQueue = new ReferenceQueue[AnyRef]
6470

@@ -176,10 +182,10 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
176182
.map(_.asInstanceOf[CleanupTaskWeakReference])
177183
// Synchronize here to avoid being interrupted on stop()
178184
synchronized {
179-
reference.map(_.task).foreach { task =>
180-
logDebug("Got cleaning task " + task)
181-
referenceBuffer.remove(reference.get)
182-
task match {
185+
reference.foreach { ref =>
186+
logDebug("Got cleaning task " + ref.task)
187+
referenceBuffer.remove(ref)
188+
ref.task match {
183189
case CleanRDD(rddId) =>
184190
doCleanupRDD(rddId, blocking = blockOnCleanupTasks)
185191
case CleanShuffle(shuffleId) =>

0 commit comments

Comments
 (0)