Skip to content

Commit 2f8abb4

Browse files
Adam Budde authored and Andrew Or committed
[SPARK-13122] Fix race condition in MemoryStore.unrollSafely()
https://issues.apache.org/jira/browse/SPARK-13122 A race condition can occur in MemoryStore's unrollSafely() method if two threads that return the same value for currentTaskAttemptId() execute this method concurrently. This change makes the operation of reading the initial amount of unroll memory used, performing the unroll, and updating the associated memory maps atomic in order to avoid this race condition. The initial proposed fix wraps all of unrollSafely() in a memoryManager.synchronized { } block. A cleaner approach might be to introduce a mechanism that synchronizes based on task attempt ID. An alternative option might be to track unroll/pending unroll memory based on block ID rather than task attempt ID. Author: Adam Budde <[email protected]> Closes #11012 from budde/master. (cherry picked from commit ff71261) Signed-off-by: Andrew Or <[email protected]> Conflicts: core/src/main/scala/org/apache/spark/storage/MemoryStore.scala
1 parent e81333b commit 2f8abb4

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

core/src/main/scala/org/apache/spark/storage/MemoryStore.scala

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -264,8 +264,8 @@ private[spark] class MemoryStore(blockManager: BlockManager, memoryManager: Memo
264264
var memoryThreshold = initialMemoryThreshold
265265
// Memory to request as a multiple of current vector size
266266
val memoryGrowthFactor = 1.5
267-
// Previous unroll memory held by this task, for releasing later (only at the very end)
268-
val previousMemoryReserved = currentUnrollMemoryForThisTask
267+
// Keep track of pending unroll memory reserved by this method.
268+
var pendingMemoryReserved = 0L
269269
// Underlying vector for unrolling the block
270270
var vector = new SizeTrackingVector[Any]
271271

@@ -275,6 +275,8 @@ private[spark] class MemoryStore(blockManager: BlockManager, memoryManager: Memo
275275
if (!keepUnrolling) {
276276
logWarning(s"Failed to reserve initial memory threshold of " +
277277
s"${Utils.bytesToString(initialMemoryThreshold)} for computing block $blockId in memory.")
278+
} else {
279+
pendingMemoryReserved += initialMemoryThreshold
278280
}
279281

280282
// Unroll this block safely, checking whether we have exceeded our threshold periodically
@@ -288,6 +290,9 @@ private[spark] class MemoryStore(blockManager: BlockManager, memoryManager: Memo
288290
val amountToRequest = (currentSize * memoryGrowthFactor - memoryThreshold).toLong
289291
keepUnrolling = reserveUnrollMemoryForThisTask(
290292
blockId, amountToRequest, droppedBlocks)
293+
if (keepUnrolling) {
294+
pendingMemoryReserved += amountToRequest
295+
}
291296
// New threshold is currentSize * memoryGrowthFactor
292297
memoryThreshold += amountToRequest
293298
}
@@ -314,10 +319,9 @@ private[spark] class MemoryStore(blockManager: BlockManager, memoryManager: Memo
314319
// release the unroll memory yet. Instead, we transfer it to pending unroll memory
315320
// so `tryToPut` can further transfer it to normal storage memory later.
316321
// TODO: we can probably express this without pending unroll memory (SPARK-10907)
317-
val amountToTransferToPending = currentUnrollMemoryForThisTask - previousMemoryReserved
318-
unrollMemoryMap(taskAttemptId) -= amountToTransferToPending
322+
unrollMemoryMap(taskAttemptId) -= pendingMemoryReserved
319323
pendingUnrollMemoryMap(taskAttemptId) =
320-
pendingUnrollMemoryMap.getOrElse(taskAttemptId, 0L) + amountToTransferToPending
324+
pendingUnrollMemoryMap.getOrElse(taskAttemptId, 0L) + pendingMemoryReserved
321325
}
322326
} else {
323327
// Otherwise, if we return an iterator, we can only release the unroll memory when

0 commit comments

Comments
 (0)