ReceivedBlockTracker.scala
@@ -111,8 +111,12 @@ private[streaming] class ReceivedBlockTracker(
    */
   def allocateBlocksToBatch(batchTime: Time): Unit = synchronized {
     if (lastAllocatedBatchTime == null || batchTime > lastAllocatedBatchTime) {
+      // We explicitly create an ArrayBuffer here because, at least as of Scala 2.11 and 2.12,
+      // a mutable.Queue fails serialization with a StackOverflowError if it has more than
+      // a few thousand elements. So we explicitly allocate a collection for serialization
+      // which we know doesn't have this issue. (See SPARK-26734.)
       val streamIdToBlocks = streamIds.map { streamId =>
-        (streamId, getReceivedBlockQueue(streamId).clone())
+        (streamId, mutable.ArrayBuffer(getReceivedBlockQueue(streamId).clone(): _*))
       }.toMap
       val allocatedBlocks = AllocatedBlocks(streamIdToBlocks)
       if (writeToLog(BatchAllocationEvent(batchTime, allocatedBlocks))) {
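As background for the comment above, here is a minimal standalone sketch (not part of the patch) of the failure mode it describes: Java serialization of a large mutable.Queue on Scala 2.11/2.12 recurses once per element and overflows the stack, while copying the elements into a mutable.ArrayBuffer first, as the patch does, serializes flatly. The object name and element count here are illustrative only.

    import java.io.{ByteArrayOutputStream, ObjectOutputStream}
    import scala.collection.mutable

    // Illustrative repro of the SPARK-26734 failure mode; the element count is
    // arbitrary, anything beyond a few thousand elements triggers it on 2.11/2.12.
    object QueueSerializationRepro {
      private def serialize(obj: AnyRef): Unit = {
        val out = new ObjectOutputStream(new ByteArrayOutputStream())
        try out.writeObject(obj) finally out.close()
      }

      def main(args: Array[String]): Unit = {
        val queue = mutable.Queue(1 to 100000: _*)
        try {
          // mutable.Queue is a linked structure on Scala 2.11/2.12, so writeObject
          // recurses per element and throws StackOverflowError for large queues.
          serialize(queue)
          println("queue serialized OK")
        } catch {
          case _: StackOverflowError => println("queue failed with StackOverflowError")
        }

        // Copying into an ArrayBuffer, the approach the patch takes, serializes
        // as a flat array and succeeds regardless of size.
        serialize(mutable.ArrayBuffer(queue: _*))
        println("ArrayBuffer serialized OK")
      }
    }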
ReceivedBlockTrackerSuite.scala
@@ -96,6 +96,27 @@ class ReceivedBlockTrackerSuite
     receivedBlockTracker.getUnallocatedBlocks(streamId) shouldEqual blockInfos
   }

+  test("block addition, and block to batch allocation with many blocks") {
+    val receivedBlockTracker = createTracker()
+    receivedBlockTracker.isWriteAheadLogEnabled should be (true)
  [Review thread on the line above]
  Contributor: Would be good to close the tracker. I know not all tests do this (which is a bug), but it would be good to make it clean here.
  Contributor Author: Sure, done.
+
+    val blockInfos = generateBlockInfos(100000)
+    blockInfos.map(receivedBlockTracker.addBlock)
+    receivedBlockTracker.allocateBlocksToBatch(1)
  [Review thread on the line above]
  Contributor: I would add assertions here as well. If it's not throwing an exception, that doesn't mean the same blocks were deserialized.
  Contributor Author: Sure.
+    receivedBlockTracker.getUnallocatedBlocks(streamId) shouldEqual Seq.empty
+    receivedBlockTracker.hasUnallocatedReceivedBlocks should be (false)
+    receivedBlockTracker.getBlocksOfBatch(1) shouldEqual Map(streamId -> blockInfos)
+    receivedBlockTracker.getBlocksOfBatchAndStream(1, streamId) shouldEqual blockInfos
+
+    val expectedWrittenData1 = blockInfos.map(BlockAdditionEvent) :+
+      BatchAllocationEvent(1, AllocatedBlocks(Map(streamId -> blockInfos)))
+    getWrittenLogData() shouldEqual expectedWrittenData1
+    getWriteAheadLogFiles() should have size 1
+
+    receivedBlockTracker.stop()
+  }
+
test("recovery with write ahead logs should remove only allocated blocks from received queue") {
val manualClock = new ManualClock
val batchTime = manualClock.getTimeMillis()
Expand Down Expand Up @@ -362,8 +383,8 @@ class ReceivedBlockTrackerSuite
}

/** Generate blocks infos using random ids */
def generateBlockInfos(): Seq[ReceivedBlockInfo] = {
List.fill(5)(ReceivedBlockInfo(streamId, Some(0L), None,
def generateBlockInfos(blockCount: Int = 5): Seq[ReceivedBlockInfo] = {
List.fill(blockCount)(ReceivedBlockInfo(streamId, Some(0L), None,
BlockManagerBasedStoreResult(StreamBlockId(streamId, math.abs(Random.nextInt)), Some(0L))))
}

Expand Down