Skip to content

Commit 59f8887

Browse files
committed
Schedule all receivers at the same time when launching them
1 parent 075e0a3 commit 59f8887

File tree

3 files changed

+203
-38
lines changed

3 files changed

+203
-38
lines changed

streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverSchedulingPolicy.scala

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ package org.apache.spark.streaming.scheduler
2020
import scala.collection.Map
2121
import scala.collection.mutable
2222

23+
import org.apache.spark.streaming.receiver.Receiver
24+
2325
/**
2426
* A ReceiverScheduler trying to balance executors' load. Here is the approach to schedule executors
2527
* for a receiver.
@@ -46,11 +48,80 @@ import scala.collection.mutable
4648
*/
4749
private[streaming] class ReceiverSchedulingPolicy {
4850

51+
/**
52+
* Try our best to schedule receivers with evenly distributed. However, if the
53+
* `preferredLocation`s of receivers are not even, we may not be able to schedule them evenly
54+
* because we have to respect them.
55+
*
56+
* This method is called when we start to launch receivers at the first time.
57+
*/
58+
def scheduleReceivers(
59+
receivers: Seq[Receiver[_]], executors: Seq[String]): Map[Int, Seq[String]] = {
60+
if (receivers.isEmpty) {
61+
return Map.empty
62+
}
63+
64+
require(executors.nonEmpty, "There is no executor up")
65+
66+
val hostToExecutors = executors.groupBy(_.split(":")(0))
67+
val locations = new Array[mutable.ArrayBuffer[String]](receivers.length)
68+
val numReceiversOnExecutor = mutable.HashMap[String, Int]()
69+
// Set the initial value to 0
70+
executors.foreach(numReceiversOnExecutor(_) = 0)
71+
72+
// Firstly, we need to respect "preferredLocation". So if a receiver has "preferredLocation",
73+
// we need to make sure the "preferredLocation" is in the candidate location list.
74+
for (i <- 0 until receivers.length) {
75+
locations(i) = new mutable.ArrayBuffer[String]()
76+
// Note: preferredLocation is host but executors are host:port
77+
receivers(i).preferredLocation.foreach { host =>
78+
hostToExecutors.get(host) match {
79+
case Some(executorsOnHost) =>
80+
// preferredLocation is a known host. Select an executor that has the least receivers in
81+
// this host
82+
val scheduledLocation =
83+
executorsOnHost.minBy(executor => numReceiversOnExecutor(executor))
84+
locations(i) += scheduledLocation
85+
numReceiversOnExecutor(scheduledLocation) =
86+
numReceiversOnExecutor(scheduledLocation) + 1
87+
case None =>
88+
// preferredLocation is an unknown host.
89+
// Note: There are two cases:
90+
// 1. This executor is not up. But it may be up later.
91+
// 2. This executor is dead, or it's not a host in the cluster.
92+
// Currently, simply add host to the scheduled locations
93+
locations(i) += host
94+
}
95+
}
96+
}
97+
98+
// For those receivers that don't have preferredLocation, make sure we assign at least one
99+
// executor to them.
100+
for (scheduledLocations <- locations.filter(_.isEmpty)) {
101+
// Select the executor that has the least receivers
102+
val (executor, numReceivers) = numReceiversOnExecutor.minBy(_._2)
103+
scheduledLocations += executor
104+
numReceiversOnExecutor(executor) = numReceivers + 1
105+
}
106+
107+
// Assign idle executors to receivers that have less executors
108+
val idleExecutors = numReceiversOnExecutor.filter(_._2 == 0).map(_._1)
109+
for (executor <- idleExecutors) {
110+
// Assign an idle executor to the receiver that has least locations.
111+
val scheduledLocations = locations.minBy(_.size)
112+
scheduledLocations += executor
113+
}
114+
115+
receivers.map(_.streamId).zip(locations).toMap
116+
}
117+
49118
/**
50119
* Return a list of candidate executors to run the receiver. If the list is empty, the caller can
51120
* run this receiver in arbitrary executor.
121+
*
122+
* This method is called when a receiver is registering with ReceiverTracker or is restarting.
52123
*/
53-
def scheduleReceiver(
124+
def rescheduleReceiver(
54125
receiverId: Int,
55126
preferredLocation: Option[String],
56127
receiverTrackingInfoMap: Map[Int, ReceiverTrackingInfo],

streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala

Lines changed: 49 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,16 @@ private[streaming] case class DeregisterReceiver(streamId: Int, msg: String, err
6262
private[streaming] sealed trait ReceiverTrackerLocalMessage
6363

6464
/**
65-
* This message will trigger ReceiverTrackerEndpoint to start a Spark job for the receiver.
65+
* This message will trigger ReceiverTrackerEndpoint to restart a Spark job for the receiver.
6666
*/
67-
private[streaming] case class StartReceiver(receiver: Receiver[_])
67+
private[streaming] case class RestartReceiver(receiver: Receiver[_])
68+
extends ReceiverTrackerLocalMessage
69+
70+
/**
71+
* This message is sent to ReceiverTrackerEndpoint when we start to launch Spark jobs for receivers
72+
* at the first time.
73+
*/
74+
private[streaming] case class StartAllReceivers(receiver: Seq[Receiver[_]])
6875
extends ReceiverTrackerLocalMessage
6976

7077
/**
@@ -307,8 +314,8 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false
307314

308315
private def scheduleReceiver(receiverId: Int): Seq[String] = {
309316
val preferredLocation = receiverPreferredLocations.getOrElse(receiverId, None)
310-
val scheduledLocations = schedulingPolicy.scheduleReceiver(
311-
receiverId, preferredLocation, receiverTrackingInfos, getExecutors(ssc))
317+
val scheduledLocations = schedulingPolicy.rescheduleReceiver(
318+
receiverId, preferredLocation, receiverTrackingInfos, getExecutors)
312319
updateReceiverScheduledLocations(receiverId, scheduledLocations)
313320
scheduledLocations
314321
}
@@ -337,10 +344,14 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false
337344
/**
338345
* Get the list of executors excluding driver
339346
*/
340-
private def getExecutors(ssc: StreamingContext): List[String] = {
341-
val executors = ssc.sparkContext.getExecutorMemoryStatus.map(_._1.split(":")(0)).toList
342-
val driver = ssc.sparkContext.getConf.get("spark.driver.host")
343-
executors.diff(List(driver))
347+
private def getExecutors: List[String] = {
348+
if (ssc.sc.isLocal) {
349+
List("localhost")
350+
} else {
351+
val executors = ssc.sparkContext.getExecutorMemoryStatus.map(_._1.split(":")(0)).toList
352+
val driver = ssc.sparkContext.getConf.get("spark.driver.host")
353+
executors.diff(List(driver))
354+
}
344355
}
345356

346357
/**
@@ -355,6 +366,7 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false
355366
if (!ssc.sparkContext.isLocal) {
356367
ssc.sparkContext.makeRDD(1 to 50, 50).map(x => (x, 1)).reduceByKey(_ + _, 20).collect()
357368
}
369+
assert(getExecutors.nonEmpty)
358370
}
359371

360372
/**
@@ -370,12 +382,8 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false
370382

371383
runDummySparkJob()
372384

373-
// Distribute the receivers and start them
374385
logInfo("Starting " + receivers.length + " receivers")
375-
376-
for (receiver <- receivers) {
377-
endpoint.send(StartReceiver(receiver))
378-
}
386+
endpoint.send(StartAllReceivers)
379387
}
380388

381389
/** Check if tracker has been marked for starting */
@@ -396,8 +404,22 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false
396404

397405
override def receive: PartialFunction[Any, Unit] = {
398406
// Local messages
399-
case StartReceiver(receiver) =>
400-
startReceiver(receiver)
407+
case StartAllReceivers(receivers) =>
408+
val scheduledLocations = schedulingPolicy.scheduleReceivers(receivers, getExecutors)
409+
for (receiver <- receivers) {
410+
val locations = scheduledLocations(receiver.streamId)
411+
updateReceiverScheduledLocations(receiver.streamId, locations)
412+
receiverPreferredLocations(receiver.streamId) = receiver.preferredLocation
413+
startReceiver(receiver, locations)
414+
}
415+
case RestartReceiver(receiver) =>
416+
val scheduledLocations = schedulingPolicy.rescheduleReceiver(
417+
receiver.streamId,
418+
receiver.preferredLocation,
419+
receiverTrackingInfos,
420+
getExecutors)
421+
updateReceiverScheduledLocations(receiver.streamId, scheduledLocations)
422+
startReceiver(receiver, scheduledLocations)
401423
case c @ CleanupOldBlocks(cleanupThreshTime) =>
402424
receiverTrackingInfos.values.flatMap(_.endpoint).foreach(_.send(c))
403425
// Remote messages
@@ -425,28 +447,22 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false
425447
context.reply(true)
426448
}
427449

428-
private def startReceiver(receiver: Receiver[_]): Unit = {
429-
val checkpointDirOption = Option(ssc.checkpointDir)
430-
val serializableHadoopConf =
431-
new SerializableConfiguration(ssc.sparkContext.hadoopConfiguration)
432-
433-
// Function to start the receiver on the worker node
434-
val startReceiverFunc = new StartReceiverFunc(checkpointDirOption, serializableHadoopConf)
435-
436-
receiverPreferredLocations(receiver.streamId) = receiver.preferredLocation
450+
/**
451+
* Start a receiver along with its scheduled locations
452+
*/
453+
private def startReceiver(receiver: Receiver[_], scheduledLocations: Seq[String]): Unit = {
437454
val receiverId = receiver.streamId
438-
439455
if (!isTrackerStarted) {
440456
onReceiverJobFinish(receiverId)
441457
return
442458
}
443459

444-
val scheduledLocations = schedulingPolicy.scheduleReceiver(
445-
receiverId,
446-
receiver.preferredLocation,
447-
receiverTrackingInfos,
448-
getExecutors(ssc))
449-
updateReceiverScheduledLocations(receiver.streamId, scheduledLocations)
460+
val checkpointDirOption = Option(ssc.checkpointDir)
461+
val serializableHadoopConf =
462+
new SerializableConfiguration(ssc.sparkContext.hadoopConfiguration)
463+
464+
// Function to start the receiver on the worker node
465+
val startReceiverFunc = new StartReceiverFunc(checkpointDirOption, serializableHadoopConf)
450466

451467
// Create the RDD using the scheduledLocations to run the receiver in a Spark job
452468
val receiverRDD: RDD[Receiver[_]] =
@@ -465,15 +481,15 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false
465481
onReceiverJobFinish(receiverId)
466482
} else {
467483
logInfo(s"Restarting Receiver $receiverId")
468-
self.send(StartReceiver(receiver))
484+
self.send(RestartReceiver(receiver))
469485
}
470486
case Failure(e) =>
471487
if (!isTrackerStarted) {
472488
onReceiverJobFinish(receiverId)
473489
} else {
474490
logError("Receiver has been stopped. Try to restart it.", e)
475491
logInfo(s"Restarting Receiver $receiverId")
476-
self.send(StartReceiver(receiver))
492+
self.send(RestartReceiver(receiver))
477493
}
478494
}(submitJobThreadPool)
479495
logInfo(s"Receiver ${receiver.streamId} started")

streaming/src/test/scala/org/apache/spark/streaming/scheduler/ReceiverSchedulingPolicySuite.scala

Lines changed: 82 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,26 @@
1717

1818
package org.apache.spark.streaming.scheduler
1919

20+
import scala.collection.mutable
21+
2022
import org.apache.spark.SparkFunSuite
23+
import org.apache.spark.storage.StorageLevel
24+
import org.apache.spark.streaming.receiver.Receiver
2125

2226
class ReceiverSchedulingPolicySuite extends SparkFunSuite {
2327

2428
val receiverSchedulingPolicy = new ReceiverSchedulingPolicy
2529

2630
test("empty executors") {
2731
val scheduledLocations =
28-
receiverSchedulingPolicy.scheduleReceiver(0, None, Map.empty, executors = Seq.empty)
32+
receiverSchedulingPolicy.rescheduleReceiver(0, None, Map.empty, executors = Seq.empty)
2933
assert(scheduledLocations === Seq.empty)
3034
}
3135

3236
test("receiver preferredLocation") {
3337
val receiverTrackingInfoMap = Map(
3438
0 -> ReceiverTrackingInfo(0, ReceiverState.INACTIVE, None, None))
35-
val scheduledLocations = receiverSchedulingPolicy.scheduleReceiver(
39+
val scheduledLocations = receiverSchedulingPolicy.rescheduleReceiver(
3640
0, Some("host1"), receiverTrackingInfoMap, executors = Seq("host2"))
3741
assert(scheduledLocations.toSet === Set("host1", "host2"))
3842
}
@@ -42,7 +46,7 @@ class ReceiverSchedulingPolicySuite extends SparkFunSuite {
4246
// host3 is idle
4347
val receiverTrackingInfoMap = Map(
4448
0 -> ReceiverTrackingInfo(0, ReceiverState.ACTIVE, None, Some("host1")))
45-
val scheduledLocations = receiverSchedulingPolicy.scheduleReceiver(
49+
val scheduledLocations = receiverSchedulingPolicy.rescheduleReceiver(
4650
1, None, receiverTrackingInfoMap, executors)
4751
assert(scheduledLocations.toSet === Set("host2", "host3", "host4", "host5"))
4852
}
@@ -55,8 +59,82 @@ class ReceiverSchedulingPolicySuite extends SparkFunSuite {
5559
0 -> ReceiverTrackingInfo(0, ReceiverState.ACTIVE, None, Some("host1")),
5660
1 -> ReceiverTrackingInfo(1, ReceiverState.SCHEDULED, Some(Seq("host2", "host3")), None),
5761
2 -> ReceiverTrackingInfo(1, ReceiverState.SCHEDULED, Some(Seq("host1", "host3")), None))
58-
val scheduledLocations = receiverSchedulingPolicy.scheduleReceiver(
62+
val scheduledLocations = receiverSchedulingPolicy.rescheduleReceiver(
5963
3, None, receiverTrackingInfoMap, executors)
6064
assert(scheduledLocations.toSet === Set("host2", "host4", "host5"))
6165
}
66+
67+
test("scheduleReceivers should schedule receivers evenly " +
68+
"when there are more receivers than executors") {
69+
val receivers = (0 until 6).map(new DummyReceiver(_))
70+
val executors = (10000 until 10003).map(port => s"localhost:${port}")
71+
val scheduledLocations = receiverSchedulingPolicy.scheduleReceivers(receivers, executors)
72+
val numReceiversOnExecutor = mutable.HashMap[String, Int]()
73+
// There should be 2 receivers running on each executor and each receiver has one location
74+
scheduledLocations.foreach { case (receiverId, locations) =>
75+
assert(locations.size == 1)
76+
numReceiversOnExecutor(locations(0)) = numReceiversOnExecutor.getOrElse(locations(0), 0) + 1
77+
}
78+
assert(numReceiversOnExecutor === executors.map(_ -> 2).toMap)
79+
}
80+
81+
82+
test("scheduleReceivers should schedule receivers evenly " +
83+
"when there are more executors than receivers") {
84+
val receivers = (0 until 3).map(new DummyReceiver(_))
85+
val executors = (10000 until 10006).map(port => s"localhost:${port}")
86+
val scheduledLocations = receiverSchedulingPolicy.scheduleReceivers(receivers, executors)
87+
val numReceiversOnExecutor = mutable.HashMap[String, Int]()
88+
// There should be 1 receiver running on each executor and each receiver has two locations
89+
scheduledLocations.foreach { case (receiverId, locations) =>
90+
assert(locations.size == 2)
91+
locations.foreach { l =>
92+
numReceiversOnExecutor(l) = numReceiversOnExecutor.getOrElse(l, 0) + 1
93+
}
94+
}
95+
assert(numReceiversOnExecutor === executors.map(_ -> 1).toMap)
96+
}
97+
98+
test("scheduleReceivers should schedule receivers evenly " +
99+
"when the preferredLocations are even") {
100+
val receivers = (0 until 3).map(new DummyReceiver(_)) ++
101+
(3 until 6).map(new DummyReceiver(_, Some("localhost")))
102+
val executors = (10000 until 10003).map(port => s"localhost:${port}") ++
103+
(10003 until 10006).map(port => s"localhost2:${port}")
104+
val scheduledLocations = receiverSchedulingPolicy.scheduleReceivers(receivers, executors)
105+
val numReceiversOnExecutor = mutable.HashMap[String, Int]()
106+
// There should be 1 receiver running on each executor and each receiver has 1 location
107+
scheduledLocations.foreach { case (receiverId, locations) =>
108+
assert(locations.size == 1)
109+
locations.foreach { l =>
110+
numReceiversOnExecutor(l) = numReceiversOnExecutor.getOrElse(l, 0) + 1
111+
}
112+
}
113+
assert(numReceiversOnExecutor === executors.map(_ -> 1).toMap)
114+
// Make sure we schedule the receivers to their preferredLocations
115+
val locationsForReceiversWithPreferredLocation =
116+
scheduledLocations.filter { case (receiverId, locations) => receiverId >= 3 }.flatMap(_._2)
117+
// We can simply check the location set because we only know each receiver only has 1 location
118+
assert(locationsForReceiversWithPreferredLocation.toSet ===
119+
(10000 until 10003).map(port => s"localhost:${port}").toSet)
120+
}
121+
122+
test("scheduleReceivers should return empty if no receiver") {
123+
assert(receiverSchedulingPolicy.scheduleReceivers(Seq.empty, Seq("localhost:10000")).isEmpty)
124+
}
125+
}
126+
127+
/**
128+
* Dummy receiver implementation
129+
*/
130+
private class DummyReceiver(receiverId: Int, host: Option[String] = None)
131+
extends Receiver[Int](StorageLevel.MEMORY_ONLY) {
132+
133+
setReceiverId(receiverId)
134+
135+
override def onStart(): Unit = {}
136+
137+
override def onStop(): Unit = {}
138+
139+
override def preferredLocation: Option[String] = host
62140
}

0 commit comments

Comments
 (0)