Skip to content

Commit 794a720

Browse files
committed
Incorporating suggestions from @cloud-fan
1 parent d17f6e9 commit 794a720

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,11 @@ object BlockReplicationUtils {
6767
*/
6868
// scalastyle:on line.size.limit
6969
private def getSampleIds(n: Int, m: Int, r: Random): List[Int] = {
70-
val indices = (n - m + 1 to n).foldLeft(Set.empty[Int]) {case (set, i) =>
70+
val indices = (n - m + 1 to n).foldLeft(mutable.LinkedHashSet.empty[Int]) {case (set, i) =>
7171
val t = r.nextInt(i) + 1
7272
if (set.contains(t)) set + i else set + t
7373
}
74-
// we shuffle the result to ensure a random arrangement within the sample
75-
// to avoid any bias from set implementations
76-
r.shuffle(indices.map(_ - 1).toList)
74+
indices.map(_ - 1).toList
7775
}
7876

7977
/**
@@ -140,8 +138,10 @@ class BasicBlockReplicationPolicy
140138

141139
/**
142140
* Method to prioritize a bunch of candidate peers of a block manager. This implementation
143-
* replicates the behavior of block replication in HDFS, a peer is chosen within the rack,
144-
* one outside and that's it. This works best with a total replication factor of 3.
141+
* replicates the behavior of block replication in HDFS. For a given number of replicas needed,
142+
* we choose a peer within the rack, one outside the rack, and the remaining block managers at
143+
* random, in that order, until we meet the number of replicas needed.
144+
* This works best with a total replication factor of 3, like HDFS.
145145
*
146146
* @param blockManagerId Id of the current BlockManager for self identification
147147
* @param peers A list of peers of a BlockManager
@@ -163,7 +163,7 @@ class BasicBlockReplicationPolicy
163163

164164
val random = new Random(blockId.hashCode)
165165

166-
// if block doesn't have topology info, we can't do much, so we randlomly shuffle
166+
// if block doesn't have topology info, we can't do much, so we randomly shuffle
167167
// if there is, we see what's needed from peersReplicatedTo and based on numReplicas,
168168
// we choose what's needed
169169
if (blockManagerId.topologyInfo.isEmpty || numReplicas == 0) {

0 commit comments

Comments
 (0)