@@ -67,13 +67,11 @@ object BlockReplicationUtils {
6767 */
6868 // scalastyle:on line.size.limit
6969 private def getSampleIds (n : Int , m : Int , r : Random ): List [Int ] = {
70- val indices = (n - m + 1 to n).foldLeft(Set .empty[Int ]) {case (set, i) =>
70+ val indices = (n - m + 1 to n).foldLeft(mutable. LinkedHashSet .empty[Int ]) {case (set, i) =>
7171 val t = r.nextInt(i) + 1
7272 if (set.contains(t)) set + i else set + t
7373 }
74- // we shuffle the result to ensure a random arrangement within the sample
75- // to avoid any bias from set implementations
76- r.shuffle(indices.map(_ - 1 ).toList)
74+ indices.map(_ - 1 ).toList
7775 }
7876
7977 /**
@@ -140,8 +138,10 @@ class BasicBlockReplicationPolicy
140138
141139 /**
142140 * Method to prioritize a bunch of candidate peers of a block manager. This implementation
143- * replicates the behavior of block replication in HDFS, a peer is chosen within the rack,
144- * one outside and that's it. This works best with a total replication factor of 3.
141+ * replicates the behavior of block replication in HDFS. For a given number of replicas needed,
142+ * we choose a peer within the rack, one outside and remaining blockmanagers are chosen at
143+ * random, in that order till we meet the number of replicas needed.
144+ * This works best with a total replication factor of 3, like HDFS.
145145 *
146146 * @param blockManagerId Id of the current BlockManager for self identification
147147 * @param peers A list of peers of a BlockManager
@@ -163,7 +163,7 @@ class BasicBlockReplicationPolicy
163163
164164 val random = new Random (blockId.hashCode)
165165
166- // if block doesn't have topology info, we can't do much, so we randlomly shuffle
166+ // if block doesn't have topology info, we can't do much, so we randomly shuffle
167167 // if there is, we see what's needed from peersReplicatedTo and based on numReplicas,
168168 // we choose what's needed
169169 if (blockManagerId.topologyInfo.isEmpty || numReplicas == 0 ) {
0 commit comments