Skip to content

Commit bf37815

Browse files
committed
HDFS-17052. Erasure coding reconstruction failed when the number of racks with the required storage type is not enough
1 parent ba08f26 commit bf37815

File tree

2 files changed

+66
-5
lines changed

2 files changed

+66
-5
lines changed

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyRackFaultTolerant.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -166,11 +166,10 @@ private void chooseEvenlyFromRemainingRacks(Node writer,
166166
List<DatanodeStorageInfo> results, boolean avoidStaleNodes,
167167
EnumMap<StorageType, Integer> storageTypes, int totalReplicaExpected,
168168
NotEnoughReplicasException e) throws NotEnoughReplicasException {
169-
int numResultsOflastChoose = 0;
170169
NotEnoughReplicasException lastException = e;
171170
int bestEffortMaxNodesPerRack = maxNodesPerRack;
172171
while (results.size() != totalReplicaExpected &&
173-
numResultsOflastChoose != results.size()) {
172+
bestEffortMaxNodesPerRack <= totalReplicaExpected) {
174173
// Exclude the chosen nodes
175174
final Set<Node> newExcludeNodes = new HashSet<>();
176175
for (DatanodeStorageInfo resultStorage : results) {
@@ -182,7 +181,6 @@ private void chooseEvenlyFromRemainingRacks(Node writer,
182181
LOG.trace("Excluded nodes: {}", excludedNodes);
183182
LOG.trace("New Excluded nodes: {}", newExcludeNodes);
184183
final int numOfReplicas = totalReplicaExpected - results.size();
185-
numResultsOflastChoose = results.size();
186184
try {
187185
chooseOnce(numOfReplicas, writer, newExcludeNodes, blocksize,
188186
++bestEffortMaxNodesPerRack, results, avoidStaleNodes,
@@ -194,9 +192,9 @@ private void chooseEvenlyFromRemainingRacks(Node writer,
194192
}
195193
}
196194

197-
if (numResultsOflastChoose != totalReplicaExpected) {
195+
if (results.size() != totalReplicaExpected) {
198196
LOG.debug("Best effort placement failed: expecting {} replicas, only "
199-
+ "chose {}.", totalReplicaExpected, numResultsOflastChoose);
197+
+ "chose {}.", totalReplicaExpected, results.size());
200198
throw lastException;
201199
}
202200
}

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReconstructStripedBlocks.java

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
package org.apache.hadoop.hdfs.server.namenode;
1919

2020
import org.apache.hadoop.conf.Configuration;
21+
import org.apache.hadoop.fs.FileSystem;
2122
import org.apache.hadoop.fs.Path;
23+
import org.apache.hadoop.fs.StorageType;
2224
import org.apache.hadoop.hdfs.DFSConfigKeys;
2325
import org.apache.hadoop.hdfs.DFSTestUtil;
2426
import org.apache.hadoop.hdfs.DistributedFileSystem;
@@ -52,6 +54,8 @@
5254
import org.slf4j.Logger;
5355
import org.slf4j.LoggerFactory;
5456

57+
import java.io.IOException;
58+
import java.util.Arrays;
5559
import java.util.BitSet;
5660
import java.util.Iterator;
5761
import java.util.List;
@@ -515,4 +519,63 @@ public void testReconstrutionWithBusyBlock1() throws Exception {
515519
assertEquals(9, bm.countNodes(blockInfo).liveReplicas());
516520
}
517521

522+
@Test
523+
public void testReconstructionWithStorageTypeNotEnough() throws Exception {
524+
final HdfsConfiguration conf = new HdfsConfiguration();
525+
conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1);
526+
527+
// nine disk node eleven archive node
528+
int numDn = groupSize * 2 + 2;
529+
StorageType[][] storageTypes = new StorageType[numDn][];
530+
Arrays.fill(storageTypes, 0, groupSize,
531+
new StorageType[]{StorageType.DISK, StorageType.DISK});
532+
Arrays.fill(storageTypes, groupSize, numDn,
533+
new StorageType[]{StorageType.ARCHIVE, StorageType.ARCHIVE});
534+
535+
// nine Disk racks and one Archive rack
536+
String[] racks = {
537+
"/rack1", "/rack2", "/rack3", "/rack4", "/rack5", "/rack6", "/rack7", "/rack8",
538+
"/rack9", "/rack0", "/rack0", "/rack0", "/rack0", "/rack0", "/rack0", "/rack0",
539+
"/rack0", "/rack0", "/rack0", "/rack0"};
540+
541+
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDn)
542+
.storageTypes(storageTypes)
543+
.racks(racks)
544+
.build();
545+
cluster.waitActive();
546+
DistributedFileSystem fs = cluster.getFileSystem();
547+
fs.enableErasureCodingPolicy(
548+
StripedFileTestUtil.getDefaultECPolicy().getName());
549+
550+
try {
551+
fs.mkdirs(dirPath);
552+
fs.setStoragePolicy(dirPath, "COLD");
553+
fs.setErasureCodingPolicy(dirPath,
554+
StripedFileTestUtil.getDefaultECPolicy().getName());
555+
DFSTestUtil.createFile(fs, filePath,
556+
cellSize * dataBlocks * 2, (short) 1, 0L);
557+
558+
// stop one dn
559+
LocatedBlocks blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
560+
LocatedStripedBlock block = (LocatedStripedBlock) blks.getLastLocatedBlock();
561+
DatanodeInfo dnToStop = block.getLocations()[0];
562+
cluster.stopDataNode(dnToStop.getXferAddr());
563+
cluster.setDataNodeDead(dnToStop);
564+
565+
// wait for reconstruction to happen
566+
StripedFileTestUtil.waitForReconstructionFinished(filePath, fs, groupSize);
567+
blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
568+
block = (LocatedStripedBlock) blks.getLastLocatedBlock();
569+
BitSet bitSet = new BitSet(groupSize);
570+
for (byte index : block.getBlockIndices()) {
571+
bitSet.set(index);
572+
}
573+
for (int i = 0; i < groupSize; i++) {
574+
Assert.assertTrue(bitSet.get(i));
575+
}
576+
} finally {
577+
cluster.shutdown();
578+
}
579+
}
580+
518581
}

0 commit comments

Comments (0)