Commit b8c6ba6

HDFS-16479. EC: NameNode should not send a reconstruction work when the source datanodes are insufficient (#4138)
(cherry picked from commit 2efab92)
1 parent: cb14e8d

File tree

2 files changed: +106 -0 lines


hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

Lines changed: 10 additions & 0 deletions
@@ -2060,6 +2060,16 @@ BlockReconstructionWork scheduleReconstruction(BlockInfo block,
       return null;
     }
 
+    // skip if source datanodes for reconstructing ec block are not enough
+    if (block.isStriped()) {
+      BlockInfoStriped stripedBlock = (BlockInfoStriped) block;
+      if (stripedBlock.getRealDataBlockNum() > srcNodes.length) {
+        LOG.debug("Block {} cannot be reconstructed due to shortage of source datanodes ", block);
+        NameNode.getNameNodeMetrics().incNumTimesReReplicationNotScheduled();
+        return null;
+      }
+    }
+
     // liveReplicaNodes can include READ_ONLY_SHARED replicas which are
     // not included in the numReplicas.liveReplicas() count
     assert liveReplicaNodes.size() >= numReplicas.liveReplicas();
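
For context, here is a minimal standalone sketch of the guard this hunk introduces (the class and method names are hypothetical, not Hadoop code): an RS striped block group needs at least getRealDataBlockNum() live internal blocks as decoding inputs, so scheduling is pointless when fewer non-busy source nodes are available.

// EcReconstructionGuard.java -- hypothetical illustration, not part of the patch.
public class EcReconstructionGuard {

  // Mirrors the new check: with fewer usable sources than real data units,
  // decoding cannot produce the missing internal block, so skip scheduling.
  static boolean canSchedule(int realDataBlockNum, int usableSourceNodes) {
    return usableSourceNodes >= realDataBlockNum;
  }

  public static void main(String[] args) {
    // RS-3-2: 3 data units. Two usable sources are not enough.
    System.out.println(canSchedule(3, 2)); // false
    // Three usable sources suffice to decode the missing block.
    System.out.println(canSchedule(3, 3)); // true
  }
}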

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java

Lines changed: 96 additions & 0 deletions
@@ -842,6 +842,102 @@ public void testChooseSrcDNWithDupECInDecommissioningNode() throws Exception {
         0, numReplicas.redundantInternalBlocks());
   }
 
+  @Test
+  public void testSkipReconstructionWithManyBusyNodes() {
+    long blockId = -9223372036854775776L; // real ec block id
+    // RS-3-2 EC policy
+    ErasureCodingPolicy ecPolicy =
+        SystemErasureCodingPolicies.getPolicies().get(1);
+
+    // create an EC block group: 3 data blocks + 2 parity blocks
+    Block aBlockGroup = new Block(blockId,
+        ecPolicy.getCellSize() * ecPolicy.getNumDataUnits(), 0);
+    BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlockGroup, ecPolicy);
+
+    // create 4 storageInfo, which means 1 block is missing
+    DatanodeStorageInfo ds1 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage1", "1.1.1.1", "rack1", "host1");
+    DatanodeStorageInfo ds2 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage2", "2.2.2.2", "rack2", "host2");
+    DatanodeStorageInfo ds3 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage3", "3.3.3.3", "rack3", "host3");
+    DatanodeStorageInfo ds4 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage4", "4.4.4.4", "rack4", "host4");
+
+    // link block with storage
+    aBlockInfoStriped.addStorage(ds1, aBlockGroup);
+    aBlockInfoStriped.addStorage(ds2, new Block(blockId + 1, 0, 0));
+    aBlockInfoStriped.addStorage(ds3, new Block(blockId + 2, 0, 0));
+    aBlockInfoStriped.addStorage(ds4, new Block(blockId + 3, 0, 0));
+
+    addEcBlockToBM(blockId, ecPolicy);
+    aBlockInfoStriped.setBlockCollectionId(mockINodeId);
+
+    // reconstruction should be scheduled
+    BlockReconstructionWork work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+    assertNotNull(work);
+
+    // simulate 2 nodes reaching maxReplicationStreams
+    for (int i = 0; i < bm.maxReplicationStreams; i++) {
+      ds3.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
+      ds4.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
+    }
+
+    // reconstruction should be skipped since the number of non-busy nodes is not enough
+    work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+    assertNull(work);
+  }
+
+  @Test
+  public void testSkipReconstructionWithManyBusyNodes2() {
+    long blockId = -9223372036854775776L; // real ec block id
+    // RS-3-2 EC policy
+    ErasureCodingPolicy ecPolicy =
+        SystemErasureCodingPolicies.getPolicies().get(1);
+
+    // create an EC block group: 2 data blocks + 2 parity blocks
+    Block aBlockGroup = new Block(blockId,
+        ecPolicy.getCellSize() * (ecPolicy.getNumDataUnits() - 1), 0);
+    BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlockGroup, ecPolicy);
+
+    // create 3 storageInfo, which means 1 block is missing
+    DatanodeStorageInfo ds1 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage1", "1.1.1.1", "rack1", "host1");
+    DatanodeStorageInfo ds2 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage2", "2.2.2.2", "rack2", "host2");
+    DatanodeStorageInfo ds3 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage3", "3.3.3.3", "rack3", "host3");
+
+    // link block with storage
+    aBlockInfoStriped.addStorage(ds1, aBlockGroup);
+    aBlockInfoStriped.addStorage(ds2, new Block(blockId + 1, 0, 0));
+    aBlockInfoStriped.addStorage(ds3, new Block(blockId + 2, 0, 0));
+
+    addEcBlockToBM(blockId, ecPolicy);
+    aBlockInfoStriped.setBlockCollectionId(mockINodeId);
+
+    // reconstruction should be scheduled
+    BlockReconstructionWork work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+    assertNotNull(work);
+
+    // simulate 1 node reaching maxReplicationStreams
+    for (int i = 0; i < bm.maxReplicationStreams; i++) {
+      ds2.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
+    }
+
+    // reconstruction should still be scheduled since there are 2 source nodes to create 2 blocks
+    work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+    assertNotNull(work);
+
+    // simulate 1 more node reaching maxReplicationStreams
+    for (int i = 0; i < bm.maxReplicationStreams; i++) {
+      ds3.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
+    }
+
+    // reconstruction should be skipped since the number of non-busy nodes is not enough
+    work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+    assertNull(work);
+  }
+
   @Test
   public void testFavorDecomUntilHardLimit() throws Exception {
     bm.maxReplicationStreams = 0;