From c2eac051bd0fc1b0d497778efb495f1776aca2cf Mon Sep 17 00:00:00 2001 From: tom lee Date: Wed, 9 Mar 2022 20:36:40 +0800 Subject: [PATCH 1/5] HDFS-16498. Fix NPE for checkBlockReportLease --- .../server/blockmanagement/BlockManager.java | 5 +++ .../blockmanagement/DatanodeManager.java | 5 +++ .../blockmanagement/TestBlockReportLease.java | 43 +++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 2b647704e3f9b..a2b245468ff33 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -2751,6 +2751,11 @@ public boolean checkBlockReportLease(BlockReportContext context, return true; } DatanodeDescriptor node = datanodeManager.getDatanode(nodeID); + if (node == null) { + final UnregisteredNodeException e = new UnregisteredNodeException(nodeID, null); + NameNode.stateChangeLog.error("BLOCK* NameSystem.getDatanode: " + e.getLocalizedMessage()); + throw e; + } final long startTime = Time.monotonicNow(); return blockReportLeaseManager.checkLease(node, startTime, context.getLeaseId()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index cfb1d83ec5bc6..4e031657d2ec2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -2187,5 +2187,10 @@ public DatanodeStorageReport[] getDatanodeStorageReport( } return reports; } + + @VisibleForTesting + public Map getDatanodeMap() { + return datanodeMap; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportLease.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportLease.java index a5acc14edd935..b949fa0558181 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportLease.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportLease.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hdfs.server.protocol.FinalizeCommand; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; +import org.apache.hadoop.hdfs.server.protocol.RegisterCommand; import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports; import org.apache.hadoop.hdfs.server.protocol.SlowPeerReports; import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport; @@ -136,6 +137,48 @@ public void testCheckBlockReportLease() throws Exception { } } + @Test + public void testCheckBlockReportLeaseWhenDnUnregister() throws Exception { + HdfsConfiguration conf = new HdfsConfiguration(); + Random rand = new Random(); + + try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(1).build()) { + FSNamesystem fsn = cluster.getNamesystem(); + BlockManager blockManager = fsn.getBlockManager(); + String poolId = cluster.getNamesystem().getBlockPoolId(); + NamenodeProtocols rpcServer = cluster.getNameNodeRpc(); + + // Remove the unique datanode to simulate the unregistered situation. + DataNode dn = cluster.getDataNodes().get(0); + blockManager.getDatanodeManager().getDatanodeMap().remove(dn.getDatanodeUuid()); + + // Trigger BlockReport. + DatanodeRegistration dnRegistration = dn.getDNRegistrationForBP(poolId); + StorageReport[] storages = dn.getFSDataset().getStorageReports(poolId); + ExecutorService pool = Executors.newFixedThreadPool(1); + BlockReportContext brContext = new BlockReportContext(1, 0, + rand.nextLong(), 1); + Future sendBRfuturea = pool.submit(() -> { + // Build every storage with 100 blocks for sending report. + DatanodeStorage[] datanodeStorages + = new DatanodeStorage[storages.length]; + for (int i = 0; i < storages.length; i++) { + datanodeStorages[i] = storages[i].getStorage(); + } + StorageBlockReport[] reports = createReports(datanodeStorages, 100); + + // Send blockReport. + return rpcServer.blockReport(dnRegistration, poolId, reports, + brContext); + }); + + // Get datanodeCommand, it should be RegisterCommand. + DatanodeCommand datanodeCommand = sendBRfuturea.get(); + assertTrue(datanodeCommand instanceof RegisterCommand); + } + } + private StorageBlockReport[] createReports(DatanodeStorage[] dnStorages, int numBlocks) { int longsPerBlock = 3; From 5d84c9bc965bc9dc1959f28b1b89a2465867ed5b Mon Sep 17 00:00:00 2001 From: tom lee Date: Thu, 10 Mar 2022 14:08:01 +0800 Subject: [PATCH 2/5] update log level --- .../apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index c0cb5787682d7..cb17496028cc2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -1633,7 +1633,7 @@ public DatanodeCommand blockReport(final DatanodeRegistration nodeReg, } } } catch (UnregisteredNodeException une) { - LOG.debug("Datanode {} is attempting to report but not register yet.", + LOG.warn("Datanode {} is attempting to report but not register yet.", nodeReg); return RegisterCommand.REGISTER; } From 6b12706d8929fe12326c93e9f2986a766f28a504 Mon Sep 17 00:00:00 2001 From: tom lee Date: Wed, 16 Mar 2022 13:06:49 +0800 Subject: [PATCH 3/5] update log level back to debug --- .../apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index cb17496028cc2..c0cb5787682d7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -1633,7 +1633,7 @@ public DatanodeCommand blockReport(final DatanodeRegistration nodeReg, } } } catch (UnregisteredNodeException une) { - LOG.warn("Datanode {} is attempting to report but not register yet.", + LOG.debug("Datanode {} is attempting to report but not register yet.", nodeReg); return RegisterCommand.REGISTER; } From 1558c1020841ab7769f79753048f321c2dd81996 Mon Sep 17 00:00:00 2001 From: tom lee Date: Mon, 21 Mar 2022 20:13:16 +0800 Subject: [PATCH 4/5] update unit test, add detail comments --- .../hdfs/server/blockmanagement/BlockManager.java | 4 +++- .../blockmanagement/TestBlockReportLease.java | 14 ++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index a2b245468ff33..e81f72bf8a372 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -2753,7 +2753,9 @@ public boolean checkBlockReportLease(BlockReportContext context, DatanodeDescriptor node = datanodeManager.getDatanode(nodeID); if (node == null) { final UnregisteredNodeException e = new UnregisteredNodeException(nodeID, null); - NameNode.stateChangeLog.error("BLOCK* NameSystem.getDatanode: " + e.getLocalizedMessage()); + NameNode.stateChangeLog.error("BLOCK* NameSystem.getDatanode: " + "Data node " + nodeID + + " is attempting to report storage ID " + nodeID.getDatanodeUuid() + + ". But this node is not registered."); throw e; } final long startTime = Time.monotonicNow(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportLease.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportLease.java index b949fa0558181..d1ae0b600fcfa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportLease.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportLease.java @@ -142,14 +142,14 @@ public void testCheckBlockReportLeaseWhenDnUnregister() throws Exception { HdfsConfiguration conf = new HdfsConfiguration(); Random rand = new Random(); - try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) - .numDataNodes(1).build()) { + try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build()) { FSNamesystem fsn = cluster.getNamesystem(); BlockManager blockManager = fsn.getBlockManager(); String poolId = cluster.getNamesystem().getBlockPoolId(); NamenodeProtocols rpcServer = cluster.getNameNodeRpc(); - // Remove the unique datanode to simulate the unregistered situation. + // Remove the unique DataNode to simulate the unregistered situation. + // This is similar to starting NameNode, and DataNodes are not registered yet. DataNode dn = cluster.getDataNodes().get(0); blockManager.getDatanodeManager().getDatanodeMap().remove(dn.getDatanodeUuid()); @@ -159,7 +159,7 @@ public void testCheckBlockReportLeaseWhenDnUnregister() throws Exception { ExecutorService pool = Executors.newFixedThreadPool(1); BlockReportContext brContext = new BlockReportContext(1, 0, rand.nextLong(), 1); - Future sendBRfuturea = pool.submit(() -> { + Future sendBRFuture = pool.submit(() -> { // Build every storage with 100 blocks for sending report. DatanodeStorage[] datanodeStorages = new DatanodeStorage[storages.length]; @@ -173,8 +173,10 @@ public void testCheckBlockReportLeaseWhenDnUnregister() throws Exception { brContext); }); - // Get datanodeCommand, it should be RegisterCommand. - DatanodeCommand datanodeCommand = sendBRfuturea.get(); + // When unregistered DataNode triggering the block report, will throw an + // UnregisteredNodeException. After NameNode processing, RegisterCommand + // is returned to the DataNode. + DatanodeCommand datanodeCommand = sendBRFuture.get(); assertTrue(datanodeCommand instanceof RegisterCommand); } } From 207024b30fdde9aca690efa1342bcb94ad3bfadd Mon Sep 17 00:00:00 2001 From: tom lee Date: Sat, 26 Mar 2022 18:08:07 +0800 Subject: [PATCH 5/5] update log and log level --- .../hadoop/hdfs/server/blockmanagement/BlockManager.java | 6 +----- .../hadoop/hdfs/server/namenode/NameNodeRpcServer.java | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index e81f72bf8a372..bd5ff78554ab6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -2752,11 +2752,7 @@ public boolean checkBlockReportLease(BlockReportContext context, } DatanodeDescriptor node = datanodeManager.getDatanode(nodeID); if (node == null) { - final UnregisteredNodeException e = new UnregisteredNodeException(nodeID, null); - NameNode.stateChangeLog.error("BLOCK* NameSystem.getDatanode: " + "Data node " + nodeID + - " is attempting to report storage ID " + nodeID.getDatanodeUuid() + - ". But this node is not registered."); - throw e; + throw new UnregisteredNodeException(nodeID, null); } final long startTime = Time.monotonicNow(); return blockReportLeaseManager.checkLease(node, startTime, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index c0cb5787682d7..cb17496028cc2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -1633,7 +1633,7 @@ public DatanodeCommand blockReport(final DatanodeRegistration nodeReg, } } } catch (UnregisteredNodeException une) { - LOG.debug("Datanode {} is attempting to report but not register yet.", + LOG.warn("Datanode {} is attempting to report but not register yet.", nodeReg); return RegisterCommand.REGISTER; }