diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 5331df8c71e63..f2fec0712e111 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -813,6 +813,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final int DFS_HA_ZKFC_PORT_DEFAULT = 8019; public static final String DFS_HA_ZKFC_NN_HTTP_TIMEOUT_KEY = "dfs.ha.zkfc.nn.http.timeout.ms"; public static final int DFS_HA_ZKFC_NN_HTTP_TIMEOUT_KEY_DEFAULT = 20000; + public static final String DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_KEY = + "dfs.ha.zkfc.nn.safemode-as-unhealthy"; + public static final boolean DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_DEFAULT = + false; // Security-related configs public static final String DFS_ENCRYPT_DATA_TRANSFER_KEY = "dfs.encrypt.data.transfer"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 1826bcec069f0..dc4d11b128063 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -158,6 +158,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.FS_PROTECTED_DIRECTORIES; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_KEY; +import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_DEFAULT; import static org.apache.hadoop.util.ExitUtil.terminate; import static org.apache.hadoop.util.ToolRunner.confirmPrompt; import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_BACKOFF_ENABLE; @@ -369,6 +371,7 @@ public long getProtocolVersion(String protocol, private final HAContext haContext; protected final boolean allowStaleStandbyReads; private AtomicBoolean started = new AtomicBoolean(false); + private final boolean safemodeAsUnhealthyToZkfc; private final static int HEALTH_MONITOR_WARN_THRESHOLD_MS = 5000; @@ -965,6 +968,9 @@ protected NameNode(Configuration conf, NamenodeRole role) this.stopAtException(e); throw e; } + safemodeAsUnhealthyToZkfc = conf.getBoolean( + DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_KEY, + DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_DEFAULT); this.started.set(true); } @@ -1766,6 +1772,10 @@ synchronized void monitorHealth() throw new HealthCheckFailedException( "The NameNode has no resources available"); } + if (safemodeAsUnhealthyToZkfc && isInSafeMode()) { + throw new HealthCheckFailedException("The NameNode is configured to " + + "report UNHEALTHY to ZKFC in Safemode."); + } } synchronized void transitionToActive() diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 4b9cf4a890082..7233e2ffc37f6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -3005,6 +3005,17 @@ + + dfs.ha.zkfc.nn.safemode-as-unhealthy + false + + Whether to report SERVICE_UNHEALTHY to ZKFC while the namenode is in safemode. + When it is set to true, a namenode in safemode will not become active, and + only namenodes that are not in safemode and are ready to fully serve will become + active.
+ + + dfs.namenode.quota.init-threads 4 diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestNNHealthCheck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestNNHealthCheck.java index e0f794f285db0..d36191bf27997 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestNNHealthCheck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestNNHealthCheck.java @@ -19,6 +19,7 @@ import static org.apache.hadoop.fs.CommonConfigurationKeys.HA_HM_RPC_TIMEOUT_DEFAULT; import static org.apache.hadoop.fs.CommonConfigurationKeys.HA_HM_RPC_TIMEOUT_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIFELINE_RPC_ADDRESS_KEY; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -31,6 +32,7 @@ import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.namenode.MockNameNodeResourceChecker; import org.apache.hadoop.hdfs.tools.NNHAServiceTarget; import org.apache.hadoop.ipc.RemoteException; @@ -76,6 +78,38 @@ public void testNNHealthCheckWithLifelineAddress() throws IOException { doNNHealthCheckTest(); } + @Test + public void testNNHealthCheckWithSafemodeAsUnhealthy() throws IOException { + conf.setBoolean(DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_KEY, true); + + // now bring up just the NameNode. + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0) + .nnTopology(MiniDFSNNTopology.simpleHATopology()).build(); + cluster.waitActive(); + + // manually set safemode. 
+ cluster.getFileSystem(0) + .setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_ENTER); + + NNHAServiceTarget haTarget = new NNHAServiceTarget(conf, + DFSUtil.getNamenodeNameServiceId(conf), "nn1"); + final String expectedTargetString = haTarget.getAddress().toString(); + + assertTrue("Expected haTarget " + haTarget + " containing " + + expectedTargetString, + haTarget.toString().contains(expectedTargetString)); + HAServiceProtocol rpc = haTarget.getHealthMonitorProxy(conf, 5000); + + try { + // Should throw error - NN is unhealthy. + rpc.monitorHealth(); + fail("Should not have succeeded in calling monitorHealth"); + } catch (Exception hcfe) { + GenericTestUtils.assertExceptionContains("The NameNode is configured" + + " to report UNHEALTHY to ZKFC in Safemode.", hcfe); + } + } + private void doNNHealthCheckTest() throws IOException { MockNameNodeResourceChecker mockResourceChecker = new MockNameNodeResourceChecker(conf);