Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final int DFS_HA_ZKFC_PORT_DEFAULT = 8019;
public static final String DFS_HA_ZKFC_NN_HTTP_TIMEOUT_KEY = "dfs.ha.zkfc.nn.http.timeout.ms";
public static final int DFS_HA_ZKFC_NN_HTTP_TIMEOUT_KEY_DEFAULT = 20000;
// Configuration key: when set to true, the NameNode's health check fails
// (HealthCheckFailedException) while the NameNode is in safemode, so that
// ZKFC sees it as unhealthy.
public static final String DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_KEY =
"dfs.ha.zkfc.nn.safemode-as-unhealthy";
// Default for the key above: disabled, i.e. safemode alone does not make
// the health check fail.
public static final boolean DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_DEFAULT =
false;

// Security-related configs
public static final String DFS_ENCRYPT_DATA_TRANSFER_KEY = "dfs.encrypt.data.transfer";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.FS_PROTECTED_DIRECTORIES;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_DEFAULT;
import static org.apache.hadoop.util.ExitUtil.terminate;
import static org.apache.hadoop.util.ToolRunner.confirmPrompt;
import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_BACKOFF_ENABLE;
Expand Down Expand Up @@ -369,6 +371,7 @@ public long getProtocolVersion(String protocol,
private final HAContext haContext;
protected final boolean allowStaleStandbyReads;
private AtomicBoolean started = new AtomicBoolean(false);
// Value of dfs.ha.zkfc.nn.safemode-as-unhealthy, read once in the
// constructor; when true, monitorHealth() throws HealthCheckFailedException
// while this NameNode is in safemode.
private final boolean safemodeAsUnhealthyToZkfc;

private final static int HEALTH_MONITOR_WARN_THRESHOLD_MS = 5000;

Expand Down Expand Up @@ -965,6 +968,9 @@ protected NameNode(Configuration conf, NamenodeRole role)
this.stopAtException(e);
throw e;
}
safemodeAsUnhealthyToZkfc = conf.getBoolean(
DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_KEY,
DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_DEFAULT);
this.started.set(true);
}

Expand Down Expand Up @@ -1766,6 +1772,10 @@ synchronized void monitorHealth()
throw new HealthCheckFailedException(
"The NameNode has no resources available");
}
if (safemodeAsUnhealthyToZkfc && isInSafeMode()) {
throw new HealthCheckFailedException("The NameNode is configured to " +
"report UNHEALTHY to ZKFC in Safemode.");
}
}

synchronized void transitionToActive()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3005,6 +3005,17 @@
</description>
</property>

<property>
<name>dfs.ha.zkfc.nn.safemode-as-unhealthy</name>
<value>false</value>
<description>
Whether to report SERVICE_UNHEALTHY to ZKFC while the namenode is in safemode.
When it is set to true, a namenode in safemode will not become active, and
only namenodes that are not in safemode and are ready to fully serve will
become active.
</description>
</property>

<property>
<name>dfs.namenode.quota.init-threads</name>
<value>4</value>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import static org.apache.hadoop.fs.CommonConfigurationKeys.HA_HM_RPC_TIMEOUT_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HA_HM_RPC_TIMEOUT_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIFELINE_RPC_ADDRESS_KEY;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
Expand All @@ -31,6 +32,7 @@
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.namenode.MockNameNodeResourceChecker;
import org.apache.hadoop.hdfs.tools.NNHAServiceTarget;
import org.apache.hadoop.ipc.RemoteException;
Expand Down Expand Up @@ -76,6 +78,38 @@ public void testNNHealthCheckWithLifelineAddress() throws IOException {
doNNHealthCheckTest();
}

/**
 * Verifies that, with dfs.ha.zkfc.nn.safemode-as-unhealthy enabled, a
 * NameNode that has been put into safemode rejects the monitorHealth RPC
 * with the "configured to report UNHEALTHY" message.
 *
 * @throws IOException if the mini cluster cannot be started or the
 *     safemode request fails
 */
@Test
public void testNNHealthCheckWithSafemodeAsUnhealthy() throws IOException {
  // Opt in to treating safemode as an unhealthy state for ZKFC.
  conf.setBoolean(DFS_HA_ZKFC_NN_SAFEMODE_AS_UNHEALTHY_TO_ZKFC_KEY, true);

  // Start an HA pair of NameNodes only; no DataNodes are needed here.
  MiniDFSCluster.Builder haBuilder = new MiniDFSCluster.Builder(conf)
      .numDataNodes(0)
      .nnTopology(MiniDFSNNTopology.simpleHATopology());
  cluster = haBuilder.build();
  cluster.waitActive();

  // Force the first NameNode into safemode.
  cluster.getFileSystem(0)
      .setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_ENTER);

  final String nameServiceId = DFSUtil.getNamenodeNameServiceId(conf);
  NNHAServiceTarget target =
      new NNHAServiceTarget(conf, nameServiceId, "nn1");
  final String targetAddress = target.getAddress().toString();
  final String mismatchMessage =
      "Expected haTarget " + target + " containing " + targetAddress;
  assertTrue(mismatchMessage, target.toString().contains(targetAddress));

  HAServiceProtocol healthProxy = target.getHealthMonitorProxy(conf, 5000);

  // The health check must be rejected because the NN is in safemode.
  Exception rejection = null;
  try {
    healthProxy.monitorHealth();
  } catch (Exception e) {
    rejection = e;
  }
  if (rejection == null) {
    fail("Should not have succeeded in calling monitorHealth");
  }
  GenericTestUtils.assertExceptionContains("The NameNode is configured" +
      " to report UNHEALTHY to ZKFC in Safemode.", rejection);
}

private void doNNHealthCheckTest() throws IOException {
MockNameNodeResourceChecker mockResourceChecker =
new MockNameNodeResourceChecker(conf);
Expand Down