Skip to content

Commit 57c9a85

Browse files
ayushtkn authored and sunchao committed
HDFS-16373. Fix MiniDFSCluster restart in case of multiple namenodes. (#3756)
Reviewed-by: Viraj Jasani <[email protected]>
Reviewed-by: litao <[email protected]>
Signed-off-by: Takanobu Asanuma <[email protected]>
1 parent f16d9df commit 57c9a85

File tree

2 files changed

+30
-21
lines changed

2 files changed

+30
-21
lines changed

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java

Lines changed: 22 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -2249,9 +2249,11 @@ public synchronized void restartNameNode(int nnIndex, boolean waitActive,
22492249
info.nameNode = nn;
22502250
info.setStartOpt(startOpt);
22512251
if (waitActive) {
2252-
waitClusterUp();
2252+
if (numDataNodes > 0) {
2253+
waitNameNodeUp(nnIndex);
2254+
}
22532255
LOG.info("Restarted the namenode");
2254-
waitActive();
2256+
waitActive(nnIndex);
22552257
}
22562258
}
22572259

@@ -2761,11 +2763,25 @@ public void waitActive(int nnIndex) throws IOException {
27612763
DFSClient client = new DFSClient(addr, conf);
27622764

27632765
// ensure all datanodes have registered and sent heartbeat to the namenode
2764-
while (shouldWait(client.datanodeReport(DatanodeReportType.LIVE), addr)) {
2766+
int failedCount = 0;
2767+
while (true) {
27652768
try {
2766-
LOG.info("Waiting for cluster to become active");
2767-
Thread.sleep(100);
2769+
while (shouldWait(client.datanodeReport(DatanodeReportType.LIVE), addr)) {
2770+
LOG.info("Waiting for cluster to become active");
2771+
Thread.sleep(100);
2772+
}
2773+
break;
2774+
} catch (IOException e) {
2775+
failedCount++;
2776+
// Cached RPC connection to namenode, if any, is expected to fail once
2777+
if (failedCount > 1) {
2778+
LOG.warn("Tried waitActive() " + failedCount
2779+
+ " time(s) and failed, giving up. " + StringUtils
2780+
.stringifyException(e));
2781+
throw e;
2782+
}
27682783
} catch (InterruptedException e) {
2784+
throw new IOException(e);
27692785
}
27702786
}
27712787

@@ -2801,22 +2817,7 @@ public Boolean get() {
28012817
*/
28022818
public void waitActive() throws IOException {
28032819
for (int index = 0; index < namenodes.size(); index++) {
2804-
int failedCount = 0;
2805-
while (true) {
2806-
try {
2807-
waitActive(index);
2808-
break;
2809-
} catch (IOException e) {
2810-
failedCount++;
2811-
// Cached RPC connection to namenode, if any, is expected to fail once
2812-
if (failedCount > 1) {
2813-
LOG.warn("Tried waitActive() " + failedCount
2814-
+ " time(s) and failed, giving up. "
2815-
+ StringUtils.stringifyException(e));
2816-
throw e;
2817-
}
2818-
}
2819-
}
2820+
waitActive(index);
28202821
}
28212822
LOG.info("Cluster is active");
28222823
}

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -309,6 +309,14 @@ public void testSetUpFederatedCluster() throws Exception {
309309
DFSUtil.addKeySuffixes(
310310
DFS_NAMENODE_HTTP_ADDRESS_KEY, "ns1", "nn1")));
311311
}
312+
313+
// Shutdown namenodes individually.
314+
cluster.shutdownNameNode(0);
315+
cluster.shutdownNameNode(1);
316+
317+
// Restart namenodes individually with wait active, both should be successful.
318+
cluster.restartNameNode(0);
319+
cluster.restartNameNode(1);
312320
}
313321
}
314322
}

0 commit comments

Comments
 (0)