Skip to content

Commit d29f0e8

Browse files
authored
HDFS-16373. Fix MiniDFSCluster restart in case of multiple namenodes. (#3756)
Reviewed-by: Viraj Jasani <[email protected]>
Reviewed-by: litao <[email protected]>
Signed-off-by: Takanobu Asanuma <[email protected]>
1 parent c56a07f commit d29f0e8

File tree

2 files changed: 30 additions and 21 deletions.

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java

Lines changed: 22 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -2267,9 +2267,11 @@ public synchronized void restartNameNode(int nnIndex, boolean waitActive,
22672267
info.nameNode = nn;
22682268
info.setStartOpt(startOpt);
22692269
if (waitActive) {
2270-
waitClusterUp();
2270+
if (numDataNodes > 0) {
2271+
waitNameNodeUp(nnIndex);
2272+
}
22712273
LOG.info("Restarted the namenode");
2272-
waitActive();
2274+
waitActive(nnIndex);
22732275
}
22742276
}
22752277

@@ -2775,11 +2777,25 @@ public void waitActive(int nnIndex) throws IOException {
27752777
DFSClient client = new DFSClient(addr, conf);
27762778

27772779
// ensure all datanodes have registered and sent heartbeat to the namenode
2778-
while (shouldWait(client.datanodeReport(DatanodeReportType.LIVE), addr)) {
2780+
int failedCount = 0;
2781+
while (true) {
27792782
try {
2780-
LOG.info("Waiting for cluster to become active");
2781-
Thread.sleep(100);
2783+
while (shouldWait(client.datanodeReport(DatanodeReportType.LIVE), addr)) {
2784+
LOG.info("Waiting for cluster to become active");
2785+
Thread.sleep(100);
2786+
}
2787+
break;
2788+
} catch (IOException e) {
2789+
failedCount++;
2790+
// Cached RPC connection to namenode, if any, is expected to fail once
2791+
if (failedCount > 1) {
2792+
LOG.warn("Tried waitActive() " + failedCount
2793+
+ " time(s) and failed, giving up. " + StringUtils
2794+
.stringifyException(e));
2795+
throw e;
2796+
}
27822797
} catch (InterruptedException e) {
2798+
throw new IOException(e);
27832799
}
27842800
}
27852801

@@ -2815,22 +2831,7 @@ public Boolean get() {
28152831
*/
28162832
public void waitActive() throws IOException {
28172833
for (int index = 0; index < namenodes.size(); index++) {
2818-
int failedCount = 0;
2819-
while (true) {
2820-
try {
2821-
waitActive(index);
2822-
break;
2823-
} catch (IOException e) {
2824-
failedCount++;
2825-
// Cached RPC connection to namenode, if any, is expected to fail once
2826-
if (failedCount > 1) {
2827-
LOG.warn("Tried waitActive() " + failedCount
2828-
+ " time(s) and failed, giving up. "
2829-
+ StringUtils.stringifyException(e));
2830-
throw e;
2831-
}
2832-
}
2833-
}
2834+
waitActive(index);
28342835
}
28352836
LOG.info("Cluster is active");
28362837
}

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -309,6 +309,14 @@ public void testSetUpFederatedCluster() throws Exception {
309309
DFSUtil.addKeySuffixes(
310310
DFS_NAMENODE_HTTP_ADDRESS_KEY, "ns1", "nn1")));
311311
}
312+
313+
// Shutdown namenodes individually.
314+
cluster.shutdownNameNode(0);
315+
cluster.shutdownNameNode(1);
316+
317+
// Restart namenodes individually with wait active, both should be successful.
318+
cluster.restartNameNode(0);
319+
cluster.restartNameNode(1);
312320
}
313321
}
314322
}

0 commit comments

Comments
 (0)