Skip to content

Commit 1bd0b58

Browse files
authored
HBASE-27104 Add a tool command list_unknownservers (#4714)
Signed-off-by: Duo Zhang <[email protected]>
1 parent fc93fba commit 1bd0b58

File tree

13 files changed, with 245 additions and 9 deletions.

hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterMetrics.java

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,9 @@ public interface ClusterMetrics {
7676
/** Returns the names of region servers on the dead list */
7777
List<ServerName> getDeadServerNames();
7878

79+
/** Returns the names of region servers on the unknown list */
80+
List<ServerName> getUnknownServerNames();
81+
7982
/** Returns the names of region servers on the live list */
8083
Map<ServerName, ServerMetrics> getLiveServerMetrics();
8184

@@ -176,6 +179,10 @@ enum Option {
176179
* metrics about dead region servers
177180
*/
178181
DEAD_SERVERS,
182+
/**
183+
* metrics about unknown region servers
184+
*/
185+
UNKNOWN_SERVERS,
179186
/**
180187
* metrics about master name
181188
*/

hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterMetricsBuilder.java

Lines changed: 38 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,8 @@ public static ClusterStatusProtos.ClusterStatus toClusterStatus(ClusterMetrics m
4747
.collect(Collectors.toList()))
4848
.addAllDeadServers(metrics.getDeadServerNames().stream().map(ProtobufUtil::toServerName)
4949
.collect(Collectors.toList()))
50+
.addAllUnknownServers(metrics.getUnknownServerNames().stream()
51+
.map(ProtobufUtil::toServerName).collect(Collectors.toList()))
5052
.addAllLiveServers(metrics.getLiveServerMetrics().entrySet().stream()
5153
.map(s -> ClusterStatusProtos.LiveServerInfo.newBuilder()
5254
.setServer(ProtobufUtil.toServerName(s.getKey()))
@@ -98,6 +100,8 @@ public static ClusterMetrics toClusterMetrics(ClusterStatusProtos.ClusterStatus
98100
ServerMetricsBuilder::toServerMetrics)))
99101
.setDeadServerNames(proto.getDeadServersList().stream().map(ProtobufUtil::toServerName)
100102
.collect(Collectors.toList()))
103+
.setUnknownServerNames(proto.getUnknownServersList().stream().map(ProtobufUtil::toServerName)
104+
.collect(Collectors.toList()))
101105
.setBackerMasterNames(proto.getBackupMastersList().stream().map(ProtobufUtil::toServerName)
102106
.collect(Collectors.toList()))
103107
.setRegionsInTransition(proto.getRegionsInTransitionList().stream()
@@ -147,6 +151,8 @@ public static ClusterMetrics.Option toOption(ClusterStatusProtos.Option option)
147151
return ClusterMetrics.Option.LIVE_SERVERS;
148152
case DEAD_SERVERS:
149153
return ClusterMetrics.Option.DEAD_SERVERS;
154+
case UNKNOWN_SERVERS:
155+
return ClusterMetrics.Option.UNKNOWN_SERVERS;
150156
case REGIONS_IN_TRANSITION:
151157
return ClusterMetrics.Option.REGIONS_IN_TRANSITION;
152158
case CLUSTER_ID:
@@ -186,6 +192,8 @@ public static ClusterStatusProtos.Option toOption(ClusterMetrics.Option option)
186192
return ClusterStatusProtos.Option.LIVE_SERVERS;
187193
case DEAD_SERVERS:
188194
return ClusterStatusProtos.Option.DEAD_SERVERS;
195+
case UNKNOWN_SERVERS:
196+
return ClusterStatusProtos.Option.UNKNOWN_SERVERS;
189197
case REGIONS_IN_TRANSITION:
190198
return ClusterStatusProtos.Option.REGIONS_IN_TRANSITION;
191199
case CLUSTER_ID:
@@ -238,6 +246,7 @@ public static ClusterMetricsBuilder newBuilder() {
238246
@Nullable
239247
private String hbaseVersion;
240248
private List<ServerName> deadServerNames = Collections.emptyList();
249+
private List<ServerName> unknownServerNames = Collections.emptyList();
241250
private Map<ServerName, ServerMetrics> liveServerMetrics = new TreeMap<>();
242251
@Nullable
243252
private ServerName masterName;
@@ -267,6 +276,11 @@ public ClusterMetricsBuilder setDeadServerNames(List<ServerName> value) {
267276
return this;
268277
}
269278

279+
public ClusterMetricsBuilder setUnknownServerNames(List<ServerName> value) {
280+
this.unknownServerNames = value;
281+
return this;
282+
}
283+
270284
public ClusterMetricsBuilder setLiveServerMetrics(Map<ServerName, ServerMetrics> value) {
271285
liveServerMetrics.putAll(value);
272286
return this;
@@ -324,16 +338,18 @@ public ClusterMetricsBuilder setMasterTasks(List<ServerTask> masterTasks) {
324338
}
325339

326340
public ClusterMetrics build() {
327-
return new ClusterMetricsImpl(hbaseVersion, deadServerNames, liveServerMetrics, masterName,
328-
backupMasterNames, regionsInTransition, clusterId, masterCoprocessorNames, balancerOn,
329-
masterInfoPort, serversName, tableRegionStatesCount, masterTasks);
341+
return new ClusterMetricsImpl(hbaseVersion, deadServerNames, unknownServerNames,
342+
liveServerMetrics, masterName, backupMasterNames, regionsInTransition, clusterId,
343+
masterCoprocessorNames, balancerOn, masterInfoPort, serversName, tableRegionStatesCount,
344+
masterTasks);
330345
}
331346

332347
private static class ClusterMetricsImpl implements ClusterMetrics {
333348
@Nullable
334349
private final String hbaseVersion;
335350
private final List<ServerName> deadServerNames;
336351
private final Map<ServerName, ServerMetrics> liveServerMetrics;
352+
private final List<ServerName> unknownServerNames;
337353
@Nullable
338354
private final ServerName masterName;
339355
private final List<ServerName> backupMasterNames;
@@ -349,13 +365,14 @@ private static class ClusterMetricsImpl implements ClusterMetrics {
349365
private final List<ServerTask> masterTasks;
350366

351367
ClusterMetricsImpl(String hbaseVersion, List<ServerName> deadServerNames,
352-
Map<ServerName, ServerMetrics> liveServerMetrics, ServerName masterName,
353-
List<ServerName> backupMasterNames, List<RegionState> regionsInTransition, String clusterId,
354-
List<String> masterCoprocessorNames, Boolean balancerOn, int masterInfoPort,
355-
List<ServerName> serversName, Map<TableName, RegionStatesCount> tableRegionStatesCount,
356-
List<ServerTask> masterTasks) {
368+
List<ServerName> unknownServerNames, Map<ServerName, ServerMetrics> liveServerMetrics,
369+
ServerName masterName, List<ServerName> backupMasterNames,
370+
List<RegionState> regionsInTransition, String clusterId, List<String> masterCoprocessorNames,
371+
Boolean balancerOn, int masterInfoPort, List<ServerName> serversName,
372+
Map<TableName, RegionStatesCount> tableRegionStatesCount, List<ServerTask> masterTasks) {
357373
this.hbaseVersion = hbaseVersion;
358374
this.deadServerNames = Preconditions.checkNotNull(deadServerNames);
375+
this.unknownServerNames = Preconditions.checkNotNull(unknownServerNames);
359376
this.liveServerMetrics = Preconditions.checkNotNull(liveServerMetrics);
360377
this.masterName = masterName;
361378
this.backupMasterNames = Preconditions.checkNotNull(backupMasterNames);
@@ -379,6 +396,11 @@ public List<ServerName> getDeadServerNames() {
379396
return Collections.unmodifiableList(deadServerNames);
380397
}
381398

399+
@Override
400+
public List<ServerName> getUnknownServerNames() {
401+
return Collections.unmodifiableList(unknownServerNames);
402+
}
403+
382404
@Override
383405
public Map<ServerName, ServerMetrics> getLiveServerMetrics() {
384406
return Collections.unmodifiableMap(liveServerMetrics);
@@ -469,6 +491,14 @@ public String toString() {
469491
}
470492
}
471493

494+
int unknownServerSize = getUnknownServerNames().size();
495+
sb.append("\nNumber of unknown region servers: " + unknownServerSize);
496+
if (unknownServerSize > 0) {
497+
for (ServerName serverName : getUnknownServerNames()) {
498+
sb.append("\n " + serverName);
499+
}
500+
}
501+
472502
sb.append("\nAverage load: " + getAverageLoad());
473503
sb.append("\nNumber of requests: " + getRequestCount());
474504
sb.append("\nNumber of regions: " + getRegionCount());

hbase-client/src/main/java/org/apache/hadoop/hbase/ClusterStatus.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -113,6 +113,11 @@ public List<ServerName> getDeadServerNames() {
113113
return metrics.getDeadServerNames();
114114
}
115115

116+
@Override
117+
public List<ServerName> getUnknownServerNames() {
118+
return metrics.getUnknownServerNames();
119+
}
120+
116121
@Override
117122
public Map<ServerName, ServerMetrics> getLiveServerMetrics() {
118123
return metrics.getLiveServerMetrics();

hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3026,6 +3026,14 @@ default List<ServerName> listDeadServers() throws IOException {
30263026
return getClusterMetrics(EnumSet.of(Option.DEAD_SERVERS)).getDeadServerNames();
30273027
}
30283028

3029+
/**
3030+
* List unknown region servers.
3031+
* @return List of unknown region servers.
3032+
*/
3033+
default List<ServerName> listUnknownServers() throws IOException {
3034+
return getClusterMetrics(EnumSet.of(Option.UNKNOWN_SERVERS)).getUnknownServerNames();
3035+
}
3036+
30293037
/**
30303038
* Clear dead region servers from master.
30313039
* @param servers list of dead region servers.

hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1426,6 +1426,14 @@ default CompletableFuture<List<ServerName>> listDeadServers() {
14261426
.thenApply(ClusterMetrics::getDeadServerNames);
14271427
}
14281428

1429+
/**
1430+
* List all the unknown region servers.
1431+
*/
1432+
default CompletableFuture<List<ServerName>> listUnknownServers() {
1433+
return this.getClusterMetrics(EnumSet.of(Option.UNKNOWN_SERVERS))
1434+
.thenApply(ClusterMetrics::getUnknownServerNames);
1435+
}
1436+
14291437
/**
14301438
* Clear dead region servers from master.
14311439
* @param servers list of dead region servers.

hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -764,6 +764,11 @@ public CompletableFuture<List<ServerName>> listDeadServers() {
764764
return wrap(rawAdmin.listDeadServers());
765765
}
766766

767+
@Override
768+
public CompletableFuture<List<ServerName>> listUnknownServers() {
769+
return wrap(rawAdmin.listUnknownServers());
770+
}
771+
767772
@Override
768773
public CompletableFuture<List<ServerName>> clearDeadServers(List<ServerName> servers) {
769774
return wrap(rawAdmin.clearDeadServers(servers));

hbase-protocol-shaded/src/main/protobuf/ClusterStatus.proto

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -339,6 +339,7 @@ message ClusterStatus {
339339
repeated ServerName servers_name = 11;
340340
repeated TableRegionStatesCount table_region_states_count = 12;
341341
repeated ServerTask master_tasks = 13;
342+
repeated ServerName unknown_servers = 14;
342343
}
343344

344345
enum Option {
@@ -355,4 +356,5 @@ enum Option {
355356
SERVERS_NAME = 10;
356357
TABLE_TO_REGIONS_COUNT = 11;
357358
TASKS = 12;
359+
UNKNOWN_SERVERS = 13;
358360
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2849,6 +2849,12 @@ public ClusterMetrics getClusterMetricsWithoutCoprocessor(EnumSet<Option> option
28492849
}
28502850
break;
28512851
}
2852+
case UNKNOWN_SERVERS: {
2853+
if (serverManager != null) {
2854+
builder.setUnknownServerNames(getUnknownServers());
2855+
}
2856+
break;
2857+
}
28522858
case MASTER_COPROCESSORS: {
28532859
if (cpHost != null) {
28542860
builder.setMasterCoprocessorNames(Arrays.asList(getMasterCoprocessors()));
@@ -2908,6 +2914,17 @@ public ClusterMetrics getClusterMetricsWithoutCoprocessor(EnumSet<Option> option
29082914
return builder.build();
29092915
}
29102916

2917+
private List<ServerName> getUnknownServers() {
2918+
if (serverManager != null) {
2919+
final Set<ServerName> serverNames = getAssignmentManager().getRegionStates().getRegionStates()
2920+
.stream().map(RegionState::getServerName).collect(Collectors.toSet());
2921+
final List<ServerName> unknownServerNames = serverNames.stream()
2922+
.filter(sn -> sn != null && serverManager.isServerUnknown(sn)).collect(Collectors.toList());
2923+
return unknownServerNames;
2924+
}
2925+
return null;
2926+
}
2927+
29112928
private Map<ServerName, ServerMetrics> getOnlineServers() {
29122929
if (serverManager != null) {
29132930
final Map<ServerName, ServerMetrics> map = new HashMap<>();

hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -227,6 +227,11 @@ public List<ServerName> getDeadServerNames() {
227227
return null;
228228
}
229229

230+
@Override
231+
public List<ServerName> getUnknownServerNames() {
232+
return null;
233+
}
234+
230235
@Override
231236
public Map<ServerName, ServerMetrics> getLiveServerMetrics() {
232237
Map<ServerName, ServerMetrics> liveServerMetrics = new HashMap<>();

hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestUnknownServers.java

Lines changed: 98 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,98 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.master;
19+
20+
import java.io.IOException;
21+
import org.apache.hadoop.conf.Configuration;
22+
import org.apache.hadoop.hbase.HBaseClassTestRule;
23+
import org.apache.hadoop.hbase.HBaseTestingUtility;
24+
import org.apache.hadoop.hbase.HConstants;
25+
import org.apache.hadoop.hbase.ServerName;
26+
import org.apache.hadoop.hbase.client.Admin;
27+
import org.apache.hadoop.hbase.testclassification.MasterTests;
28+
import org.apache.hadoop.hbase.testclassification.MediumTests;
29+
import org.junit.AfterClass;
30+
import org.junit.Assert;
31+
import org.junit.BeforeClass;
32+
import org.junit.ClassRule;
33+
import org.junit.Test;
34+
import org.junit.experimental.categories.Category;
35+
36+
@Category({ MasterTests.class, MediumTests.class })
37+
public class TestUnknownServers {
38+
@ClassRule
39+
public static final HBaseClassTestRule CLASS_RULE =
40+
HBaseClassTestRule.forClass(TestUnknownServers.class);
41+
42+
private static HBaseTestingUtility UTIL;
43+
private static Admin ADMIN;
44+
private final static int SLAVES = 2;
45+
private static boolean IS_UNKNOWN_SERVER = true;
46+
47+
@BeforeClass
48+
public static void setUpBeforeClass() throws Exception {
49+
UTIL = new HBaseTestingUtility();
50+
UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL,
51+
TestUnknownServers.HMasterForTest.class, HMaster.class);
52+
UTIL.startMiniCluster(SLAVES);
53+
ADMIN = UTIL.getAdmin();
54+
}
55+
56+
@Test
57+
public void testListUnknownServers() throws Exception {
58+
Assert.assertEquals(ADMIN.listUnknownServers().size(), SLAVES);
59+
IS_UNKNOWN_SERVER = false;
60+
Assert.assertEquals(ADMIN.listUnknownServers().size(), 0);
61+
}
62+
63+
@AfterClass
64+
public static void tearDownAfterClass() throws Exception {
65+
if (ADMIN != null) {
66+
ADMIN.close();
67+
}
68+
if (UTIL != null) {
69+
UTIL.shutdownMiniCluster();
70+
}
71+
}
72+
73+
public static final class HMasterForTest extends HMaster {
74+
75+
public HMasterForTest(Configuration conf) throws IOException {
76+
super(conf);
77+
}
78+
79+
@Override
80+
protected ServerManager createServerManager(MasterServices master, RegionServerList storage)
81+
throws IOException {
82+
setupClusterConnection();
83+
return new TestUnknownServers.ServerManagerForTest(master, storage);
84+
}
85+
}
86+
87+
private static final class ServerManagerForTest extends ServerManager {
88+
89+
public ServerManagerForTest(MasterServices master, RegionServerList storage) {
90+
super(master, storage);
91+
}
92+
93+
@Override
94+
public boolean isServerUnknown(ServerName serverName) {
95+
return IS_UNKNOWN_SERVER;
96+
}
97+
}
98+
}

0 commit comments

Comments (0)