Skip to content

Commit 5ec1fed

Browse files
committed
HBASE-22737 Add a new admin method and shell cmd to trigger the hbck chore to run (apache#425)
Signed-off-by: stack <[email protected]>
1 parent 205444e commit 5ec1fed

File tree

13 files changed

Lines changed: 175 additions & 59 deletions

hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,24 @@
2525
import org.apache.hadoop.conf.Configuration;
2626
import org.apache.hadoop.hbase.ServerName;
2727
import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
28+
import org.apache.yetus.audience.InterfaceAudience;
29+
import org.slf4j.Logger;
30+
import org.slf4j.LoggerFactory;
31+
2832
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
2933
import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
30-
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;
34+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.AssignsResponse;
35+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BypassProcedureRequest;
36+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BypassProcedureResponse;
3137
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetTableStateResponse;
3238
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.HbckService.BlockingInterface;
39+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunHbckChoreRequest;
40+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunHbckChoreResponse;
41+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ScheduleServerCrashProcedureResponse;
42+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.UnassignsResponse;
3343

3444
import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
3545

36-
import org.apache.yetus.audience.InterfaceAudience;
37-
38-
import org.slf4j.Logger;
39-
import org.slf4j.LoggerFactory;
40-
41-
4246
/**
4347
* Use {@link ClusterConnection#getHbck()} to obtain an instance of {@link Hbck} instead of
4448
* constructing an HBaseHbck directly.
@@ -106,9 +110,8 @@ public TableState setTableStateInMeta(TableState state) throws IOException {
106110
public List<Long> assigns(List<String> encodedRegionNames, boolean override)
107111
throws IOException {
108112
try {
109-
MasterProtos.AssignsResponse response =
110-
this.hbck.assigns(rpcControllerFactory.newController(),
111-
RequestConverter.toAssignRegionsRequest(encodedRegionNames, override));
113+
AssignsResponse response = this.hbck.assigns(rpcControllerFactory.newController(),
114+
RequestConverter.toAssignRegionsRequest(encodedRegionNames, override));
112115
return response.getPidList();
113116
} catch (ServiceException se) {
114117
LOG.debug(toCommaDelimitedString(encodedRegionNames), se);
@@ -120,9 +123,8 @@ public List<Long> assigns(List<String> encodedRegionNames, boolean override)
120123
public List<Long> unassigns(List<String> encodedRegionNames, boolean override)
121124
throws IOException {
122125
try {
123-
MasterProtos.UnassignsResponse response =
124-
this.hbck.unassigns(rpcControllerFactory.newController(),
125-
RequestConverter.toUnassignRegionsRequest(encodedRegionNames, override));
126+
UnassignsResponse response = this.hbck.unassigns(rpcControllerFactory.newController(),
127+
RequestConverter.toUnassignRegionsRequest(encodedRegionNames, override));
126128
return response.getPidList();
127129
} catch (ServiceException se) {
128130
LOG.debug(toCommaDelimitedString(encodedRegionNames), se);
@@ -138,13 +140,13 @@ private static String toCommaDelimitedString(List<String> list) {
138140
public List<Boolean> bypassProcedure(List<Long> pids, long waitTime, boolean override,
139141
boolean recursive)
140142
throws IOException {
141-
MasterProtos.BypassProcedureResponse response = ProtobufUtil.call(
142-
new Callable<MasterProtos.BypassProcedureResponse>() {
143+
BypassProcedureResponse response = ProtobufUtil.call(
144+
new Callable<BypassProcedureResponse>() {
143145
@Override
144-
public MasterProtos.BypassProcedureResponse call() throws Exception {
146+
public BypassProcedureResponse call() throws Exception {
145147
try {
146148
return hbck.bypassProcedure(rpcControllerFactory.newController(),
147-
MasterProtos.BypassProcedureRequest.newBuilder().addAllProcId(pids).
149+
BypassProcedureRequest.newBuilder().addAllProcId(pids).
148150
setWaitTime(waitTime).setOverride(override).setRecursive(recursive).build());
149151
} catch (Throwable t) {
150152
LOG.error(pids.stream().map(i -> i.toString()).
@@ -160,7 +162,7 @@ public MasterProtos.BypassProcedureResponse call() throws Exception {
160162
public List<Long> scheduleServerCrashProcedures(List<ServerName> serverNames)
161163
throws IOException {
162164
try {
163-
MasterProtos.ScheduleServerCrashProcedureResponse response =
165+
ScheduleServerCrashProcedureResponse response =
164166
this.hbck.scheduleServerCrashProcedure(rpcControllerFactory.newController(),
165167
RequestConverter.toScheduleServerCrashProcedureRequest(serverNames));
166168
return response.getPidList();
@@ -172,4 +174,16 @@ public List<Long> scheduleServerCrashProcedures(List<ServerName> serverNames)
172174
throw new IOException(se);
173175
}
174176
}
177+
178+
@Override
179+
public boolean runHbckChore() throws IOException {
180+
try {
181+
RunHbckChoreResponse response = this.hbck.runHbckChore(rpcControllerFactory.newController(),
182+
RunHbckChoreRequest.newBuilder().build());
183+
return response.getRan();
184+
} catch (ServiceException se) {
185+
LOG.debug("Failed to run HBCK chore", se);
186+
throw new IOException(se);
187+
}
188+
}
175189
}

hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,4 +120,12 @@ default List<Long> scheduleServerCrashProcedure(List<HBaseProtos.ServerName> ser
120120
}
121121

122122
List<Long> scheduleServerCrashProcedures(List<ServerName> serverNames) throws IOException;
123+
124+
/**
125+
* Request HBCK chore to run at master side.
126+
*
127+
* @return <code>true</code> if HBCK chore ran, <code>false</code> if HBCK chore already running
128+
* @throws IOException if a remote or network exception occurs
129+
*/
130+
boolean runHbckChore() throws IOException;
123131
}

hbase-protocol-shaded/src/main/protobuf/Master.proto

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,13 @@ message IsNormalizerEnabledResponse {
351351
required bool enabled = 1;
352352
}
353353

354+
message RunHbckChoreRequest {
355+
}
356+
357+
message RunHbckChoreResponse {
358+
required bool ran = 1;
359+
}
360+
354361
message RunCatalogScanRequest {
355362
}
356363

@@ -1123,4 +1130,10 @@ service HbckService {
11231130
/** Schedule a ServerCrashProcedure to help recover a crash server */
11241131
rpc ScheduleServerCrashProcedure(ScheduleServerCrashProcedureRequest)
11251132
returns(ScheduleServerCrashProcedureResponse);
1133+
1134+
/**
1135+
* Request HBCK chore to run at master side.
1136+
*/
1137+
rpc RunHbckChore(RunHbckChoreRequest)
1138+
returns(RunHbckChoreResponse);
11261139
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,7 @@ public void run() {
374374
private ClusterStatusChore clusterStatusChore;
375375
private ClusterStatusPublisher clusterStatusPublisherChore = null;
376376

377-
private HbckChecker hbckChecker;
377+
private HbckChore hbckChore;
378378
CatalogJanitor catalogJanitorChore;
379379
private LogCleaner logCleaner;
380380
private HFileCleaner hfileCleaner;
@@ -1088,8 +1088,8 @@ private void finishActiveMasterInitialization(MonitoredTask status)
10881088
getChoreService().scheduleChore(normalizerChore);
10891089
this.catalogJanitorChore = new CatalogJanitor(this);
10901090
getChoreService().scheduleChore(catalogJanitorChore);
1091-
this.hbckChecker = new HbckChecker(this);
1092-
getChoreService().scheduleChore(hbckChecker);
1091+
this.hbckChore = new HbckChore(this);
1092+
getChoreService().scheduleChore(hbckChore);
10931093

10941094
// NAMESPACE READ!!!!
10951095
// Here we expect hbase:namespace to be online. See inside initClusterSchemaService.
@@ -1552,7 +1552,7 @@ private void stopChores() {
15521552
choreService.cancelChore(this.logCleaner);
15531553
choreService.cancelChore(this.hfileCleaner);
15541554
choreService.cancelChore(this.replicationBarrierCleaner);
1555-
choreService.cancelChore(this.hbckChecker);
1555+
choreService.cancelChore(this.hbckChore);
15561556
}
15571557
}
15581558

@@ -3673,7 +3673,7 @@ public Map<String, ReplicationStatus> getWalGroupsReplicationStatus() {
36733673
return super.getWalGroupsReplicationStatus();
36743674
}
36753675

3676-
public HbckChecker getHbckChecker() {
3677-
return this.hbckChecker;
3676+
public HbckChore getHbckChore() {
3677+
return this.hbckChore;
36783678
}
36793679
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChecker.java renamed to hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,11 @@
4747
*/
4848
@InterfaceAudience.Private
4949
@InterfaceStability.Evolving
50-
public class HbckChecker extends ScheduledChore {
51-
private static final Logger LOG = LoggerFactory.getLogger(HbckChecker.class.getName());
50+
public class HbckChore extends ScheduledChore {
51+
private static final Logger LOG = LoggerFactory.getLogger(HbckChore.class.getName());
5252

53-
private static final String HBCK_CHECKER_INTERVAL = "hbase.master.hbck.checker.interval";
54-
private static final int DEFAULT_HBCK_CHECKER_INTERVAL = 60 * 60 * 1000;
53+
private static final String HBCK_CHORE_INTERVAL = "hbase.master.hbck.chore.interval";
54+
private static final int DEFAULT_HBCK_CHORE_INTERVAL = 60 * 60 * 1000;
5555

5656
private final MasterServices master;
5757

@@ -100,14 +100,14 @@ public class HbckChecker extends ScheduledChore {
100100
private volatile long checkingStartTimestamp = 0;
101101
private volatile long checkingEndTimestamp = 0;
102102

103-
public HbckChecker(MasterServices master) {
104-
super("HbckChecker-", master,
105-
master.getConfiguration().getInt(HBCK_CHECKER_INTERVAL, DEFAULT_HBCK_CHECKER_INTERVAL));
103+
public HbckChore(MasterServices master) {
104+
super("HbckChore-", master,
105+
master.getConfiguration().getInt(HBCK_CHORE_INTERVAL, DEFAULT_HBCK_CHORE_INTERVAL));
106106
this.master = master;
107107
}
108108

109109
@Override
110-
protected void chore() {
110+
protected synchronized void chore() {
111111
running = true;
112112
regionInfoMap.clear();
113113
orphanRegionsOnRS.clear();
@@ -277,6 +277,6 @@ public long getCheckingStartTimestamp() {
277277
* Used for web ui to show when the HBCK checking report generated.
278278
*/
279279
public long getCheckingEndTimestamp() {
280-
return this.checkingStartTimestamp;
280+
return this.checkingEndTimestamp;
281281
}
282282
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,8 @@
254254
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCatalogScanResponse;
255255
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCleanerChoreRequest;
256256
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCleanerChoreResponse;
257+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunHbckChoreRequest;
258+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunHbckChoreResponse;
257259
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SecurityCapabilitiesRequest;
258260
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SecurityCapabilitiesResponse;
259261
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetBalancerRunningRequest;
@@ -2314,6 +2316,20 @@ public ReportProcedureDoneResponse reportProcedureDone(RpcController controller,
23142316

23152317
// HBCK Services
23162318

2319+
@Override
2320+
public RunHbckChoreResponse runHbckChore(RpcController c, RunHbckChoreRequest req)
2321+
throws ServiceException {
2322+
rpcPreCheck("runHbckChore");
2323+
LOG.info("{} request HBCK chore to run", master.getClientIdAuditPrefix());
2324+
HbckChore hbckChore = master.getHbckChore();
2325+
boolean ran = false;
2326+
if (!hbckChore.isRunning()) {
2327+
hbckChore.chore();
2328+
ran = true;
2329+
}
2330+
return RunHbckChoreResponse.newBuilder().setRan(ran).build();
2331+
}
2332+
23172333
/**
23182334
* Update state of the table in meta only. This is required by hbck in some situations to cleanup
23192335
* stuck assign/ unassign regions procedures for the table.

hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,25 +23,25 @@
2323
import="java.util.Map"
2424
import="java.util.stream.Collectors"
2525
%>
26-
<%@ page import="org.apache.hadoop.hbase.master.HbckChecker" %>
26+
<%@ page import="org.apache.hadoop.hbase.master.HbckChore" %>
2727
<%@ page import="org.apache.hadoop.hbase.master.HMaster" %>
2828
<%@ page import="org.apache.hadoop.hbase.ServerName" %>
2929
<%@ page import="org.apache.hadoop.hbase.util.Pair" %>
3030
<%
3131
HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
3232
pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName());
33-
HbckChecker hbckChecker = master.getHbckChecker();
33+
HbckChore hbckChore = master.getHbckChore();
3434
Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions = null;
3535
Map<String, ServerName> orphanRegionsOnRS = null;
3636
List<String> orphanRegionsOnFS = null;
3737
long startTimestamp = 0;
3838
long endTimestamp = 0;
39-
if (hbckChecker != null) {
40-
inconsistentRegions = hbckChecker.getInconsistentRegions();
41-
orphanRegionsOnRS = hbckChecker.getOrphanRegionsOnRS();
42-
orphanRegionsOnFS = hbckChecker.getOrphanRegionsOnFS();
43-
startTimestamp = hbckChecker.getCheckingStartTimestamp();
44-
endTimestamp = hbckChecker.getCheckingEndTimestamp();
39+
if (hbckChore != null) {
40+
inconsistentRegions = hbckChore.getInconsistentRegions();
41+
orphanRegionsOnRS = hbckChore.getOrphanRegionsOnRS();
42+
orphanRegionsOnFS = hbckChore.getOrphanRegionsOnFS();
43+
startTimestamp = hbckChore.getCheckingStartTimestamp();
44+
endTimestamp = hbckChore.getCheckingEndTimestamp();
4545
}
4646
%>
4747
<jsp:include page="header.jsp">

hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.apache.hadoop.hbase.HBaseTestingUtility;
2929
import org.apache.hadoop.hbase.ServerName;
3030
import org.apache.hadoop.hbase.TableName;
31+
import org.apache.hadoop.hbase.master.HMaster;
3132
import org.apache.hadoop.hbase.master.RegionState;
3233
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
3334
import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface;
@@ -223,6 +224,20 @@ public void testScheduleSCP() throws Exception {
223224
waitOnPids(pids);
224225
}
225226

227+
@Test
228+
public void testRunHbckChore() throws Exception {
229+
HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
230+
long endTimestamp = master.getHbckChore().getCheckingEndTimestamp();
231+
Hbck hbck = getHbck();
232+
boolean ran = false;
233+
while (!ran) {
234+
ran = hbck.runHbckChore();
235+
if (ran) {
236+
assertTrue(master.getHbckChore().getCheckingEndTimestamp() > endTimestamp);
237+
}
238+
}
239+
}
240+
226241
private void waitOnPids(List<Long> pids) {
227242
TEST_UTIL.waitFor(60000, () -> pids.stream().allMatch(procExec::isFinished));
228243
}

0 commit comments

Comments
 (0)