Skip to content

Commit 037e081

Browse files
committed
HBASE-22404 Open/Close region request may be executed twice when master restart
1 parent 62ad94c commit 037e081

File tree

4 files changed

+74
-19
lines changed

4 files changed

+74
-19
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import java.util.concurrent.ConcurrentHashMap;
5252
import java.util.concurrent.ConcurrentMap;
5353
import java.util.concurrent.ConcurrentSkipListMap;
54+
import java.util.concurrent.TimeUnit;
5455
import java.util.concurrent.atomic.AtomicBoolean;
5556
import java.util.concurrent.locks.ReentrantReadWriteLock;
5657
import java.util.function.Function;
@@ -181,6 +182,8 @@
181182
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
182183
import org.apache.hadoop.ipc.RemoteException;
183184
import org.apache.hadoop.util.ReflectionUtils;
185+
import org.apache.hbase.thirdparty.com.google.common.cache.Cache;
186+
import org.apache.hbase.thirdparty.com.google.common.cache.CacheBuilder;
184187
import org.apache.yetus.audience.InterfaceAudience;
185188
import org.apache.zookeeper.KeeperException;
186189
import org.slf4j.Logger;
@@ -255,6 +258,16 @@ public class HRegionServer extends HasThread implements
255258
protected final ConcurrentMap<byte[], Boolean> regionsInTransitionInRS =
256259
new ConcurrentSkipListMap<>(Bytes.BYTES_COMPARATOR);
257260

261+
/**
262+
* Used to cache the open/close region procedures which already submitted.
263+
*/
264+
private final ConcurrentMap<Long, Long> submittedRegionProcedures = new ConcurrentHashMap<>();
265+
/**
266+
* Used to cache the open/close region procedures which already executed.
267+
*/
268+
private final Cache<Long, Long> executedRegionProcedures =
269+
CacheBuilder.newBuilder().expireAfterAccess(600, TimeUnit.SECONDS).build();
270+
258271
// Cache flushing
259272
protected MemStoreFlusher cacheFlusher;
260273

@@ -3882,6 +3895,35 @@ void reportProcedureDone(ReportProcedureDoneRequest request) throws IOException
38823895
}
38833896
}
38843897

3898+
/**
3899+
* Will ignore the open/close region procedures which already submitted or executed.
3900+
* See HBASE-22404 for more details.
3901+
* @param procId the id of the open/close region procedure
3902+
* @return true if the procedure can be submitted.
3903+
*/
3904+
boolean submitRegionProcedure(long procId) {
3905+
if (procId == -1) {
3906+
return true;
3907+
}
3908+
// Ignore the region procedures which already submitted.
3909+
Long previous = submittedRegionProcedures.putIfAbsent(procId, procId);
3910+
if (previous != null) {
3911+
LOG.warn("Received procedure pid={}, which already submitted, just ignore it", procId);
3912+
return false;
3913+
}
3914+
// Ignore the region procedures which already executed.
3915+
if (executedRegionProcedures.getIfPresent(procId) != null) {
3916+
LOG.warn("Received procedure pid={}, which already executed, just ignore it", procId);
3917+
return false;
3918+
}
3919+
return true;
3920+
}
3921+
3922+
public void finishRegionProcedure(long procId) {
3923+
submittedRegionProcedures.remove(procId);
3924+
executedRegionProcedures.put(procId, procId);
3925+
}
3926+
38853927
public boolean isShutDown() {
38863928
return shutDown;
38873929
}

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3721,8 +3721,12 @@ private void executeOpenRegionProcedures(OpenRegionRequest request,
37213721
regionServer.updateRegionFavoredNodesMapping(regionInfo.getEncodedName(),
37223722
regionOpenInfo.getFavoredNodesList());
37233723
}
3724-
regionServer.executorService.submit(AssignRegionHandler.create(regionServer, regionInfo,
3725-
regionOpenInfo.getOpenProcId(), tableDesc, masterSystemTime));
3724+
long procId = regionOpenInfo.getOpenProcId();
3725+
if (regionServer.submitRegionProcedure(procId)) {
3726+
regionServer.executorService.submit(AssignRegionHandler
3727+
.create(regionServer, regionInfo, procId, tableDesc,
3728+
masterSystemTime));
3729+
}
37263730
}
37273731
}
37283732

@@ -3733,11 +3737,14 @@ private void executeCloseRegionProcedures(CloseRegionRequest request) {
37333737
} catch (DoNotRetryIOException e) {
37343738
throw new UncheckedIOException("Should not happen", e);
37353739
}
3736-
ServerName destination =
3737-
request.hasDestinationServer() ? ProtobufUtil.toServerName(request.getDestinationServer())
3738-
: null;
3739-
regionServer.executorService.submit(UnassignRegionHandler.create(regionServer, encodedName,
3740-
request.getCloseProcId(), false, destination));
3740+
ServerName destination = request.hasDestinationServer() ?
3741+
ProtobufUtil.toServerName(request.getDestinationServer()) :
3742+
null;
3743+
long procId = request.getCloseProcId();
3744+
if (regionServer.submitRegionProcedure(procId)) {
3745+
regionServer.executorService.submit(UnassignRegionHandler
3746+
.create(regionServer, encodedName, procId, false, destination));
3747+
}
37413748
}
37423749

37433750
private void executeProcedures(RemoteProcedureRequest request) {

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/AssignRegionHandler.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.apache.hadoop.hbase.executor.EventHandler;
2727
import org.apache.hadoop.hbase.executor.EventType;
2828
import org.apache.hadoop.hbase.regionserver.HRegion;
29+
import org.apache.hadoop.hbase.regionserver.HRegionServer;
2930
import org.apache.hadoop.hbase.regionserver.Region;
3031
import org.apache.hadoop.hbase.regionserver.RegionServerServices;
3132
import org.apache.hadoop.hbase.regionserver.RegionServerServices.PostOpenDeployContext;
@@ -59,7 +60,7 @@ public class AssignRegionHandler extends EventHandler {
5960

6061
private final RetryCounter retryCounter;
6162

62-
public AssignRegionHandler(RegionServerServices server, RegionInfo regionInfo, long openProcId,
63+
public AssignRegionHandler(HRegionServer server, RegionInfo regionInfo, long openProcId,
6364
@Nullable TableDescriptor tableDesc, long masterSystemTime, EventType eventType) {
6465
super(server, eventType);
6566
this.regionInfo = regionInfo;
@@ -69,8 +70,8 @@ public AssignRegionHandler(RegionServerServices server, RegionInfo regionInfo, l
6970
this.retryCounter = HandlerUtil.getRetryCounter();
7071
}
7172

72-
private RegionServerServices getServer() {
73-
return (RegionServerServices) server;
73+
private HRegionServer getServer() {
74+
return (HRegionServer) server;
7475
}
7576

7677
private void cleanUpAndReportFailure(IOException error) throws IOException {
@@ -87,7 +88,7 @@ private void cleanUpAndReportFailure(IOException error) throws IOException {
8788

8889
@Override
8990
public void process() throws IOException {
90-
RegionServerServices rs = getServer();
91+
HRegionServer rs = getServer();
9192
String encodedName = regionInfo.getEncodedName();
9293
byte[] encodedNameBytes = regionInfo.getEncodedNameAsBytes();
9394
String regionName = regionInfo.getRegionNameAsString();
@@ -101,7 +102,6 @@ public void process() throws IOException {
101102
// reportRegionStateTransition any more.
102103
return;
103104
}
104-
LOG.info("Open {}", regionName);
105105
Boolean previous = rs.getRegionsInTransitionInRS().putIfAbsent(encodedNameBytes, Boolean.TRUE);
106106
if (previous != null) {
107107
if (previous) {
@@ -121,6 +121,7 @@ public void process() throws IOException {
121121
}
122122
return;
123123
}
124+
LOG.info("Open {}", regionName);
124125
HRegion region;
125126
try {
126127
TableDescriptor htd =
@@ -139,6 +140,8 @@ public void process() throws IOException {
139140
rs.postOpenDeployTasks(new PostOpenDeployContext(region, openProcId, masterSystemTime));
140141
rs.addRegion(region);
141142
LOG.info("Opened {}", regionName);
143+
// Cache the open region procedure id after report region transition succeed.
144+
rs.finishRegionProcedure(openProcId);
142145
Boolean current = rs.getRegionsInTransitionInRS().remove(regionInfo.getEncodedNameAsBytes());
143146
if (current == null) {
144147
// Should NEVER happen, but let's be paranoid.
@@ -158,7 +161,7 @@ protected void handleException(Throwable t) {
158161
"Failed to open region " + regionInfo.getRegionNameAsString() + " and can not recover", t);
159162
}
160163

161-
public static AssignRegionHandler create(RegionServerServices server, RegionInfo regionInfo,
164+
public static AssignRegionHandler create(HRegionServer server, RegionInfo regionInfo,
162165
long openProcId, TableDescriptor tableDesc, long masterSystemTime) {
163166
EventType eventType;
164167
if (regionInfo.isMetaRegion()) {

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/UnassignRegionHandler.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.hadoop.hbase.executor.EventHandler;
2626
import org.apache.hadoop.hbase.executor.EventType;
2727
import org.apache.hadoop.hbase.regionserver.HRegion;
28+
import org.apache.hadoop.hbase.regionserver.HRegionServer;
2829
import org.apache.hadoop.hbase.regionserver.Region;
2930
import org.apache.hadoop.hbase.regionserver.RegionServerServices;
3031
import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext;
@@ -60,7 +61,7 @@ public class UnassignRegionHandler extends EventHandler {
6061

6162
private final RetryCounter retryCounter;
6263

63-
public UnassignRegionHandler(RegionServerServices server, String encodedName, long closeProcId,
64+
public UnassignRegionHandler(HRegionServer server, String encodedName, long closeProcId,
6465
boolean abort, @Nullable ServerName destination, EventType eventType) {
6566
super(server, eventType);
6667
this.encodedName = encodedName;
@@ -70,13 +71,13 @@ public UnassignRegionHandler(RegionServerServices server, String encodedName, lo
7071
this.retryCounter = HandlerUtil.getRetryCounter();
7172
}
7273

73-
private RegionServerServices getServer() {
74-
return (RegionServerServices) server;
74+
private HRegionServer getServer() {
75+
return (HRegionServer) server;
7576
}
7677

7778
@Override
7879
public void process() throws IOException {
79-
RegionServerServices rs = getServer();
80+
HRegionServer rs = getServer();
8081
byte[] encodedNameBytes = Bytes.toBytes(encodedName);
8182
Boolean previous = rs.getRegionsInTransitionInRS().putIfAbsent(encodedNameBytes, Boolean.FALSE);
8283
if (previous != null) {
@@ -94,7 +95,7 @@ public void process() throws IOException {
9495
}
9596
return;
9697
}
97-
HRegion region = (HRegion) rs.getRegion(encodedName);
98+
HRegion region = rs.getRegion(encodedName);
9899
if (region == null) {
99100
LOG.debug(
100101
"Received CLOSE for a region {} which is not online, and we're not opening/closing.",
@@ -124,6 +125,8 @@ public void process() throws IOException {
124125
HConstants.NO_SEQNUM, closeProcId, -1, region.getRegionInfo()))) {
125126
throw new IOException("Failed to report close to master: " + regionName);
126127
}
128+
// Cache the close region procedure id after report region transition succeed.
129+
rs.finishRegionProcedure(closeProcId);
127130
rs.getRegionsInTransitionInRS().remove(encodedNameBytes, Boolean.FALSE);
128131
LOG.info("Closed {}", regionName);
129132
}
@@ -134,7 +137,7 @@ protected void handleException(Throwable t) {
134137
getServer().abort("Failed to close region " + encodedName + " and can not recover", t);
135138
}
136139

137-
public static UnassignRegionHandler create(RegionServerServices server, String encodedName,
140+
public static UnassignRegionHandler create(HRegionServer server, String encodedName,
138141
long closeProcId, boolean abort, @Nullable ServerName destination) {
139142
// Just try our best to determine whether it is for closing meta. It is not the end of the world
140143
// if we put the handler into a wrong executor.

0 commit comments

Comments
 (0)