Skip to content

Commit a96047d

Browse files
frostruanhuiruan
authored andcommitted
HBASE-27988 NPE in AddPeerProcedure recovery (#5331)
Co-authored-by: huiruan <[email protected]> Signed-off-by: Duo Zhang <[email protected]> (cherry picked from commit 67b20fd)
1 parent 331970c commit a96047d

File tree

6 files changed

+25
-16
lines changed

6 files changed

+25
-16
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
import java.util.Set;
5555
import java.util.concurrent.ExecutionException;
5656
import java.util.concurrent.Future;
57+
import java.util.concurrent.Semaphore;
5758
import java.util.concurrent.TimeUnit;
5859
import java.util.concurrent.TimeoutException;
5960
import java.util.concurrent.atomic.AtomicInteger;
@@ -368,6 +369,9 @@ public class HMaster extends HBaseServerBase<MasterRpcServices> implements Maste
368369
private final ReplicationLogCleanerBarrier replicationLogCleanerBarrier =
369370
new ReplicationLogCleanerBarrier();
370371

372+
// Only allow to add one sync replication peer concurrently
373+
private final Semaphore syncReplicationPeerLock = new Semaphore(1);
374+
371375
// manager of replication
372376
private ReplicationPeerManager replicationPeerManager;
373377

@@ -4115,6 +4119,11 @@ public ReplicationLogCleanerBarrier getReplicationLogCleanerBarrier() {
41154119
return replicationLogCleanerBarrier;
41164120
}
41174121

4122+
@Override
4123+
public Semaphore getSyncReplicationPeerLock() {
4124+
return syncReplicationPeerLock;
4125+
}
4126+
41184127
public HashMap<String, List<Pair<ServerName, ReplicationLoadSource>>>
41194128
getReplicationLoad(ServerName[] serverNames) {
41204129
List<ReplicationPeerDescription> peerList = this.getReplicationPeerManager().listPeers(null);

hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import java.io.IOException;
2121
import java.util.List;
22+
import java.util.concurrent.Semaphore;
2223
import org.apache.hadoop.hbase.Server;
2324
import org.apache.hadoop.hbase.ServerName;
2425
import org.apache.hadoop.hbase.TableDescriptors;
@@ -368,6 +369,11 @@ ReplicationPeerConfig getReplicationPeerConfig(String peerId)
368369
*/
369370
ReplicationLogCleanerBarrier getReplicationLogCleanerBarrier();
370371

372+
/**
373+
* Returns the SyncReplicationPeerLock.
374+
*/
375+
Semaphore getSyncReplicationPeerLock();
376+
371377
/**
372378
* Returns the {@link SyncReplicationReplayWALManager}.
373379
*/

hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AddPeerProcedure.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ protected void releaseLatch(MasterProcedureEnv env) {
8989
env.getMasterServices().getReplicationLogCleanerBarrier().enable();
9090
}
9191
if (peerConfig.isSyncReplication()) {
92-
env.getReplicationPeerManager().releaseSyncReplicationPeerLock();
92+
env.getMasterServices().getSyncReplicationPeerLock().release();
9393
}
9494
super.releaseLatch(env);
9595
}
@@ -108,7 +108,7 @@ protected void prePeerModification(MasterProcedureEnv env)
108108
cpHost.preAddReplicationPeer(peerId, peerConfig);
109109
}
110110
if (peerConfig.isSyncReplication()) {
111-
if (!env.getReplicationPeerManager().tryAcquireSyncReplicationPeerLock()) {
111+
if (!env.getMasterServices().getSyncReplicationPeerLock().tryAcquire()) {
112112
throw suspend(env.getMasterConfiguration(),
113113
backoff -> LOG.warn(
114114
"Can not acquire sync replication peer lock for peer {}, sleep {} secs", peerId,
@@ -147,7 +147,7 @@ protected void afterReplay(MasterProcedureEnv env) {
147147
}
148148
cleanerDisabled = true;
149149
if (peerConfig.isSyncReplication()) {
150-
if (!env.getReplicationPeerManager().tryAcquireSyncReplicationPeerLock()) {
150+
if (!env.getMasterServices().getSyncReplicationPeerLock().tryAcquire()) {
151151
throw new IllegalStateException(
152152
"Can not acquire sync replication peer lock for peer " + peerId);
153153
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/AssignReplicationQueuesProcedure.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ protected Flow executeFromState(MasterProcedureEnv env, AssignReplicationQueuesS
186186
retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
187187
}
188188
long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
189-
LOG.warn("Failed to claim replication queues for {}, suspend {}secs {}; {};", crashedServer,
189+
LOG.warn("Failed to claim replication queues for {}, suspend {} secs", crashedServer,
190190
backoff / 1000, e);
191191
setTimeout(Math.toIntExact(backoff));
192192
setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);

hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/ReplicationPeerManager.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
import java.util.concurrent.ConcurrentHashMap;
3333
import java.util.concurrent.ConcurrentMap;
3434
import java.util.concurrent.ExecutorService;
35-
import java.util.concurrent.Semaphore;
3635
import java.util.concurrent.TimeUnit;
3736
import java.util.regex.Pattern;
3837
import java.util.stream.Collectors;
@@ -111,9 +110,6 @@ public class ReplicationPeerManager implements ConfigurationObserver {
111110
SyncReplicationState.DOWNGRADE_ACTIVE,
112111
EnumSet.of(SyncReplicationState.STANDBY, SyncReplicationState.ACTIVE)));
113112

114-
// Only allow to add one sync replication peer concurrently
115-
private final Semaphore syncReplicationPeerLock = new Semaphore(1);
116-
117113
private final String clusterId;
118114

119115
private volatile Configuration conf;
@@ -713,14 +709,6 @@ private boolean isStringEquals(String s1, String s2) {
713709
return s1.equals(s2);
714710
}
715711

716-
public boolean tryAcquireSyncReplicationPeerLock() {
717-
return syncReplicationPeerLock.tryAcquire();
718-
}
719-
720-
public void releaseSyncReplicationPeerLock() {
721-
syncReplicationPeerLock.release();
722-
}
723-
724712
@Override
725713
public void onConfigurationChange(Configuration conf) {
726714
this.conf = conf;

hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import java.io.IOException;
2323
import java.util.List;
24+
import java.util.concurrent.Semaphore;
2425
import org.apache.hadoop.conf.Configuration;
2526
import org.apache.hadoop.fs.FileSystem;
2627
import org.apache.hadoop.hbase.ChoreService;
@@ -530,4 +531,9 @@ public boolean isReplicationPeerModificationEnabled() {
530531
public ReplicationLogCleanerBarrier getReplicationLogCleanerBarrier() {
531532
return null;
532533
}
534+
535+
@Override
536+
public Semaphore getSyncReplicationPeerLock() {
537+
return null;
538+
}
533539
}

0 commit comments

Comments
 (0)