From 6db4b16feb853af893117098b56bd44fc4dff907 Mon Sep 17 00:00:00 2001 From: Rushabh Date: Thu, 15 Aug 2019 09:05:58 -0700 Subject: [PATCH 1/3] [HBASE-22601] Misconfigured addition of peers leads to cluster shutdown. --- .../replication/regionserver/ReplicationSource.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java index 7e36d625c02c..945808d687a0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java @@ -504,6 +504,15 @@ private void initialize() { } } + if (peerClusterId == null) { + // In some cases, it is possible that peerClusterId is null because it couldn't read + // peer cluster id from zookeeper. One case this might happen is because 2 clusters don't + // have kerberos trust setup. + this.terminate("Peer ClusterId returned is null", null, false); + this.manager.removeSource(this); + return; + } + // In rare case, zookeeper setting may be messed up. That leads to the incorrect // peerClusterId value, which is the same as the source clusterId if (clusterId.equals(peerClusterId) && !replicationEndpoint.canReplicateToSameCluster()) { From c9c41eed7b717e9a9326dc08bf2f1ee53f3d9bf4 Mon Sep 17 00:00:00 2001 From: Rushabh Date: Sat, 17 Aug 2019 16:07:27 -0700 Subject: [PATCH 2/3] [HBASE-22601] Addressing test failures. 
--- .../replication/regionserver/ReplicationSourceManager.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java index 12a903ac115e..ddc55f85dfb3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java @@ -484,8 +484,10 @@ public void refreshSources(String peerId) throws IOException { LOG.info("Terminate replication source for " + toRemove.getPeerId()); toRemove.terminate(terminateMessage); } - for (NavigableSet walsByGroup : walsById.get(peerId).values()) { - walsByGroup.forEach(wal -> src.enqueueLog(new Path(this.logDir, wal))); + if (walsById.get(peerId) != null) { + for (NavigableSet walsByGroup : walsById.get(peerId).values()) { + walsByGroup.forEach(wal -> src.enqueueLog(new Path(this.logDir, wal))); + } } } LOG.info("Startup replication source for " + src.getPeerId()); From c5b7f7e9832886cfc0164e7a0efae478b94055c9 Mon Sep 17 00:00:00 2001 From: Rushabh Date: Sat, 17 Aug 2019 21:57:23 -0700 Subject: [PATCH 3/3] [HBASE-22601] Addressing review comments. 
--- .../hbase/replication/regionserver/ReplicationSource.java | 7 +------ .../replication/regionserver/ReplicationSourceManager.java | 6 ++---- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java index 945808d687a0..948c24dc4f86 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java @@ -504,12 +504,7 @@ private void initialize() { } } - if (peerClusterId == null) { - // In some cases, it is possible that peerClusterId is null because it couldn't read - // peer cluster id from zookeeper. One case this might happen is because 2 clusters don't - // have kerberos trust setup. - this.terminate("Peer ClusterId returned is null", null, false); - this.manager.removeSource(this); + if(!this.isSourceActive()) { return; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java index ddc55f85dfb3..12a903ac115e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java @@ -484,10 +484,8 @@ public void refreshSources(String peerId) throws IOException { LOG.info("Terminate replication source for " + toRemove.getPeerId()); toRemove.terminate(terminateMessage); } - if (walsById.get(peerId) != null) { - for (NavigableSet walsByGroup : walsById.get(peerId).values()) { - walsByGroup.forEach(wal -> src.enqueueLog(new Path(this.logDir, wal))); - } + for (NavigableSet walsByGroup : 
walsById.get(peerId).values()) { + walsByGroup.forEach(wal -> src.enqueueLog(new Path(this.logDir, wal))); } } LOG.info("Startup replication source for " + src.getPeerId());