From 61450e626a9aa1a4ffebe4cbce7bee020c310934 Mon Sep 17 00:00:00 2001 From: Caroline Zhou Date: Fri, 20 Aug 2021 12:12:35 -0700 Subject: [PATCH 1/5] HBASE-26163 Better logging in RSGroupInfoManagerImpl --- .../rsgroup/RSGroupBasedLoadBalancer.java | 6 ++++ .../hbase/rsgroup/RSGroupInfoManagerImpl.java | 34 +++++++++++++++---- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java index e2e9945a8321..12a66027d5d7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java @@ -26,6 +26,8 @@ import java.util.Map; import java.util.Set; import java.util.TreeMap; +import java.util.stream.Collectors; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ClusterMetrics; import org.apache.hadoop.hbase.HBaseIOException; @@ -129,6 +131,7 @@ public synchronized List balanceCluster( try { // For each rsgroup for (RSGroupInfo rsgroup : rsGroupInfoManager.listRSGroups()) { + LOG.debug("Balancing RSGroup={}", rsgroup.getName()); Map>> loadOfTablesInGroup = new HashMap<>(); for (Map.Entry>> entry : correctedLoadOfAllTable .entrySet()) { @@ -234,6 +237,9 @@ private List, List>> generateGroupAssignments( if (!fallbackRegions.isEmpty()) { List candidates = null; if (isFallbackEnabled()) { + LOG.debug("Falling back {} regions to servers outside their RSGroup. Regions: {}", + fallbackRegions.size(), fallbackRegions.stream().map(RegionInfo::getRegionNameAsString) + .collect(Collectors.toSet())); candidates = getFallBackCandidates(servers); } candidates = (candidates == null || candidates.isEmpty()) ? diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java index 3ef9365456fd..aa56e19f5dbc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java @@ -256,7 +256,9 @@ private synchronized void updateDefaultServers() { newGroupMap.put(RSGroupInfo.DEFAULT_GROUP, newDefaultGroupInfo); // do not need to persist, as we do not persist default group. resetRSGroupMap(newGroupMap); - LOG.info("Updated default servers, {} servers", newDefaultGroupInfo.getServers().size()); + LOG.info("Updated default servers, now {} servers online: {}", + newDefaultGroupInfo.getServers().size(), + newDefaultGroupInfo.getServers().stream().map(Address::toString).collect(Collectors.toSet())); } private synchronized void init() throws IOException { @@ -298,6 +300,7 @@ public synchronized void addRSGroup(RSGroupInfo rsGroupInfo) throws IOException Map newGroupMap = Maps.newHashMap(rsGroupMap); newGroupMap.put(rsGroupInfo.getName(), rsGroupInfo); flushConfig(newGroupMap); + LOG.info("Add group {} done.", rsGroupInfo.getName()); } private RSGroupInfo getRSGroupInfo(final String groupName) throws ConstraintException { @@ -331,7 +334,7 @@ public synchronized Set
moveServers(Set
servers, String srcGro if (onlineServers != null) { if (!onlineServers.contains(el)) { if (LOG.isDebugEnabled()) { - LOG.debug("Dropping " + el + " during move-to-default rsgroup because not online"); + LOG.debug("Dropping " + el + " during move-to-default RSGroup because not online"); } continue; } @@ -373,8 +376,8 @@ public synchronized void removeRSGroup(String groupName) throws IOException { for (TableDescriptor td : masterServices.getTableDescriptors().getAll().values()) { if (td.getRegionServerGroup().map(groupName::equals).orElse(false)) { throw new ConstraintException("RSGroup " + groupName + " is already referenced by " + - td.getTableName() + "; you must remove all the tables from the rsgroup before " + - "the rsgroup can be removed."); + td.getTableName() + "; you must remove all the tables from the RSGroup before " + + "the RSGroup can be removed."); } } for (NamespaceDescriptor ns : masterServices.getClusterSchema().getNamespaces()) { @@ -392,6 +395,7 @@ public synchronized void removeRSGroup(String groupName) throws IOException { Map newGroupMap = Maps.newHashMap(rsGroupMap); newGroupMap.remove(groupName); flushConfig(newGroupMap); + LOG.info("Remove group {} done", groupName); } @Override @@ -639,6 +643,8 @@ private synchronized void flushConfig(Map newGroupMap) thro return; } + LOG.debug("Offline mode, cannot persist to {}", RSGROUP_TABLE_NAME); + Map oldGroupMap = Maps.newHashMap(holder.groupName2Group); RSGroupInfo oldDefaultGroup = oldGroupMap.remove(RSGroupInfo.DEFAULT_GROUP); RSGroupInfo newDefaultGroup = newGroupMap.remove(RSGroupInfo.DEFAULT_GROUP); @@ -655,6 +661,8 @@ private synchronized void flushConfig(Map newGroupMap) thro // according to the inputted newGroupMap (an updated copy of rsGroupMap) this.holder = new RSGroupInfoHolder(newGroupMap); + LOG.info("New RSGroup map: {}", newGroupMap.toString()); + // Do not need to update tableMap // because only the update on servers in default group is allowed above, // or IOException will be thrown @@ -662,15 +670,18 @@ private synchronized void flushConfig(Map newGroupMap) thro } /* For online mode, persist to hbase:rsgroup and Zookeeper */ + LOG.debug("Online mode, persisting to {} and ZK", RSGROUP_TABLE_NAME); flushConfigTable(newGroupMap); // Make changes visible after having been persisted to the source of truth resetRSGroupMap(newGroupMap); saveRSGroupMapToZK(newGroupMap); updateCacheOfRSGroups(newGroupMap.keySet()); + LOG.info("Flush config done, new RSGroup map: {}", newGroupMap.toString()); } private void saveRSGroupMapToZK(Map newGroupMap) throws IOException { + LOG.debug("Saving RSGroup info to ZK"); try { String groupBasePath = ZNodePaths.joinZNode(watcher.getZNodePaths().baseZNode, RS_GROUP_ZNODE); @@ -800,6 +811,7 @@ private void createRSGroupTable() throws IOException { if (optProcId.isPresent()) { procId = optProcId.getAsLong(); } else { + LOG.debug("Creating group table {}", RSGROUP_TABLE_NAME); procId = masterServices.createSystemTable(RSGROUP_TABLE_DESC); } // wait for region to be online @@ -849,9 +861,11 @@ private void multiMutate(List mutations) throws IOException { } MutateRowsRequest request = builder.build(); AsyncTable table = conn.getTable(RSGROUP_TABLE_NAME); + LOG.debug("Multimutating {} with {} mutations", RSGROUP_TABLE_NAME, mutations.size()); FutureUtils.get(table. coprocessorService( MultiRowMutationService::newStub, (stub, controller, done) -> stub.mutateRows(controller, request, done), ROW_KEY)); + LOG.info("Multimutating {} with {} mutations done", RSGROUP_TABLE_NAME, mutations.size()); } private void checkGroupName(String groupName) throws ConstraintException { @@ -871,7 +885,7 @@ public RSGroupInfo getRSGroupForTable(TableName tableName) throws IOException { * @param servers servers to remove */ private void checkForDeadOrOnlineServers(Set
servers) throws IOException { - // This uglyness is because we only have Address, not ServerName. + // This ugliness is because we only have Address, not ServerName. Set
onlineServers = new HashSet<>(); List drainingServers = masterServices.getServerManager().getDrainingServersList(); for (ServerName server : masterServices.getServerManager().getOnlineServers().keySet()) { @@ -1001,7 +1015,7 @@ private void moveRegionsBetweenGroups(Set regionsOwners, Set
new // Get regions that are associated with this server and filter regions by group tables. for (RegionInfo region : getRegionsInfo.apply((T) owner.getAddress())) { if (!validation.apply(region)) { - LOG.info("Moving region {}, which do not belong to RSGroup {}", + LOG.info("Moving region {}, which does not belong to RSGroup {}", region.getShortNameToLog(), targetGroupName); // Move region back to source RSGroup servers ServerName dest = @@ -1186,6 +1200,7 @@ public boolean balanceRSGroup(String groupName) throws IOException { } private void moveTablesAndWait(Set tables, String targetGroup) throws IOException { + LOG.debug("Moving {} tables to target group {}", tables.size(), targetGroup); List procIds = new ArrayList(); for (TableName tableName : tables) { TableDescriptor oldTd = masterServices.getTableDescriptors().get(tableName); @@ -1205,6 +1220,8 @@ private void moveTablesAndWait(Set tables, String targetGroup) throws ProcedureSyncWait.waitForProcedureToCompleteIOE(masterServices.getMasterProcedureExecutor(), proc, Long.MAX_VALUE); } + LOG.info("Move tables done. Moved {} tables to {}: {}", tables.size(), targetGroup, + tables.stream().map(TableName::getNameAsString).collect(Collectors.toSet())); } @Override @@ -1270,7 +1287,9 @@ public void moveServers(Set
servers, String targetGroupName) throws IOE Set
movedServers = moveServers(servers, srcGrp.getName(), targetGroupName); moveServerRegionsFromGroup(movedServers, srcGrp.getServers(), targetGroupName, srcGrp.getName()); - LOG.info("Move servers done: {} => {}", srcGrp.getName(), targetGroupName); + LOG.info("Move servers done. Moved {} servers {} => {}: {}", movedServers.size(), + srcGrp.getName(), targetGroupName, + movedServers.stream().map(Address::toString).collect(Collectors.toSet())); } } @@ -1304,6 +1323,7 @@ public synchronized void renameRSGroup(String oldName, String newName) throws IO .map(TableDescriptor::getTableName) .collect(Collectors.toSet()); setRSGroup(updateTables, newName); + LOG.info("Rename RSGroup done: {} => {}", oldName, newName); } @Override From c8145e000d927bb16a7107d6e5ebff11324f7d93 Mon Sep 17 00:00:00 2001 From: Caroline Zhou Date: Mon, 23 Aug 2021 13:55:41 -0700 Subject: [PATCH 2/5] move long messages to debug level --- .../hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java | 8 +++++--- .../hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java | 10 ++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java index 12a66027d5d7..4e3c0f799d2f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupBasedLoadBalancer.java @@ -237,9 +237,11 @@ private List, List>> generateGroupAssignments( if (!fallbackRegions.isEmpty()) { List candidates = null; if (isFallbackEnabled()) { - LOG.debug("Falling back {} regions to servers outside their RSGroup. Regions: {}", - fallbackRegions.size(), fallbackRegions.stream().map(RegionInfo::getRegionNameAsString) - .collect(Collectors.toSet())); + if (LOG.isDebugEnabled()) { + LOG.debug("Falling back {} regions to servers outside their RSGroup. Regions: {}", + fallbackRegions.size(), fallbackRegions.stream() + .map(RegionInfo::getRegionNameAsString).collect(Collectors.toSet())); + } candidates = getFallBackCandidates(servers); } candidates = (candidates == null || candidates.isEmpty()) ? diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java index aa56e19f5dbc..1e2f11464bb8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java @@ -256,9 +256,11 @@ private synchronized void updateDefaultServers() { newGroupMap.put(RSGroupInfo.DEFAULT_GROUP, newDefaultGroupInfo); // do not need to persist, as we do not persist default group. resetRSGroupMap(newGroupMap); - LOG.info("Updated default servers, now {} servers online: {}", - newDefaultGroupInfo.getServers().size(), - newDefaultGroupInfo.getServers().stream().map(Address::toString).collect(Collectors.toSet())); + LOG.info("Updated default servers, {} servers", newDefaultGroupInfo.getServers().size()); + if (LOG.isDebugEnabled()) { + LOG.debug("New default servers list: {}", newDefaultGroupInfo.getServers().stream() + .map(Address::toString).collect(Collectors.toSet())); + } } private synchronized void init() throws IOException { @@ -661,7 +663,7 @@ private synchronized void flushConfig(Map newGroupMap) thro // according to the inputted newGroupMap (an updated copy of rsGroupMap) this.holder = new RSGroupInfoHolder(newGroupMap); - LOG.info("New RSGroup map: {}", newGroupMap.toString()); + LOG.debug("New RSGroup map: {}", newGroupMap.toString()); // Do not need to update tableMap // because only the update on servers in default group is allowed above, From e7aeb13457611b72b1d5e8485ede131795848597 Mon Sep 17 00:00:00 2001 From: Caroline Zhou Date: Mon, 23 Aug 2021 21:00:37 -0700 Subject: [PATCH 3/5] pr rev --- .../apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java index 1e2f11464bb8..e4cb3d48f16b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java @@ -663,7 +663,7 @@ private synchronized void flushConfig(Map newGroupMap) thro // according to the inputted newGroupMap (an updated copy of rsGroupMap) this.holder = new RSGroupInfoHolder(newGroupMap); - LOG.debug("New RSGroup map: {}", newGroupMap.toString()); + LOG.debug("New RSGroup map: {}", newGroupMap); // Do not need to update tableMap // because only the update on servers in default group is allowed above, @@ -679,7 +679,7 @@ private synchronized void flushConfig(Map newGroupMap) thro resetRSGroupMap(newGroupMap); saveRSGroupMapToZK(newGroupMap); updateCacheOfRSGroups(newGroupMap.keySet()); - LOG.info("Flush config done, new RSGroup map: {}", newGroupMap.toString()); + LOG.info("Flush config done, new RSGroup map: {}", newGroupMap); } private void saveRSGroupMapToZK(Map newGroupMap) throws IOException { From 608893ba58b56429fe8d086427c5279fc3bc48fc Mon Sep 17 00:00:00 2001 From: Caroline Zhou Date: Wed, 25 Aug 2021 18:06:04 -0700 Subject: [PATCH 4/5] remove tostring --- .../hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java index e4cb3d48f16b..fd544746fa9f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java @@ -258,8 +258,7 @@ private synchronized void updateDefaultServers() { resetRSGroupMap(newGroupMap); LOG.info("Updated default servers, {} servers", newDefaultGroupInfo.getServers().size()); if (LOG.isDebugEnabled()) { - LOG.debug("New default servers list: {}", newDefaultGroupInfo.getServers().stream() - .map(Address::toString).collect(Collectors.toSet())); + LOG.debug("New default servers list: {}", newDefaultGroupInfo.getServers()); } } @@ -1222,8 +1221,7 @@ private void moveTablesAndWait(Set tables, String targetGroup) throws ProcedureSyncWait.waitForProcedureToCompleteIOE(masterServices.getMasterProcedureExecutor(), proc, Long.MAX_VALUE); } - LOG.info("Move tables done. Moved {} tables to {}: {}", tables.size(), targetGroup, - tables.stream().map(TableName::getNameAsString).collect(Collectors.toSet())); + LOG.info("Move tables done. Moved {} tables to {}: {}", tables.size(), targetGroup, tables); } @Override @@ -1289,9 +1287,8 @@ public void moveServers(Set
servers, String targetGroupName) throws IOE Set
movedServers = moveServers(servers, srcGrp.getName(), targetGroupName); moveServerRegionsFromGroup(movedServers, srcGrp.getServers(), targetGroupName, srcGrp.getName()); - LOG.info("Move servers done. Moved {} servers {} => {}: {}", movedServers.size(), - srcGrp.getName(), targetGroupName, - movedServers.stream().map(Address::toString).collect(Collectors.toSet())); + LOG.info("Move servers done. Moved {} servers from {} => {}: {}", movedServers.size(), + srcGrp.getName(), targetGroupName, movedServers); } } From 1c7711ace83fe8101c6b585ada00ebeda6e2b26d Mon Sep 17 00:00:00 2001 From: Caroline Zhou Date: Thu, 26 Aug 2021 17:18:13 -0700 Subject: [PATCH 5/5] move lists to debug --- .../hbase/rsgroup/RSGroupInfoManagerImpl.java | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java index fd544746fa9f..be13d949f981 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java @@ -1221,7 +1221,10 @@ private void moveTablesAndWait(Set tables, String targetGroup) throws ProcedureSyncWait.waitForProcedureToCompleteIOE(masterServices.getMasterProcedureExecutor(), proc, Long.MAX_VALUE); } - LOG.info("Move tables done. Moved {} tables to {}: {}", tables.size(), targetGroup, tables); + LOG.info("Move tables done: moved {} tables to {}", tables.size(), targetGroup); + if (LOG.isDebugEnabled()) { + LOG.debug("Tables moved to {}: {}", targetGroup, tables); + } } @Override @@ -1287,8 +1290,12 @@ public void moveServers(Set
servers, String targetGroupName) throws IOE Set
movedServers = moveServers(servers, srcGrp.getName(), targetGroupName); moveServerRegionsFromGroup(movedServers, srcGrp.getServers(), targetGroupName, srcGrp.getName()); - LOG.info("Move servers done. Moved {} servers from {} => {}: {}", movedServers.size(), - srcGrp.getName(), targetGroupName, movedServers); + LOG.info("Move servers done: moved {} servers from {} to {}", movedServers.size(), + srcGrp.getName(), targetGroupName); + if (LOG.isDebugEnabled()) { + LOG.debug("Servers moved from {} to {}: {}", srcGrp.getName(), targetGroupName, + movedServers); + } } }