From 7581fb83795f293961a7cc87693cffb174525fc3 Mon Sep 17 00:00:00 2001 From: comnetwork Date: Wed, 2 Feb 2022 20:22:21 +0800 Subject: [PATCH 1/4] HBASE-26712 Balancer encounters NPE in rare case --- .../apache/hadoop/hbase/master/HMaster.java | 13 ++ .../master/assignment/AssignmentManager.java | 4 +- .../hbase/master/TestMasterBalancerNPE.java | 201 ++++++++++++++++++ 3 files changed, 216 insertions(+), 2 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 8dd49ff4d04f..29bf74749eca 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -4095,4 +4095,17 @@ public List getMetaLocations() { public Collection getLiveRegionServers() { return regionServerTracker.getRegionServers(); } + + @RestrictedApi(explanation = "Should only be called in tests", link = "", + allowedOnPath = ".*/src/test/.*") + void setLoadBalancer(RSGroupBasedLoadBalancer loadBalancer) { + this.balancer = loadBalancer; + } + + @RestrictedApi(explanation = "Should only be called in tests", link = "", + allowedOnPath = ".*/src/test/.*") + void setAssignmentManager(AssignmentManager assignmentManager) { + this.assignmentManager = assignmentManager; + } + } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index 015883a79b95..d94edea912a2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -829,9 +829,9 @@ public Future moveAsync(RegionPlan regionPlan) throws HBaseIOException { public Future balance(RegionPlan regionPlan) throws HBaseIOException { ServerName current = this.getRegionStates().getRegionAssignments().get(regionPlan.getRegionInfo()); - if (!current.equals(regionPlan.getSource())) { + if (current == null || !current.equals(regionPlan.getSource())) { LOG.debug("Skip region plan {}, source server not match, current region location is {}", - regionPlan, current); + regionPlan, current == null ? "(null)" : current); return null; } return moveAsync(regionPlan); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java new file mode 100644 index 000000000000..f3c959e61d36 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java @@ -0,0 +1,201 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master; + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.atomic.AtomicReference; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.assignment.AssignmentManager; +import org.apache.hadoop.hbase.rsgroup.RSGroupBasedLoadBalancer; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; + +@Category({ MasterTests.class, MediumTests.class }) +public class TestMasterBalancerNPE { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestMasterBalancerNPE.class); + + private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); + private static final byte[] FAMILYNAME = Bytes.toBytes("fam"); + @Rule + public TestName name = new TestName(); + + @Before + public void setupConfiguration() { + TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, Integer.MAX_VALUE); + } + + @After + public void shutdown() throws Exception { + TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, + HConstants.DEFAULT_HBASE_BALANCER_PERIOD); + TEST_UTIL.shutdownMiniCluster(); + } + + /** + * This test is for HBASE-26712, to make the region is unassigned just before + * {@link AssignmentManager#balance} is invoked on the region. + */ + @Test + public void testBalancerNPE() throws Exception { + TEST_UTIL.startMiniCluster(2); + TEST_UTIL.getAdmin().balancerSwitch(false, true); + TableName tableName = createTable(name.getMethodName()); + final HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); + List regionInfos = TEST_UTIL.getAdmin().getRegions(tableName); + assertTrue(regionInfos.size() == 1); + final ServerName serverName1 = + TEST_UTIL.getMiniHBaseCluster().getRegionServer(0).getServerName(); + final ServerName serverName2 = + TEST_UTIL.getMiniHBaseCluster().getRegionServer(1).getServerName(); + + final RegionInfo regionInfo = regionInfos.get(0); + + RSGroupBasedLoadBalancer loadBalancer = master.getLoadBalancer(); + RSGroupBasedLoadBalancer spiedLoadBalancer = Mockito.spy(loadBalancer); + final AtomicReference regionPlanRef = new AtomicReference(); + + /** + * Mock {@link RSGroupBasedLoadBalancer#balanceCluster} to return the {@link RegionPlan} to move + * the only region to the other RegionServer. + */ + Mockito.doAnswer((InvocationOnMock invocation) -> { + @SuppressWarnings("unchecked") + Map>> tableNameToRegionServerNameToRegionInfos = + (Map>>) invocation.getArgument(0); + Map> regionServerNameToRegionInfos = + tableNameToRegionServerNameToRegionInfos.get(tableName); + assertTrue(regionServerNameToRegionInfos.size() == 2); + List assignedRegionServerNames = new ArrayList(); + for (Map.Entry> entry : regionServerNameToRegionInfos + .entrySet()) { + if (entry.getValue().size() > 0) { + assignedRegionServerNames.add(entry.getKey()); + } + } + assertTrue(assignedRegionServerNames.size() == 1); + ServerName assignedRegionServerName = assignedRegionServerNames.get(0); + ServerName notAssignedRegionServerName = + assignedRegionServerName.equals(serverName1) ? serverName2 : serverName1; + RegionPlan regionPlan = + new RegionPlan(regionInfo, assignedRegionServerName, notAssignedRegionServerName); + regionPlanRef.set(regionPlan); + return Arrays.asList(regionPlan); + }).when(spiedLoadBalancer).balanceCluster(Mockito.anyMap()); + + AssignmentManager assignmentManager = master.getAssignmentManager(); + final AssignmentManager spiedAssignmentManager = Mockito.spy(assignmentManager); + final CyclicBarrier cyclicBarrier = new CyclicBarrier(2); + + /** + * Override {@link AssignmentManager#balance} to invoke real {@link AssignmentManager#balance} + * after the region is successfully unassigned. + */ + Mockito.doAnswer((InvocationOnMock invocation) -> { + RegionPlan regionPlan = invocation.getArgument(0); + RegionPlan referedRegionPlan = regionPlanRef.get(); + assertTrue(referedRegionPlan != null); + if (referedRegionPlan.equals(regionPlan)) { + /** + * To make {@link AssignmentManager#unassign} could be invoked just before + * {@link AssignmentManager#balance} is invoked. + */ + cyclicBarrier.await(); + /** + * After {@link AssignmentManager#unassign} is completed,we could invoke + * {@link AssignmentManager#balance}. + */ + cyclicBarrier.await(); + } + return invocation.callRealMethod(); + }).when(spiedAssignmentManager).balance(Mockito.any()); + + + try { + final AtomicReference exceptionRef = new AtomicReference(null); + Thread unassignThread = new Thread(() -> { + try { + /** + * To invoke {@link AssignmentManager#unassign} just before + * {@link AssignmentManager#balance} is invoked. + */ + cyclicBarrier.await(); + spiedAssignmentManager.unassign(regionInfo); + assertTrue(spiedAssignmentManager.getRegionStates().getRegionAssignments() + .get(regionInfo) == null); + /** + * After {@link AssignmentManager#unassign} is completed,we could invoke + * {@link AssignmentManager#balance}. + */ + cyclicBarrier.await(); + } catch (Exception e) { + exceptionRef.set(e); + } + }); + unassignThread.setName("UnassignThread"); + unassignThread.start(); + + synchronized (loadBalancer) { + synchronized (spiedLoadBalancer) { + master.setLoadBalancer(spiedLoadBalancer); + master.setAssignmentManager(spiedAssignmentManager); + TEST_UTIL.getAdmin().balancerSwitch(true, false); + master.balance(); + } + } + unassignThread.join(); + assertTrue(exceptionRef.get() == null); + } finally { + master.setLoadBalancer(loadBalancer); + master.setAssignmentManager(assignmentManager); + } + + } + + private TableName createTable(String table) throws IOException { + TableName tableName = TableName.valueOf(table); + TEST_UTIL.createTable(tableName, FAMILYNAME); + return tableName; + } + +} From 53f03e050adc56151efa5a2003a0d0488efab297 Mon Sep 17 00:00:00 2001 From: comnetwork Date: Thu, 3 Feb 2022 09:36:46 +0800 Subject: [PATCH 2/4] add more comments --- .../hadoop/hbase/master/TestMasterBalancerNPE.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java index f3c959e61d36..ace31a1f0319 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java @@ -147,6 +147,9 @@ public void testBalancerNPE() throws Exception { */ cyclicBarrier.await(); } + /** + * Before HBASE-26712,here may throw NPE. + */ return invocation.callRealMethod(); }).when(spiedAssignmentManager).balance(Mockito.any()); @@ -179,7 +182,14 @@ public void testBalancerNPE() throws Exception { synchronized (spiedLoadBalancer) { master.setLoadBalancer(spiedLoadBalancer); master.setAssignmentManager(spiedAssignmentManager); + /** + * enable balance + */ TEST_UTIL.getAdmin().balancerSwitch(true, false); + /** + * Before HBASE-26712,here invokes {@link AssignmentManager#balance(RegionPlan)} + * which may throw NPE. + */ master.balance(); } } From 67dd000cf2157605accc380913cec0bcfc9c6622 Mon Sep 17 00:00:00 2001 From: comnetwork Date: Fri, 11 Feb 2022 11:24:00 +0800 Subject: [PATCH 3/4] simplify test --- .../apache/hadoop/hbase/master/HMaster.java | 11 +++++- .../hbase/master/TestMasterBalancerNPE.java | 37 +++++++++---------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 29bf74749eca..11e5da52007e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -363,6 +363,7 @@ public class HMaster extends HBaseServerBase implements Maste private RSGroupBasedLoadBalancer balancer; private BalancerChore balancerChore; + private static boolean disableBalancerChoreForTest = false; private RegionNormalizerManager regionNormalizerManager; private ClusterStatusChore clusterStatusChore; private ClusterStatusPublisher clusterStatusPublisherChore = null; @@ -1101,7 +1102,9 @@ private void finishActiveMasterInitialization(MonitoredTask status) throws IOExc this.clusterStatusChore = new ClusterStatusChore(this, balancer); getChoreService().scheduleChore(clusterStatusChore); this.balancerChore = new BalancerChore(this); - getChoreService().scheduleChore(balancerChore); + if (!disableBalancerChoreForTest) { + getChoreService().scheduleChore(balancerChore); + } if (regionNormalizerManager != null) { getChoreService().scheduleChore(regionNormalizerManager.getRegionNormalizerChore()); } @@ -4108,4 +4111,10 @@ void setAssignmentManager(AssignmentManager assignmentManager) { this.assignmentManager = assignmentManager; } + @RestrictedApi(explanation = "Should only be called in tests", link = "", + allowedOnPath = ".*/src/test/.*") + static void setDisableBalancerChoreForTest(boolean disable) { + disableBalancerChoreForTest = disable; + } + } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java index ace31a1f0319..d564785e6b02 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtil; -import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; @@ -62,13 +61,15 @@ public class TestMasterBalancerNPE { @Before public void setupConfiguration() { - TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, Integer.MAX_VALUE); + /** + * Make {@link BalancerChore} not run,so does not disrupt the test. + */ + HMaster.setDisableBalancerChoreForTest(true); } @After public void shutdown() throws Exception { - TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, - HConstants.DEFAULT_HBASE_BALANCER_PERIOD); + HMaster.setDisableBalancerChoreForTest(false); TEST_UTIL.shutdownMiniCluster(); } @@ -178,28 +179,24 @@ public void testBalancerNPE() throws Exception { unassignThread.setName("UnassignThread"); unassignThread.start(); - synchronized (loadBalancer) { - synchronized (spiedLoadBalancer) { - master.setLoadBalancer(spiedLoadBalancer); - master.setAssignmentManager(spiedAssignmentManager); - /** - * enable balance - */ - TEST_UTIL.getAdmin().balancerSwitch(true, false); - /** - * Before HBASE-26712,here invokes {@link AssignmentManager#balance(RegionPlan)} - * which may throw NPE. - */ - master.balance(); - } - } + master.setLoadBalancer(spiedLoadBalancer); + master.setAssignmentManager(spiedAssignmentManager); + /** + * enable balance + */ + TEST_UTIL.getAdmin().balancerSwitch(true, false); + /** + * Before HBASE-26712,here invokes {@link AssignmentManager#balance(RegionPlan)} + * which may throw NPE. + */ + master.balance(); + unassignThread.join(); assertTrue(exceptionRef.get() == null); } finally { master.setLoadBalancer(loadBalancer); master.setAssignmentManager(assignmentManager); } - } private TableName createTable(String table) throws IOException { From 487c8e4360ed1c7818a8f756b55558b1efc3d5a6 Mon Sep 17 00:00:00 2001 From: comnetwork Date: Mon, 14 Feb 2022 16:10:36 +0800 Subject: [PATCH 4/4] insync with branch-2 --- .../org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java index d564785e6b02..5ce8a6842826 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterBalancerNPE.java @@ -189,7 +189,7 @@ public void testBalancerNPE() throws Exception { * Before HBASE-26712,here invokes {@link AssignmentManager#balance(RegionPlan)} * which may throw NPE. */ - master.balance(); + master.balanceOrUpdateMetrics(); unassignThread.join(); assertTrue(exceptionRef.get() == null);