diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index b0682d227e42..0522ad19f5a8 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -136,6 +136,12 @@ public enum OperationStatusCode {
   /** Default value for the max percent of regions in transition */
   public static final double DEFAULT_HBASE_MASTER_BALANCER_MAX_RIT_PERCENT = 1.0;
 
+  /** Config for how long (ms) a non-meta region move waits for meta to leave transition. */
+  public static final String HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT =
+      "hbase.master.waiting.meta.assignment.timeout";
+
+  public static final long HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT_DEFAULT = 10000;
+
   /** Config for the max balancing time */
   public static final String HBASE_BALANCER_MAX_BALANCING = "hbase.balancer.max.balancing";
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index df8402bc8c50..69db145b9957 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -393,6 +393,9 @@ public void run() {
   // Cached clusterId on stand by masters to serve clusterID requests from clients.
   private final CachedClusterId cachedClusterId;
 
+  // How long (ms) a non-meta region move waits for meta regions to leave transition.
+  private final long timeoutWaitMetaRegionAssignment;
+
   public static class RedirectServlet extends HttpServlet {
     private static final long serialVersionUID = 2894774810058302473L;
     private final int regionServerInfoPort;
@@ -498,6 +501,9 @@ public HMaster(final Configuration conf, CoordinatedStateManager csm)
     this.maxBalancingTime = getMaxBalancingTime();
     this.maxRitPercent = conf.getDouble(HConstants.HBASE_MASTER_BALANCER_MAX_RIT_PERCENT,
       HConstants.DEFAULT_HBASE_MASTER_BALANCER_MAX_RIT_PERCENT);
+    this.timeoutWaitMetaRegionAssignment =
+      conf.getLong(HConstants.HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT,
+        HConstants.HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT_DEFAULT);
 
     // Do we publish the status?
 
@@ -1845,12 +1851,24 @@ public void move(final byte[] encodedRegionName,
       // closed
       serverManager.sendRegionWarmup(rp.getDestination(), hri);
 
+      // Non-meta moves wait out the timeout while meta is in transition, then fail fast.
+      if (!hri.isMetaRegion() && assignmentManager.getRegionStates().isMetaRegionInTransition()) {
+        Thread.sleep(timeoutWaitMetaRegionAssignment);
+        if (assignmentManager.getRegionStates().isMetaRegionInTransition()) {
+          throw new HBaseIOException("Fail-fast of the region move, " +
+            " because hbase:meta region is still in transition. Failed region move info:" + rp);
+        }
+      }
       LOG.info(getClientIdAuditPrefix() + " move " + rp + ", running balancer");
       this.assignmentManager.balance(rp);
       if (this.cpHost != null) {
         this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
       }
-    } catch (IOException ioe) {
+    } catch (IOException | InterruptedException ioe) {
+      if (ioe instanceof InterruptedException) {
+        // Restore the interrupt status so code further up the stack can still observe it.
+        Thread.currentThread().interrupt();
+      }
       if (ioe instanceof HBaseIOException) {
         throw (HBaseIOException)ioe;
       }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
index 381c8a4b1e49..39df948adb3b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
@@ -28,6 +28,7 @@
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseIOException;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
@@ -46,7 +47,9 @@
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.zookeeper.KeeperException;
 import org.junit.AfterClass;
+import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
@@ -66,6 +69,9 @@ public class TestMaster {
   public static void beforeAllTests() throws Exception {
     // we will retry operations when PleaseHoldException is thrown
     TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 3);
+    // Wait only 1 ms for meta assignment so the fail-fast path is reached quickly in tests.
+    TEST_UTIL.getConfiguration().
+      setLong(HConstants.HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT, 1);
     // Set hbase.min.version.move.system.tables as version 0 so that
     // testMoveRegionWhenNotInitialized never fails even if hbase-default has valid default
     // value present for production use-case.
@@ -188,5 +194,60 @@ public void testMoveThrowsPleaseHoldException() throws IOException {
       TEST_UTIL.deleteTable(tableName);
     }
   }
+
+  @Test (timeout = 300000)
+  public void testMoveRegionWhenMetaRegionInTransition()
+      throws IOException, InterruptedException, KeeperException {
+    TableName tableName = TableName.valueOf("testMoveRegionWhenMetaRegionInTransition");
+    HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
+    HTableDescriptor htd = new HTableDescriptor(tableName);
+    HColumnDescriptor hcd = new HColumnDescriptor("value");
+    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
+    htd.addFamily(hcd);
+
+    admin.createTable(htd, null);
+    try {
+      HRegionInfo hri = admin.getTableRegions(tableName).get(0);
+
+      HRegionInfo metaRegion = admin.getTableRegions(TableName.META_TABLE_NAME).get(0);
+
+      ServerName rs0 = TEST_UTIL.getHBaseCluster().getRegionServer(0).getServerName();
+      ServerName rs1 = TEST_UTIL.getHBaseCluster().getRegionServer(1).getServerName();
+
+      admin.move(hri.getEncodedNameAsBytes(), rs0.getServerName().getBytes());
+      while (regionStates.isRegionInTransition(hri)) {
+        // Poll until the user region is no longer in transition after the move to rs0.
+        Thread.sleep(1000);
+      }
+      // Unassign meta so that it is (expected to be) in transition during the next move.
+      master.assignmentManager.unassign(metaRegion);
+      // With meta in transition, moving the user region to rs1 must fail fast.
+      try{
+        master.move(hri.getEncodedNameAsBytes(), rs1.getServerName().getBytes());
+        Assert.fail("Admin move should not be successful here.");
+      } catch (HBaseIOException e) {
+        assertTrue(e.getMessage().contains("Fail-fast"));
+      }
+      // Allow time for a move to happen, in case it was wrongly carried out.
+      Thread.sleep(HConstants.HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT_DEFAULT);
+      // The region should still be on rs0.
+      TEST_UTIL.assertRegionOnServer(hri, rs0, 5000);
+
+      // Reassign meta and poll until it is no longer in transition.
+      admin.assign(metaRegion.getEncodedNameAsBytes());
+      while (regionStates.isMetaRegionInTransition()) {
+        Thread.sleep(1000);
+      }
+
+      // With meta no longer in transition, retry the move to rs1.
+      admin.move(hri.getEncodedNameAsBytes(), rs1.getServerName().getBytes());
+
+      Thread.sleep(HConstants.HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT_DEFAULT);
+      // This time the region should end up on rs1.
+      TEST_UTIL.assertRegionOnServer(hri, rs1, 5000);
+    } finally {
+      TEST_UTIL.deleteTable(tableName);
+    }
+  }
 }
 