Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,12 @@ public enum OperationStatusCode {
/** Default value for the max percent of regions in transition */
public static final double DEFAULT_HBASE_MASTER_BALANCER_MAX_RIT_PERCENT = 1.0;

/**
 * Config key for the time, in milliseconds, that a move of a non-meta region waits for
 * meta region assignment.
 */
public static final String HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT =
"hbase.master.waiting.meta.assignment.timeout";

/** Default value, in milliseconds, for {@link #HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT}. */
public static final long HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT_DEFAULT = 10000;

/** Config for the max balancing time */
public static final String HBASE_BALANCER_MAX_BALANCING = "hbase.balancer.max.balancing";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,9 @@ public void run() {
// Cached clusterId on stand by masters to serve clusterID requests from clients.
private final CachedClusterId cachedClusterId;

// Time in milliseconds that a move of a non-meta region waits for meta region assignment.
// Read from HConstants.HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT in the constructor.
private final long timeoutWaitMetaRegionAssignment;

public static class RedirectServlet extends HttpServlet {
private static final long serialVersionUID = 2894774810058302473L;
private final int regionServerInfoPort;
Expand Down Expand Up @@ -498,6 +501,9 @@ public HMaster(final Configuration conf, CoordinatedStateManager csm)
this.maxBalancingTime = getMaxBalancingTime();
this.maxRitPercent = conf.getDouble(HConstants.HBASE_MASTER_BALANCER_MAX_RIT_PERCENT,
HConstants.DEFAULT_HBASE_MASTER_BALANCER_MAX_RIT_PERCENT);
this.timeoutWaitMetaRegionAssignment =
conf.getLong(HConstants.HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT,
HConstants.HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT_DEFAULT);

// Do we publish the status?

Expand Down Expand Up @@ -1845,12 +1851,20 @@ public void move(final byte[] encodedRegionName,
// closed
serverManager.sendRegionWarmup(rp.getDestination(), hri);

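// When moving a non-meta region while a meta region is in transition, wait once for the
// configured timeout; if meta is still in transition afterwards, fail the move fast.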
if (!hri.isMetaRegion() && assignmentManager.getRegionStates().isMetaRegionInTransition()) {
Thread.sleep(timeoutWaitMetaRegionAssignment);
if (assignmentManager.getRegionStates().isMetaRegionInTransition()) {
throw new HBaseIOException("Fail-fast of the region move, " +
" because hbase:meta region is still in transition. Failed region move info:" + rp);
}
}
LOG.info(getClientIdAuditPrefix() + " move " + rp + ", running balancer");
this.assignmentManager.balance(rp);
if (this.cpHost != null) {
this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
}
} catch (IOException ioe) {
} catch (IOException | InterruptedException ioe) {
if (ioe instanceof HBaseIOException) {
throw (HBaseIOException)ioe;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
Expand All @@ -46,7 +47,9 @@
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
Expand All @@ -66,6 +69,9 @@ public class TestMaster {
public static void beforeAllTests() throws Exception {
// we will retry operations when PleaseHoldException is thrown
TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 3);
// Here just set 1 ms for testing.
TEST_UTIL.getConfiguration().
setLong(HConstants.HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT, 1);
// Set hbase.min.version.move.system.tables as version 0 so that
// testMoveRegionWhenNotInitialized never fails even if hbase-default has valid default
// value present for production use-case.
Expand Down Expand Up @@ -188,5 +194,60 @@ public void testMoveThrowsPleaseHoldException() throws IOException {
TEST_UTIL.deleteTable(tableName);
}
}

/**
 * Checks that moving a user region is rejected with a "Fail-fast" {@link HBaseIOException}
 * while the meta region is in transition, and that the same move goes through once the meta
 * region has been assigned again.
 */
@Test (timeout = 300000)
public void testMoveRegionWhenMetaRegionInTransition()
    throws IOException, InterruptedException, KeeperException {
  final TableName table = TableName.valueOf("testMoveRegionWhenMetaRegionInTransition");
  final HMaster activeMaster = TEST_UTIL.getMiniHBaseCluster().getMaster();
  final RegionStates states = activeMaster.getAssignmentManager().getRegionStates();

  // Single-family table; only its first region is exercised below.
  final HTableDescriptor descriptor = new HTableDescriptor(table);
  descriptor.addFamily(new HColumnDescriptor("value"));
  admin.createTable(descriptor, null);

  try {
    final HRegionInfo userRegion = admin.getTableRegions(table).get(0);
    final HRegionInfo meta = admin.getTableRegions(TableName.META_TABLE_NAME).get(0);

    final ServerName firstServer =
        TEST_UTIL.getHBaseCluster().getRegionServer(0).getServerName();
    final ServerName secondServer =
        TEST_UTIL.getHBaseCluster().getRegionServer(1).getServerName();
    final byte[] firstServerName = firstServer.getServerName().getBytes();
    final byte[] secondServerName = secondServer.getServerName().getBytes();

    // Pause used after each move attempt before checking where the region lives.
    final long settleMillis = HConstants.HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT_DEFAULT;

    // Step 1: park the user region on the first server and let the transition finish.
    admin.move(userRegion.getEncodedNameAsBytes(), firstServerName);
    while (states.isRegionInTransition(userRegion)) {
      Thread.sleep(1000);
    }

    // Step 2: unassign the meta region so that it is in transition.
    activeMaster.assignmentManager.unassign(meta);

    // Step 3: a move issued directly on the master must now be refused.
    try {
      activeMaster.move(userRegion.getEncodedNameAsBytes(), secondServerName);
      Assert.fail("Admin move should not be successful here.");
    } catch (HBaseIOException expected) {
      assertTrue(expected.getMessage().contains("Fail-fast"));
    }

    // The rejected move must have left the region on the first server.
    Thread.sleep(settleMillis);
    TEST_UTIL.assertRegionOnServer(userRegion, firstServer, 5000);

    // Step 4: assign the meta region again and wait for it to leave transition.
    admin.assign(meta.getEncodedNameAsBytes());
    while (states.isMetaRegionInTransition()) {
      Thread.sleep(1000);
    }

    // Step 5: the same move is retried and should land on the second server.
    admin.move(userRegion.getEncodedNameAsBytes(), secondServerName);

    Thread.sleep(settleMillis);
    TEST_UTIL.assertRegionOnServer(userRegion, secondServer, 5000);
  } finally {
    TEST_UTIL.deleteTable(table);
  }
}
}