From 81a82f4f76b928f1cc0ec9f894582a4a2eecd37a Mon Sep 17 00:00:00 2001 From: Nihal Jain Date: Mon, 23 Sep 2024 22:46:33 +0530 Subject: [PATCH] HBASE-27903 Skip submitting Split/Merge procedure when split/merge is disabled at table level (#6186) (#6169) - Fail fast by adding a check before even submitting a procedure - Update test cases to assert for expected exception post this change - Remove deprecated method mergeRegionsAsync's usage in test - Make use of RegionInfo.getShortNameToLog instead of logging complete region info - Update comments in procedure implementation - Fix tests as per 2.x code base: here the HBTU creates an instance of HBaseAdmin whose methods splitRegionAsync/mergeRegionsAsync seem to propagate error immediately without a call to future.get() which is in contrast to how it default admin instance of HBTU works in 3.x. Signed-off-by: Duo Zhang (cherry picked from commit e6584969af0ed029e0f733c415e857950438269e) --- .../apache/hadoop/hbase/master/HMaster.java | 23 +++++++++---- .../MergeTableRegionsProcedure.java | 33 ++++++++++--------- .../assignment/SplitTableRegionProcedure.java | 10 +++--- .../client/TestSplitOrMergeAtTableLevel.java | 23 +++++++------ 4 files changed, 53 insertions(+), 36 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index c6ac8e69c42c..3fe5abac2771 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -2197,21 +2197,26 @@ public long mergeRegions(final RegionInfo[] regionsToMerge, final boolean forcib final long nonce) throws IOException { checkInitialized(); + final String regionNamesToLog = RegionInfo.getShortNameToLog(regionsToMerge); + if (!isSplitOrMergeEnabled(MasterSwitchType.MERGE)) { - String regionsStr = Arrays.deepToString(regionsToMerge); - LOG.warn("Merge switch is off! skip merge of " + regionsStr); + LOG.warn("Merge switch is off! skip merge of " + regionNamesToLog); + throw new DoNotRetryIOException( + "Merge of " + regionNamesToLog + " failed because merge switch is off"); + } + + if (!getTableDescriptors().get(regionsToMerge[0].getTable()).isMergeEnabled()) { + LOG.warn("Merge is disabled for the table! Skipping merge of {}", regionNamesToLog); throw new DoNotRetryIOException( - "Merge of " + regionsStr + " failed because merge switch is off"); + "Merge of " + regionNamesToLog + " failed as region merge is disabled for the table"); } - final String mergeRegionsStr = Arrays.stream(regionsToMerge).map(RegionInfo::getEncodedName) - .collect(Collectors.joining(", ")); return MasterProcedureUtil.submitProcedure(new NonceProcedureRunnable(this, ng, nonce) { @Override protected void run() throws IOException { getMaster().getMasterCoprocessorHost().preMergeRegions(regionsToMerge); String aid = getClientIdAuditPrefix(); - LOG.info("{} merge regions {}", aid, mergeRegionsStr); + LOG.info("{} merge regions {}", aid, regionNamesToLog); submitProcedure(new MergeTableRegionsProcedure(procedureExecutor.getEnvironment(), regionsToMerge, forcible)); getMaster().getMasterCoprocessorHost().postMergeRegions(regionsToMerge); @@ -2235,6 +2240,12 @@ public long splitRegion(final RegionInfo regionInfo, final byte[] splitRow, fina "Split region " + regionInfo.getRegionNameAsString() + " failed due to split switch off"); } + if (!getTableDescriptors().get(regionInfo.getTable()).isSplitEnabled()) { + LOG.warn("Split is disabled for the table! Skipping split of {}", regionInfo); + throw new DoNotRetryIOException("Split region " + regionInfo.getRegionNameAsString() + + " failed as region split is disabled for the table"); + } + return MasterProcedureUtil .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) { @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java index 1f246e93573e..b79be474182e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java @@ -444,35 +444,36 @@ protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { private boolean prepareMergeRegion(final MasterProcedureEnv env) throws IOException { // Fail if we are taking snapshot for the given table TableName tn = regionsToMerge[0].getTable(); + final String regionNamesToLog = RegionInfo.getShortNameToLog(regionsToMerge); if (env.getMasterServices().getSnapshotManager().isTableTakingAnySnapshot(tn)) { - throw new MergeRegionException("Skip merging regions " - + RegionInfo.getShortNameToLog(regionsToMerge) + ", because we are snapshotting " + tn); + throw new MergeRegionException( + "Skip merging regions " + regionNamesToLog + ", because we are snapshotting " + tn); } - // Mostly this check is not used because we already check the switch before submit a merge - // procedure. Just for safe, check the switch again. This procedure can be rollbacked if - // the switch was set to false after submit. + // Mostly the below two checks are not used because we already check the switches before + // submitting the merge procedure. Just for safety, we are checking the switch again here. + // Also, in case the switch was set to false after submission, this procedure can be rollbacked, + // thanks to this double check! + // case 1: check for cluster level switch if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.MERGE)) { - String regionsStr = Arrays.deepToString(this.regionsToMerge); - LOG.warn("Merge switch is off! skip merge of " + regionsStr); + LOG.warn("Merge switch is off! skip merge of " + regionNamesToLog); setFailure(getClass().getSimpleName(), - new IOException("Merge of " + regionsStr + " failed because merge switch is off")); + new IOException("Merge of " + regionNamesToLog + " failed because merge switch is off")); return false; } - + // case 2: check for table level switch if (!env.getMasterServices().getTableDescriptors().get(getTableName()).isMergeEnabled()) { - String regionsStr = Arrays.deepToString(regionsToMerge); - LOG.warn("Merge is disabled for the table! Skipping merge of {}", regionsStr); + LOG.warn("Merge is disabled for the table! Skipping merge of {}", regionNamesToLog); setFailure(getClass().getSimpleName(), new IOException( - "Merge of " + regionsStr + " failed as region merge is disabled for the table")); + "Merge of " + regionNamesToLog + " failed as region merge is disabled for the table")); return false; } RegionStates regionStates = env.getAssignmentManager().getRegionStates(); for (RegionInfo ri : this.regionsToMerge) { if (MetaTableAccessor.hasMergeRegions(env.getMasterServices().getConnection(), ri)) { - String msg = "Skip merging " + RegionInfo.getShortNameToLog(regionsToMerge) - + ", because a parent, " + RegionInfo.getShortNameToLog(ri) + ", has a merge qualifier " + String msg = "Skip merging " + regionNamesToLog + ", because a parent, " + + RegionInfo.getShortNameToLog(ri) + ", has a merge qualifier " + "(if a 'merge column' in parent, it was recently merged but still has outstanding " + "references to its parents that must be cleared before it can participate in merge -- " + "major compact it to hurry clearing of its references)"; @@ -492,8 +493,8 @@ private boolean prepareMergeRegion(final MasterProcedureEnv env) throws IOExcept try { if (!isMergeable(env, state)) { setFailure(getClass().getSimpleName(), - new MergeRegionException("Skip merging " + RegionInfo.getShortNameToLog(regionsToMerge) - + ", because a parent, " + RegionInfo.getShortNameToLog(ri) + ", is not mergeable")); + new MergeRegionException("Skip merging " + regionNamesToLog + ", because a parent, " + + RegionInfo.getShortNameToLog(ri) + ", is not mergeable")); return false; } } catch (IOException e) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java index afa0f5e42b07..01cd012ddad1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java @@ -547,16 +547,18 @@ public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOExcepti return false; } - // Mostly this check is not used because we already check the switch before submit a split - // procedure. Just for safe, check the switch again. This procedure can be rollbacked if - // the switch was set to false after submit. + // Mostly the below two checks are not used because we already check the switches before + // submitting the split procedure. Just for safety, we are checking the switch again here. + // Also, in case the switch was set to false after submission, this procedure can be rollbacked, + // thanks to this double check! + // case 1: check for cluster level switch if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) { LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI); setFailure(new IOException( "Split region " + parentHRI.getRegionNameAsString() + " failed due to split switch off")); return false; } - + // case 2: check for table level switch if (!env.getMasterServices().getTableDescriptors().get(getTableName()).isSplitEnabled()) { LOG.warn("pid={}, split is disabled for the table! Skipping split of {}", getProcId(), parentHRI); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSplitOrMergeAtTableLevel.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSplitOrMergeAtTableLevel.java index 9e3bb7cb66fc..411a113bce70 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSplitOrMergeAtTableLevel.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSplitOrMergeAtTableLevel.java @@ -21,10 +21,11 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import java.io.IOException; import java.util.List; -import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.TableName; @@ -32,7 +33,6 @@ import org.apache.hadoop.hbase.testclassification.ClientTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.Threads; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -125,7 +125,7 @@ public void testTableMergeSwitch() throws Exception { assertFalse(admin.getDescriptor(tableName).isMergeEnabled()); trySplitAndEnsureItIsSuccess(tableName); - Threads.sleep(10000); + tryMergeAndEnsureItFails(tableName); admin.disableTable(tableName); enableTableMerge(tableName); @@ -157,15 +157,17 @@ private void trySplitAndEnsureItFails(final TableName tableName) throws Exceptio List regions = admin.getRegions(tableName); int originalCount = regions.size(); - // split the table and make sure region count does not increase - Future f = admin.splitRegionAsync(regions.get(0).getEncodedNameAsBytes(), Bytes.toBytes(2)); try { + // split the table and make sure region count does not increase + Future f = + admin.splitRegionAsync(regions.get(0).getEncodedNameAsBytes(), Bytes.toBytes(2)); f.get(10, TimeUnit.SECONDS); fail("Should not get here."); - } catch (ExecutionException ee) { + } catch (IOException ee) { // expected to reach here // check and ensure that table does not get splitted assertTrue(admin.getRegions(tableName).size() == originalCount); + assertTrue("Expected DoNotRetryIOException!", ee instanceof DoNotRetryIOException); } } @@ -216,15 +218,16 @@ private void tryMergeAndEnsureItFails(final TableName tableName) throws Exceptio byte[] nameOfRegionA = regions.get(0).getEncodedNameAsBytes(); byte[] nameOfRegionB = regions.get(1).getEncodedNameAsBytes(); - // check and ensure that region do not get merged - Future f = admin.mergeRegionsAsync(nameOfRegionA, nameOfRegionB, true); try { + // check and ensure that region do not get merged + Future f = admin.mergeRegionsAsync(new byte[][] { nameOfRegionA, nameOfRegionB }, true); f.get(10, TimeUnit.SECONDS); fail("Should not get here."); - } catch (ExecutionException ee) { + } catch (IOException ee) { // expected to reach here // check and ensure that region do not get merged assertTrue(admin.getRegions(tableName).size() == originalCount); + assertTrue("Expected DoNotRetryIOException!", ee instanceof DoNotRetryIOException); } } @@ -255,7 +258,7 @@ private void tryMergeAndEnsureItIsSuccess(final TableName tableName) throws Exce byte[] nameOfRegionB = regions.get(1).getEncodedNameAsBytes(); // merge the table regions and wait until region count decreases - admin.mergeRegionsAsync(nameOfRegionA, nameOfRegionB, true); + admin.mergeRegionsAsync(new byte[][] { nameOfRegionA, nameOfRegionB }, true); TEST_UTIL.waitFor(30000, new ExplainingPredicate() { @Override