Merged
@@ -62,6 +62,7 @@
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.RegionSplitRestriction;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
@@ -110,6 +111,21 @@ public SplitTableRegionProcedure(final MasterProcedureEnv env,
// we fail-fast on construction. There it skips the split with just a warning.
checkOnline(env, regionToSplit);
this.bestSplitRow = splitRow;
TableDescriptor tableDescriptor = env.getMasterServices().getTableDescriptors()
.get(getTableName());
Configuration conf = env.getMasterConfiguration();
if (hasBestSplitRow()) {
Contributor:

We did not have this logic in the past? What has changed so that now we need to apply this restriction in SplitTableRegionProcedure? IIRC the logic is done on the region server side?

Member Author:

Yes, we didn't have this logic in the past. I think we can apply the restriction to a user-specified split point because without this logic, we can easily break the restriction by splitting with a specified split point. And since the user-specified split point is passed to the Master side, we need to do it on the Master side.

What do you think? @Apache9

Contributor:

Finally we should get the actual split point back from the region server? No? Then this should be a bug in the current code base?

Member Author:

> Finally we should get the actual split point back from region server? No?

No, I don't think so.

Let's say we have a table that has a key prefix restriction where the prefix length is 2 bytes.
When a user runs the split command specifying a split point abc in the hbase shell, this will break the key prefix restriction if we split the region at abc. So I think we can apply the restriction to the user-specified split point; the restriction-applied split point will be ab, which won't break the restriction.
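The truncation described here can be sketched as a tiny standalone example (a hypothetical class for illustration, not the actual HBase code; the real logic lives in KeyPrefixRegionSplitRestriction.getRestrictedSplitPoint):

```java
import java.util.Arrays;

// Hypothetical standalone sketch of a prefix-length split restriction.
public class KeyPrefixRestrictionSketch {
    // Keep only the first prefixLength bytes of the proposed split point,
    // so a region is never split "inside" a key prefix.
    public static byte[] restrict(byte[] splitPoint, int prefixLength) {
        if (prefixLength > 0) {
            return Arrays.copyOf(splitPoint, Math.min(prefixLength, splitPoint.length));
        }
        return splitPoint;
    }

    public static void main(String[] args) {
        // With a 2-byte prefix restriction, the user-specified point "abc"
        // is truncated to "ab".
        System.out.println(new String(restrict("abc".getBytes(), 2))); // prints "ab"
    }
}
```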

Member Author:

@Apache9 What about this? Thanks.

Contributor:

Good point. So users may be 'surprised' if we do not split where they want? Will there be a message anywhere saying that their choice has been over-ruled by the restriction? Or will it be obvious that the restriction over-ruled it?

I'm good w/ the restriction over-ruling the user as long as there is a log to this effect (add the 'behavior change' to the existing nice release note @brfrn169)

Member Author:

I think we can add a WARN message in the Master log when the user-specified split point is over-ruled by the restriction. I will do that. And I will add the 'behavior change' to the release note. Thanks.

Contributor:

So here we will only use SplitRestriction to fix the split row? Then what if a user uses the deprecated KeyPrefixSplitPolicy? We will not fix the split row if it breaks the rule?

Member Author:

Yes, that's the behavior of KeyPrefixSplitPolicy, and we will not fix the split row even if it breaks the rule. And it may not be easy to fix, because RegionSplitPolicy doesn't have any method to restrict/convert a user-specified split point. It has only byte[] getSplitPoint(), which returns an appropriate split point calculated based on its policy.

Contributor:

OK, so in fact we are not changing the behavior? If you use the old KeyPrefixSplitPolicy, nothing is changed. If you use the new SplitRestriction, then you will find out that you are not allowed to break the restriction when proposing a split point. Could mention this in the release note.

// Apply the split restriction for the table to the user-specified split point
RegionSplitRestriction splitRestriction =
RegionSplitRestriction.create(tableDescriptor, conf);
byte[] restrictedSplitRow = splitRestriction.getRestrictedSplitPoint(bestSplitRow);
if (!Bytes.equals(bestSplitRow, restrictedSplitRow)) {
LOG.warn("The specified split point {} violates the split restriction of the table. "
+ "Using {} as a split point.", Bytes.toStringBinary(bestSplitRow),
Bytes.toStringBinary(restrictedSplitRow));
bestSplitRow = restrictedSplitRow;
}
}
checkSplittable(env, regionToSplit);
final TableName table = regionToSplit.getTable();
final long rid = getDaughterRegionIdTimestamp(regionToSplit);
@@ -125,15 +141,14 @@ public SplitTableRegionProcedure(final MasterProcedureEnv env,
.setSplit(false)
.setRegionId(rid)
.build();
-    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
-    if(htd.getRegionSplitPolicyClassName() != null) {
+    if(tableDescriptor.getRegionSplitPolicyClassName() != null) {
// Since we don't have region reference here, creating the split policy instance without it.
// This can be used to invoke methods which don't require Region reference. This instantiation
// of a class on Master-side though it only makes sense on the RegionServer-side is
// for Phoenix Local Indexing. Refer HBASE-12583 for more information.
Class<? extends RegionSplitPolicy> clazz =
-        RegionSplitPolicy.getSplitPolicyClass(htd, env.getMasterConfiguration());
-      this.splitPolicy = ReflectionUtils.newInstance(clazz, env.getMasterConfiguration());
+        RegionSplitPolicy.getSplitPolicyClass(tableDescriptor, conf);
+      this.splitPolicy = ReflectionUtils.newInstance(clazz, conf);
}
}

@@ -219,7 +234,7 @@ private void checkSplittable(final MasterProcedureEnv env,
throw e;
}

-    if (bestSplitRow == null || bestSplitRow.length == 0) {
+    if (!hasBestSplitRow()) {
throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, " +
"maybe table is too small for auto split. For force split, try specifying split row");
}
@@ -37,7 +37,11 @@
 * <code>userid_eventtype_eventid</code>, and use prefix delimiter _, this split policy
 * ensures that all rows starting with the same userid belong to the same region.
* @see KeyPrefixRegionSplitPolicy
*
* @deprecated since 3.0.0 and will be removed in 4.0.0. Use {@link RegionSplitRestriction},
* instead.
*/
@Deprecated
@InterfaceAudience.Private
public class DelimitedKeyPrefixRegionSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {

Expand Down
@@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* A {@link RegionSplitRestriction} implementation that groups rows by a prefix of the row-key with
* a delimiter. Only the first delimiter for the row key will define the prefix of the row key that
* is used for grouping.
* <p>
* This ensures that a region is not split "inside" a prefix of a row key.
* I.e. rows can be co-located in a region by their prefix.
*
 * As an example, if you have row keys delimited with <code>_</code>, like
 * <code>userid_eventtype_eventid</code>, and use prefix delimiter _, this split restriction
 * ensures that all rows starting with the same userid belong to the same region.
*/
@InterfaceAudience.Private
public class DelimitedKeyPrefixRegionSplitRestriction extends RegionSplitRestriction {
private static final Logger LOG =
LoggerFactory.getLogger(DelimitedKeyPrefixRegionSplitRestriction.class);

public static final String DELIMITER_KEY =
"hbase.regionserver.region.split_restriction.delimiter";

private byte[] delimiter = null;

@Override
public void initialize(TableDescriptor tableDescriptor, Configuration conf) throws IOException {
String delimiterString = tableDescriptor.getValue(DELIMITER_KEY);
if (delimiterString == null || delimiterString.length() == 0) {
delimiterString = conf.get(DELIMITER_KEY);
if (delimiterString == null || delimiterString.length() == 0) {
LOG.error("{} not specified for table {}. "
+ "Using the default RegionSplitRestriction", DELIMITER_KEY,
tableDescriptor.getTableName());
return;
}
}
delimiter = Bytes.toBytes(delimiterString);
}

@Override
public byte[] getRestrictedSplitPoint(byte[] splitPoint) {
if (delimiter != null) {
// find the first occurrence of delimiter in split point
int index = org.apache.hbase.thirdparty.com.google.common.primitives.Bytes.indexOf(
splitPoint, delimiter);
if (index < 0) {
LOG.warn("Delimiter {} not found for split key {}", Bytes.toString(delimiter),
Bytes.toStringBinary(splitPoint));
return splitPoint;
}

// group split keys by a prefix
return Arrays.copyOf(splitPoint, Math.min(index, splitPoint.length));
} else {
return splitPoint;
}
}
}
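As a rough illustration of the delimiter behavior above, this self-contained sketch (a hypothetical class, not the HBase implementation, which uses Guava's Bytes.indexOf) truncates a split point at the first occurrence of the delimiter and falls back to the unmodified split point when the delimiter is absent:

```java
import java.util.Arrays;

// Hypothetical sketch of delimiter-based split-point truncation.
public class DelimitedPrefixSketch {
    // Find the index of the first occurrence of delimiter in key; -1 if absent.
    static int indexOf(byte[] key, byte[] delimiter) {
        outer:
        for (int i = 0; i <= key.length - delimiter.length; i++) {
            for (int j = 0; j < delimiter.length; j++) {
                if (key[i + j] != delimiter[j]) {
                    continue outer;
                }
            }
            return i;
        }
        return -1;
    }

    // Mirrors getRestrictedSplitPoint: cut the split point at the first
    // delimiter so the region boundary never falls inside a prefix group.
    public static byte[] restrict(byte[] splitPoint, byte[] delimiter) {
        int index = indexOf(splitPoint, delimiter);
        if (index < 0) {
            return splitPoint;
        }
        return Arrays.copyOf(splitPoint, index);
    }

    public static void main(String[] args) {
        byte[] r = restrict("userid_eventtype_eventid".getBytes(), "_".getBytes());
        System.out.println(new String(r)); // prints "userid"
    }
}
```

So a proposed split point of `userid_eventtype_eventid` is restricted to `userid`, keeping all rows with that userid prefix in one region.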
@@ -695,6 +695,7 @@ void sawNoSuchFamily() {

private TableDescriptor htableDescriptor = null;
private RegionSplitPolicy splitPolicy;
private RegionSplitRestriction splitRestriction;
private FlushPolicy flushPolicy;

private final MetricsRegion metricsRegion;
@@ -1037,6 +1038,9 @@ private long initializeRegionInternals(final CancelableProgressable reporter,
// Initialize split policy
this.splitPolicy = RegionSplitPolicy.create(this, conf);

// Initialize split restriction
splitRestriction = RegionSplitRestriction.create(getTableDescriptor(), conf);

// Initialize flush policy
this.flushPolicy = FlushPolicyFactory.create(this, conf);

@@ -7870,6 +7874,9 @@ public Optional<byte[]> checkSplit(boolean force) {
}

byte[] ret = splitPolicy.getSplitPoint();
if (ret != null && ret.length > 0) {
ret = splitRestriction.getRestrictedSplitPoint(ret);
}

if (ret != null) {
try {
@@ -29,7 +29,11 @@
*
* This ensures that a region is not split "inside" a prefix of a row key.
* I.e. rows can be co-located in a region by their prefix.
*
* @deprecated since 3.0.0 and will be removed in 4.0.0. Use {@link RegionSplitRestriction},
* instead.
*/
@Deprecated
@InterfaceAudience.Private
public class KeyPrefixRegionSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {
private static final Logger LOG = LoggerFactory
@@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* A {@link RegionSplitRestriction} implementation that groups rows by a prefix of the row-key.
* <p>
* This ensures that a region is not split "inside" a prefix of a row key.
* I.e. rows can be co-located in a region by their prefix.
*/
@InterfaceAudience.Private
public class KeyPrefixRegionSplitRestriction extends RegionSplitRestriction {
private static final Logger LOG =
LoggerFactory.getLogger(KeyPrefixRegionSplitRestriction.class);

public static final String PREFIX_LENGTH_KEY =
"hbase.regionserver.region.split_restriction.prefix_length";

private int prefixLength;

@Override
public void initialize(TableDescriptor tableDescriptor, Configuration conf) throws IOException {
String prefixLengthString = tableDescriptor.getValue(PREFIX_LENGTH_KEY);
if (prefixLengthString == null) {
prefixLengthString = conf.get(PREFIX_LENGTH_KEY);
if (prefixLengthString == null) {
LOG.error("{} not specified for table {}. "
+ "Using the default RegionSplitRestriction", PREFIX_LENGTH_KEY,
tableDescriptor.getTableName());
return;
}
}
try {
prefixLength = Integer.parseInt(prefixLengthString);
} catch (NumberFormatException ignored) {
}
if (prefixLength <= 0) {
LOG.error("Invalid value for {} for table {}:{}. "
+ "Using the default RegionSplitRestriction", PREFIX_LENGTH_KEY,
tableDescriptor.getTableName(), prefixLengthString);
}
}

@Override
public byte[] getRestrictedSplitPoint(byte[] splitPoint) {
if (prefixLength > 0) {
// group split keys by a prefix
return Arrays.copyOf(splitPoint, Math.min(prefixLength, splitPoint.length));
} else {
return splitPoint;
}
}
}
@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.yetus.audience.InterfaceAudience;

/**
* A {@link RegionSplitRestriction} implementation that does nothing.
*/
@InterfaceAudience.Private
public class NoRegionSplitRestriction extends RegionSplitRestriction {

@Override
public void initialize(TableDescriptor tableDescriptor, Configuration conf) throws IOException {
}

@Override
public byte[] getRestrictedSplitPoint(byte[] splitPoint) {
// Do nothing
return splitPoint;
}
}
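The patch selects one of these three implementations per table. As a hedged sketch of that dispatch (a hypothetical stand-in using a plain Map instead of Hadoop's Configuration; the real factory is RegionSplitRestriction.create, and the type key name shown is assumed from the `hbase.regionserver.region.split_restriction.*` naming pattern used elsewhere in this patch):

```java
import java.util.Map;

// Hypothetical sketch of how a split restriction could be chosen per table.
public class RestrictionFactorySketch {
    // Assumed configuration key, following the split_restriction.* pattern.
    static final String TYPE_KEY = "hbase.regionserver.region.split_restriction.type";

    // Return the name of the restriction implementation to instantiate,
    // defaulting to the no-op NoRegionSplitRestriction.
    public static String chooseRestriction(Map<String, String> conf) {
        String type = conf.getOrDefault(TYPE_KEY, "None");
        switch (type) {
            case "KeyPrefix":
                return "KeyPrefixRegionSplitRestriction";
            case "DelimitedKeyPrefix":
                return "DelimitedKeyPrefixRegionSplitRestriction";
            default:
                return "NoRegionSplitRestriction";
        }
    }
}
```

With no type configured, the no-op restriction is used, which matches the unchanged behavior discussed in the review thread above.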