Skip to content

Commit 5f4e2e1

Browse files
authored
HBASE-25766 Introduce RegionSplitRestriction that restricts the pattern of the split point (#3150)
Signed-off-by: Duo Zhang <[email protected]> Signed-off-by: Michael Stack <[email protected]>
1 parent 50920ee commit 5f4e2e1

File tree

9 files changed

+548
-5
lines changed

9 files changed

+548
-5
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
import org.apache.hadoop.hbase.regionserver.HStore;
6363
import org.apache.hadoop.hbase.regionserver.HStoreFile;
6464
import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
65+
import org.apache.hadoop.hbase.regionserver.RegionSplitRestriction;
6566
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
6667
import org.apache.hadoop.hbase.util.Bytes;
6768
import org.apache.hadoop.hbase.util.CommonFSUtils;
@@ -110,6 +111,21 @@ public SplitTableRegionProcedure(final MasterProcedureEnv env,
110111
// we fail-fast on construction. There it skips the split with just a warning.
111112
checkOnline(env, regionToSplit);
112113
this.bestSplitRow = splitRow;
114+
TableDescriptor tableDescriptor = env.getMasterServices().getTableDescriptors()
115+
.get(getTableName());
116+
Configuration conf = env.getMasterConfiguration();
117+
if (hasBestSplitRow()) {
118+
// Apply the split restriction for the table to the user-specified split point
119+
RegionSplitRestriction splitRestriction =
120+
RegionSplitRestriction.create(tableDescriptor, conf);
121+
byte[] restrictedSplitRow = splitRestriction.getRestrictedSplitPoint(bestSplitRow);
122+
if (!Bytes.equals(bestSplitRow, restrictedSplitRow)) {
123+
LOG.warn("The specified split point {} violates the split restriction of the table. "
124+
+ "Using {} as a split point.", Bytes.toStringBinary(bestSplitRow),
125+
Bytes.toStringBinary(restrictedSplitRow));
126+
bestSplitRow = restrictedSplitRow;
127+
}
128+
}
113129
checkSplittable(env, regionToSplit);
114130
final TableName table = regionToSplit.getTable();
115131
final long rid = getDaughterRegionIdTimestamp(regionToSplit);
@@ -125,15 +141,14 @@ public SplitTableRegionProcedure(final MasterProcedureEnv env,
125141
.setSplit(false)
126142
.setRegionId(rid)
127143
.build();
128-
TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
129-
if(htd.getRegionSplitPolicyClassName() != null) {
144+
if(tableDescriptor.getRegionSplitPolicyClassName() != null) {
130145
// Since we don't have a region reference here, create the split policy instance without it.
131146
// It can be used to invoke methods that don't require a Region reference. Instantiating this
132147
// class on the Master side, although it only makes sense on the RegionServer side, is
133148
// needed for Phoenix Local Indexing. Refer to HBASE-12583 for more information.
134149
Class<? extends RegionSplitPolicy> clazz =
135-
RegionSplitPolicy.getSplitPolicyClass(htd, env.getMasterConfiguration());
136-
this.splitPolicy = ReflectionUtils.newInstance(clazz, env.getMasterConfiguration());
150+
RegionSplitPolicy.getSplitPolicyClass(tableDescriptor, conf);
151+
this.splitPolicy = ReflectionUtils.newInstance(clazz, conf);
137152
}
138153
}
139154

@@ -219,7 +234,7 @@ private void checkSplittable(final MasterProcedureEnv env,
219234
throw e;
220235
}
221236

222-
if (bestSplitRow == null || bestSplitRow.length == 0) {
237+
if (!hasBestSplitRow()) {
223238
throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, " +
224239
"maybe table is too small for auto split. For force split, try specifying split row");
225240
}

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DelimitedKeyPrefixRegionSplitPolicy.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,11 @@
3737
* <code>userid_eventtype_eventid</code>, and use prefix delimiter _, this split policy
3838
* ensures that all rows starting with the same userid belong to the same region.
3939
* @see KeyPrefixRegionSplitPolicy
40+
*
41+
* @deprecated since 3.0.0 and will be removed in 4.0.0. Use {@link RegionSplitRestriction}
42+
* instead.
4043
*/
44+
@Deprecated
4145
@InterfaceAudience.Private
4246
public class DelimitedKeyPrefixRegionSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {
4347

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.regionserver;
19+
20+
import java.io.IOException;
21+
import java.util.Arrays;
22+
import org.apache.hadoop.conf.Configuration;
23+
import org.apache.hadoop.hbase.client.TableDescriptor;
24+
import org.apache.hadoop.hbase.util.Bytes;
25+
import org.apache.yetus.audience.InterfaceAudience;
26+
import org.slf4j.Logger;
27+
import org.slf4j.LoggerFactory;
28+
29+
/**
30+
* A {@link RegionSplitRestriction} implementation that groups rows by a prefix of the row-key with
31+
* a delimiter. Only the first delimiter for the row key will define the prefix of the row key that
32+
* is used for grouping.
33+
* <p>
34+
* This ensures that a region is not split "inside" a prefix of a row key.
35+
* I.e. rows can be co-located in a region by their prefix.
36+
*
37+
* As an example, if you have row keys delimited with <code>_</code>, like
38+
* <code>userid_eventtype_eventid</code>, and use prefix delimiter _, this split policy ensures
39+
* that all rows starting with the same userid, belongs to the same region.
40+
*/
41+
@InterfaceAudience.Private
42+
public class DelimitedKeyPrefixRegionSplitRestriction extends RegionSplitRestriction {
43+
private static final Logger LOG =
44+
LoggerFactory.getLogger(DelimitedKeyPrefixRegionSplitRestriction.class);
45+
46+
public static final String DELIMITER_KEY =
47+
"hbase.regionserver.region.split_restriction.delimiter";
48+
49+
private byte[] delimiter = null;
50+
51+
@Override
52+
public void initialize(TableDescriptor tableDescriptor, Configuration conf) throws IOException {
53+
String delimiterString = tableDescriptor.getValue(DELIMITER_KEY);
54+
if (delimiterString == null || delimiterString.length() == 0) {
55+
delimiterString = conf.get(DELIMITER_KEY);
56+
if (delimiterString == null || delimiterString.length() == 0) {
57+
LOG.error("{} not specified for table {}. "
58+
+ "Using the default RegionSplitRestriction", DELIMITER_KEY,
59+
tableDescriptor.getTableName());
60+
return;
61+
}
62+
}
63+
delimiter = Bytes.toBytes(delimiterString);
64+
}
65+
66+
@Override
67+
public byte[] getRestrictedSplitPoint(byte[] splitPoint) {
68+
if (delimiter != null) {
69+
// find the first occurrence of delimiter in split point
70+
int index = org.apache.hbase.thirdparty.com.google.common.primitives.Bytes.indexOf(
71+
splitPoint, delimiter);
72+
if (index < 0) {
73+
LOG.warn("Delimiter {} not found for split key {}", Bytes.toString(delimiter),
74+
Bytes.toStringBinary(splitPoint));
75+
return splitPoint;
76+
}
77+
78+
// group split keys by a prefix
79+
return Arrays.copyOf(splitPoint, Math.min(index, splitPoint.length));
80+
} else {
81+
return splitPoint;
82+
}
83+
}
84+
}

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,7 @@ void sawNoSuchFamily() {
695695

696696
private TableDescriptor htableDescriptor = null;
697697
private RegionSplitPolicy splitPolicy;
698+
private RegionSplitRestriction splitRestriction;
698699
private FlushPolicy flushPolicy;
699700

700701
private final MetricsRegion metricsRegion;
@@ -1037,6 +1038,9 @@ private long initializeRegionInternals(final CancelableProgressable reporter,
10371038
// Initialize split policy
10381039
this.splitPolicy = RegionSplitPolicy.create(this, conf);
10391040

1041+
// Initialize split restriction
1042+
splitRestriction = RegionSplitRestriction.create(getTableDescriptor(), conf);
1043+
10401044
// Initialize flush policy
10411045
this.flushPolicy = FlushPolicyFactory.create(this, conf);
10421046

@@ -7870,6 +7874,9 @@ public Optional<byte[]> checkSplit(boolean force) {
78707874
}
78717875

78727876
byte[] ret = splitPolicy.getSplitPoint();
7877+
if (ret != null && ret.length > 0) {
7878+
ret = splitRestriction.getRestrictedSplitPoint(ret);
7879+
}
78737880

78747881
if (ret != null) {
78757882
try {

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyPrefixRegionSplitPolicy.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,11 @@
2929
*
3030
* This ensures that a region is not split "inside" a prefix of a row key.
3131
* I.e. rows can be co-located in a region by their prefix.
32+
*
33+
* @deprecated since 3.0.0 and will be removed in 4.0.0. Use {@link RegionSplitRestriction}
34+
* instead.
3235
*/
36+
@Deprecated
3337
@InterfaceAudience.Private
3438
public class KeyPrefixRegionSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {
3539
private static final Logger LOG = LoggerFactory
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.regionserver;
19+
20+
import java.io.IOException;
21+
import java.util.Arrays;
22+
import org.apache.hadoop.conf.Configuration;
23+
import org.apache.hadoop.hbase.client.TableDescriptor;
24+
import org.apache.yetus.audience.InterfaceAudience;
25+
import org.slf4j.Logger;
26+
import org.slf4j.LoggerFactory;
27+
28+
/**
29+
* A {@link RegionSplitRestriction} implementation that groups rows by a prefix of the row-key.
30+
* <p>
31+
* This ensures that a region is not split "inside" a prefix of a row key.
32+
* I.e. rows can be co-located in a region by their prefix.
33+
*/
34+
@InterfaceAudience.Private
35+
public class KeyPrefixRegionSplitRestriction extends RegionSplitRestriction {
36+
private static final Logger LOG =
37+
LoggerFactory.getLogger(KeyPrefixRegionSplitRestriction.class);
38+
39+
public static final String PREFIX_LENGTH_KEY =
40+
"hbase.regionserver.region.split_restriction.prefix_length";
41+
42+
private int prefixLength;
43+
44+
@Override
45+
public void initialize(TableDescriptor tableDescriptor, Configuration conf) throws IOException {
46+
String prefixLengthString = tableDescriptor.getValue(PREFIX_LENGTH_KEY);
47+
if (prefixLengthString == null) {
48+
prefixLengthString = conf.get(PREFIX_LENGTH_KEY);
49+
if (prefixLengthString == null) {
50+
LOG.error("{} not specified for table {}. "
51+
+ "Using the default RegionSplitRestriction", PREFIX_LENGTH_KEY,
52+
tableDescriptor.getTableName());
53+
return;
54+
}
55+
}
56+
try {
57+
prefixLength = Integer.parseInt(prefixLengthString);
58+
} catch (NumberFormatException ignored) {
59+
}
60+
if (prefixLength <= 0) {
61+
LOG.error("Invalid value for {} for table {}:{}. "
62+
+ "Using the default RegionSplitRestriction", PREFIX_LENGTH_KEY,
63+
tableDescriptor.getTableName(), prefixLengthString);
64+
}
65+
}
66+
67+
@Override
68+
public byte[] getRestrictedSplitPoint(byte[] splitPoint) {
69+
if (prefixLength > 0) {
70+
// group split keys by a prefix
71+
return Arrays.copyOf(splitPoint, Math.min(prefixLength, splitPoint.length));
72+
} else {
73+
return splitPoint;
74+
}
75+
}
76+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.regionserver;
19+
20+
import java.io.IOException;
21+
import org.apache.hadoop.conf.Configuration;
22+
import org.apache.hadoop.hbase.client.TableDescriptor;
23+
import org.apache.yetus.audience.InterfaceAudience;
24+
25+
/**
26+
* A {@link RegionSplitRestriction} implementation that does nothing.
27+
*/
28+
@InterfaceAudience.Private
29+
public class NoRegionSplitRestriction extends RegionSplitRestriction {
30+
31+
@Override
32+
public void initialize(TableDescriptor tableDescriptor, Configuration conf) throws IOException {
33+
}
34+
35+
@Override
36+
public byte[] getRestrictedSplitPoint(byte[] splitPoint) {
37+
// Do nothing
38+
return splitPoint;
39+
}
40+
}

0 commit comments

Comments
 (0)