Commit 1191617

HBASE-22632 SplitTableRegionProcedure and MergeTableRegionsProcedure should skip store files for unknown column families (apache#344)
Signed-off-by: Guanghao Zhang <[email protected]>
1 parent ff2b968 commit 1191617
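The gist of the change, as a minimal sketch (the API calls are the ones this patch uses, but the method below is hypothetical and heavily simplified): both procedures now drive their store-file loops from the table descriptor's declared column families instead of from whatever family directories happen to exist under the region on disk, so a leftover directory for a family that is not (or no longer) in the descriptor is simply skipped rather than failing the split or merge.

    // Simplified illustration only, not the exact patch.
    private void visitDeclaredFamilies(TableDescriptor htd, HRegionFileSystem regionFs)
        throws IOException {
      // Iterate the families the descriptor knows about, not what is on disk.
      for (ColumnFamilyDescriptor hcd : htd.getColumnFamilies()) {
        String family = hcd.getNameAsString();
        Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
        if (storeFiles == null || storeFiles.isEmpty()) {
          continue; // nothing to split/merge for this family
        }
        // A stray directory such as "wrong_cf" with no matching descriptor entry is
        // never visited at all. Reference-file creation is elided here; see the diffs below.
      }
    }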

3 files changed: +146 additions, −28 deletions


hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java

Lines changed: 7 additions & 8 deletions
@@ -639,25 +639,24 @@ private void createMergedRegion(final MasterProcedureEnv env) throws IOException
    * @param regionFs region file system
    * @param mergedDir the temp directory of merged region
    */
-  private void mergeStoreFiles(
-      final MasterProcedureEnv env, final HRegionFileSystem regionFs, final Path mergedDir)
-      throws IOException {
+  private void mergeStoreFiles(final MasterProcedureEnv env, final HRegionFileSystem regionFs,
+      final Path mergedDir) throws IOException {
     final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
     final Configuration conf = env.getMasterConfiguration();
     final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
 
-    for (String family : regionFs.getFamilies()) {
-      final ColumnFamilyDescriptor hcd = htd.getColumnFamily(Bytes.toBytes(family));
+    for (ColumnFamilyDescriptor hcd : htd.getColumnFamilies()) {
+      String family = hcd.getNameAsString();
       final Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
 
       if (storeFiles != null && storeFiles.size() > 0) {
         for (StoreFileInfo storeFileInfo : storeFiles) {
           // Create reference file(s) of the region in mergedDir.
           // As this procedure is running on master, use CacheConfig.DISABLED means
           // don't cache any block.
-          regionFs.mergeStoreFile(mergedRegion, family,
-            new HStoreFile(mfs.getFileSystem(), storeFileInfo, conf, CacheConfig.DISABLED,
-              hcd.getBloomFilterType(), true), mergedDir);
+          regionFs.mergeStoreFile(mergedRegion, family, new HStoreFile(mfs.getFileSystem(),
+            storeFileInfo, conf, CacheConfig.DISABLED, hcd.getBloomFilterType(), true),
+            mergedDir);
         }
       }
     }

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java

Lines changed: 22 additions & 20 deletions
@@ -626,6 +626,7 @@ private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env,
       final HRegionFileSystem regionFs) throws IOException {
     final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
     final Configuration conf = env.getMasterConfiguration();
+    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
     // The following code sets up a thread pool executor with as many slots as
     // there's files to split. It then fires up everything, waits for
     // completion and finally checks for any exception
@@ -635,12 +636,15 @@ private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env,
     // clean this up.
     int nbFiles = 0;
     final Map<String, Collection<StoreFileInfo>> files =
-        new HashMap<String, Collection<StoreFileInfo>>(regionFs.getFamilies().size());
-    for (String family: regionFs.getFamilies()) {
+      new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount());
+    for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) {
+      String family = cfd.getNameAsString();
       Collection<StoreFileInfo> sfis = regionFs.getStoreFiles(family);
-      if (sfis == null) continue;
+      if (sfis == null) {
+        continue;
+      }
       Collection<StoreFileInfo> filteredSfis = null;
-      for (StoreFileInfo sfi: sfis) {
+      for (StoreFileInfo sfi : sfis) {
         // Filter. There is a lag cleaning up compacted reference files. They get cleared
         // after a delay in case outstanding Scanners still have references. Because of this,
         // the listing of the Store content may have straggler reference files. Skip these.
@@ -661,20 +665,19 @@ private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env,
       }
     }
     if (nbFiles == 0) {
       // no file needs to be splitted.
-      return new Pair<Integer, Integer>(0,0);
+      return new Pair<Integer, Integer>(0, 0);
     }
     // Max #threads is the smaller of the number of storefiles or the default max determined above.
     int maxThreads = Math.min(
       conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX,
         conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)),
       nbFiles);
     LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" +
-        getParentRegion().getShortNameToLog() + ", threads=" + maxThreads);
-    final ExecutorService threadPool = Executors.newFixedThreadPool(
-      maxThreads, Threads.getNamedThreadFactory("StoreFileSplitter-%1$d"));
-    final List<Future<Pair<Path,Path>>> futures = new ArrayList<Future<Pair<Path,Path>>>(nbFiles);
+      getParentRegion().getShortNameToLog() + ", threads=" + maxThreads);
+    final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads,
+      Threads.getNamedThreadFactory("StoreFileSplitter-%1$d"));
+    final List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles);
 
-    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
     // Split each store file.
     for (Map.Entry<String, Collection<StoreFileInfo>> e : files.entrySet()) {
       byte[] familyName = Bytes.toBytes(e.getKey());
@@ -689,9 +692,9 @@ private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env,
       for (StoreFileInfo storeFileInfo : storeFiles) {
         // As this procedure is running on master, use CacheConfig.DISABLED means
         // don't cache any block.
-        StoreFileSplitter sfs = new StoreFileSplitter(regionFs, familyName,
-          new HStoreFile(mfs.getFileSystem(), storeFileInfo, conf, CacheConfig.DISABLED,
-            hcd.getBloomFilterType(), true));
+        StoreFileSplitter sfs =
+          new StoreFileSplitter(regionFs, familyName, new HStoreFile(mfs.getFileSystem(),
+            storeFileInfo, conf, CacheConfig.DISABLED, hcd.getBloomFilterType(), true));
         futures.add(threadPool.submit(sfs));
       }
     }
@@ -703,7 +706,7 @@ private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env,
     // When splits ran on the RegionServer, how-long-to-wait-configuration was named
     // hbase.regionserver.fileSplitTimeout. If set, use its value.
     long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout",
-        conf.getLong("hbase.regionserver.fileSplitTimeout", 600000));
+      conf.getLong("hbase.regionserver.fileSplitTimeout", 600000));
     try {
       boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
       if (stillRunning) {
@@ -712,11 +715,11 @@ private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env,
         while (!threadPool.isTerminated()) {
          Thread.sleep(50);
         }
-        throw new IOException("Took too long to split the" +
-            " files and create the references, aborting split");
+        throw new IOException(
+          "Took too long to split the" + " files and create the references, aborting split");
       }
     } catch (InterruptedException e) {
-      throw (InterruptedIOException)new InterruptedIOException().initCause(e);
+      throw (InterruptedIOException) new InterruptedIOException().initCause(e);
     }
 
     int daughterA = 0;
@@ -736,9 +739,8 @@ private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env,
 
     if (LOG.isDebugEnabled()) {
       LOG.debug("pid=" + getProcId() + " split storefiles for region " +
-          getParentRegion().getShortNameToLog() +
-          " Daughter A: " + daughterA + " storefiles, Daughter B: " +
-          daughterB + " storefiles.");
+        getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA +
+        " storefiles, Daughter B: " + daughterB + " storefiles.");
     }
     return new Pair<Integer, Integer>(daughterA, daughterB);
   }
hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestIgnoreUnknownFamily.java

Lines changed: 117 additions & 0 deletions
@@ -0,0 +1,117 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
+ * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
+ * for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.io.hfile.HFileContext;
+import org.apache.hadoop.hbase.master.MasterFileSystem;
+import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+/**
+ * Testcase for HBASE-22632
+ */
+@Category({ MasterTests.class, MediumTests.class })
+public class TestIgnoreUnknownFamily {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestIgnoreUnknownFamily.class);
+
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+  private static final byte[] FAMILY = Bytes.toBytes("cf");
+
+  private static final byte[] UNKNOWN_FAMILY = Bytes.toBytes("wrong_cf");
+
+  @Rule
+  public TestName name = new TestName();
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    UTIL.startMiniCluster(1);
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  @After
+  public void tearDownAfterTest() throws IOException {
+    Admin admin = UTIL.getAdmin();
+    for (TableName tableName : admin.listTableNames()) {
+      admin.disableTable(tableName);
+      admin.deleteTable(tableName);
+    }
+  }
+
+  private void addStoreFileToKnownFamily(RegionInfo region) throws IOException {
+    MasterFileSystem mfs = UTIL.getMiniHBaseCluster().getMaster().getMasterFileSystem();
+    Path regionDir =
+      FSUtils.getRegionDirFromRootDir(FSUtils.getRootDir(mfs.getConfiguration()), region);
+    Path familyDir = new Path(regionDir, Bytes.toString(UNKNOWN_FAMILY));
+    StoreFileWriter writer =
+      new StoreFileWriter.Builder(mfs.getConfiguration(), mfs.getFileSystem())
+        .withOutputDir(familyDir).withFileContext(new HFileContext()).build();
+    writer.close();
+  }
+
+  @Test
+  public void testSplit()
+      throws IOException, InterruptedException, ExecutionException, TimeoutException {
+    TableName tableName = TableName.valueOf(name.getMethodName());
+    Admin admin = UTIL.getAdmin();
+    admin.createTable(TableDescriptorBuilder.newBuilder(tableName)
+      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build());
+    RegionInfo region = admin.getRegions(tableName).get(0);
+    addStoreFileToKnownFamily(region);
+    admin.splitRegionAsync(region.getRegionName(), Bytes.toBytes(0)).get(30, TimeUnit.SECONDS);
+  }
+
+  @Test
+  public void testMerge()
+      throws IOException, InterruptedException, ExecutionException, TimeoutException {
+    TableName tableName = TableName.valueOf(name.getMethodName());
+    Admin admin = UTIL.getAdmin();
+    admin.createTable(
+      TableDescriptorBuilder.newBuilder(tableName)
+        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build(),
+      new byte[][] { Bytes.toBytes(0) });
+    List<RegionInfo> regions = admin.getRegions(tableName);
+    addStoreFileToKnownFamily(regions.get(0));
+    admin.mergeRegionsAsync(regions.get(0).getEncodedNameAsBytes(),
+      regions.get(1).getEncodedNameAsBytes(), false).get(30, TimeUnit.SECONDS);
+  }
+}
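To run just this new test locally, a Maven invocation along these lines should work (the module and test class names come from the patch; the exact flags are an assumption about the usual HBase/Surefire setup):

    mvn test -pl hbase-server -Dtest=TestIgnoreUnknownFamily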
