Skip to content

Commit f66d67b

Browse files
frostruan authored and Apache9 committed
HBASE-27305 add an option to skip file splitting when bulkload hfiles (#4709)
Co-authored-by: huiruan <[email protected]>
Signed-off-by: Duo Zhang <[email protected]>
(cherry picked from commit 00a719e)
Conflicts: hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
1 parent 1bd0b58 commit f66d67b

File tree

2 files changed

+37
-0
lines changed

2 files changed

+37
-0
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/tool/LoadIncrementalHFiles.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,9 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
141141
BulkLoadHFiles.IGNORE_UNMATCHED_CF_CONF_KEY;
142142
public final static String ALWAYS_COPY_FILES = BulkLoadHFiles.ALWAYS_COPY_FILES;
143143

144+
public static final String FAIL_IF_NEED_SPLIT_HFILE =
145+
"hbase.loadincremental.fail.if.need.split.hfile";
146+
144147
// We use a '.' prefix which is ignored when walking directory trees
145148
// above. It is invalid family name.
146149
static final String TMP_DIR = ".tmp";
@@ -162,6 +165,8 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
162165

163166
private boolean replicate = true;
164167

168+
private boolean failIfNeedSplitHFile = false;
169+
165170
/**
166171
* Represents an HFile waiting to be loaded. A queue is used in this class in order to support
167172
* the case where a region has split during the process of the load. When this happens, the HFile
@@ -195,6 +200,7 @@ public void initialize() {
195200
assignSeqIds = conf.getBoolean(ASSIGN_SEQ_IDS, true);
196201
maxFilesPerRegionPerFamily = conf.getInt(MAX_FILES_PER_REGION_PER_FAMILY, 32);
197202
bulkLoadByFamily = conf.getBoolean(BulkLoadHFiles.BULK_LOAD_HFILES_BY_FAMILY, false);
203+
failIfNeedSplitHFile = conf.getBoolean(FAIL_IF_NEED_SPLIT_HFILE, false);
198204
nrThreads =
199205
conf.getInt("hbase.loadincremental.threads.max", Runtime.getRuntime().availableProcessors());
200206
numRetries = new AtomicInteger(0);
@@ -803,6 +809,11 @@ CacheConfig.DISABLED, true, getConf())) {
803809
Bytes.compareTo(last.get(), startEndKeys.getSecond()[firstKeyRegionIdx]) < 0
804810
|| Bytes.equals(startEndKeys.getSecond()[firstKeyRegionIdx], HConstants.EMPTY_BYTE_ARRAY);
805811
if (!lastKeyInRange) {
812+
if (failIfNeedSplitHFile) {
813+
throw new IOException(
814+
"The key range of hfile=" + hfilePath + " fits into no region. " + "And because "
815+
+ FAIL_IF_NEED_SPLIT_HFILE + " was set to true, we just skip the next steps.");
816+
}
806817
int lastKeyRegionIdx = getRegionIndex(startEndKeys, last.get());
807818
int splitIdx = (firstKeyRegionIdx + lastKeyRegionIdx) >>> 1;
808819
// make sure the splitPoint is valid in case region overlap occur, maybe the splitPoint bigger

hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFiles.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import static org.junit.Assert.assertArrayEquals;
2121
import static org.junit.Assert.assertEquals;
22+
import static org.junit.Assert.assertThrows;
2223
import static org.junit.Assert.assertTrue;
2324
import static org.junit.Assert.fail;
2425

@@ -793,4 +794,29 @@ protected List<LoadQueueItem> tryAtomicRegionLoad(Connection connection, TableNa
793794
util.getConfiguration().setBoolean(BulkLoadHFiles.BULK_LOAD_HFILES_BY_FAMILY, false);
794795
}
795796
}
797+
798+
/**
 * Checks the behavior of the bulk load tool when {@code FAIL_IF_NEED_SPLIT_HFILE} is set to
 * {@code true}: after the table has been split into more than one region, running the tool on a
 * directory holding a single HFile is expected to throw an {@link IOException}, and afterwards
 * every region of the table is expected to hold exactly one store file for {@code FAMILY}.
 */
@Test
799+
public void testFailIfNeedSplitHFile() throws IOException {
800+
TableName tableName = TableName.valueOf(tn.getMethodName());
801+
Table table = util.createTable(tableName, FAMILY);
802+
803+
util.loadTable(table, FAMILY);
804+
// The HFile to bulk load is written under <working dir>/<FAMILY>/file.
// NOTE(review): "aaa" and "zzz" are presumably the first and last row keys and 1000 the number
// of rows - confirm against HFileTestUtil.createHFile.
805+
FileSystem fs = util.getTestFileSystem();
806+
Path sfPath = new Path(fs.getWorkingDirectory(), new Path(Bytes.toString(FAMILY), "file"));
807+
HFileTestUtil.createHFile(util.getConfiguration(), fs, sfPath, FAMILY, QUALIFIER,
808+
Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);
809+
// Request a split, then block until the table reports more than one region.
// NOTE(review): 10000 / 1000 are presumably the timeout and polling interval in ms - confirm.
810+
util.getAdmin().split(tableName);
811+
util.waitFor(10000, 1000, () -> util.getAdmin().getRegions(tableName).size() > 1);
812+
// Enable the option on a copy of the configuration rather than on the shared test one.
813+
Configuration config = new Configuration(util.getConfiguration());
814+
config.setBoolean(BulkLoadHFilesTool.FAIL_IF_NEED_SPLIT_HFILE, true);
815+
BulkLoadHFilesTool tool = new BulkLoadHFilesTool(config);
816+
// Tool arguments: the directory containing the family directory, then the target table name.
817+
String[] args = new String[] { fs.getWorkingDirectory().toString(), tableName.toString() };
818+
assertThrows(IOException.class, () -> tool.run(args));
// After the failed run, each region must have exactly one store file for FAMILY.
// NOTE(review): presumably this shows nothing was bulk loaded - confirm.
819+
util.getHBaseCluster().getRegions(tableName)
820+
.forEach(r -> assertEquals(1, r.getStore(FAMILY).getStorefiles().size()));
821+
}
796822
}

0 commit comments

Comments
 (0)