Skip to content

Commit 5a06739

Browse files
authored
HBASE-22627 Port HBASE-22617 (Recovered WAL directories not getting cleaned up) to branch-1 (#339)
HBASE-22617 Recovered WAL directories not getting cleaned up (Duo Zhang) Signed-off-by: Zach York <[email protected]>
1 parent 5023690 commit 5a06739

File tree

18 files changed

+132
-118
lines changed

18 files changed

+132
-118
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,8 @@
3232
import org.apache.hadoop.fs.FileSystem;
3333
import org.apache.hadoop.fs.Path;
3434
import org.apache.hadoop.fs.PathFilter;
35-
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
3635
import org.apache.hadoop.hbase.HRegionInfo;
3736
import org.apache.hadoop.hbase.classification.InterfaceAudience;
38-
import org.apache.hadoop.hbase.regionserver.HRegion;
3937
import org.apache.hadoop.hbase.regionserver.StoreFile;
4038
import org.apache.hadoop.hbase.util.Bytes;
4139
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -85,7 +83,7 @@ public static void archiveRegion(Configuration conf, FileSystem fs, HRegionInfo
8583
throws IOException {
8684
Path rootDir = FSUtils.getRootDir(conf);
8785
archiveRegion(fs, rootDir, FSUtils.getTableDir(rootDir, info.getTable()),
88-
HRegion.getRegionDir(rootDir, info));
86+
FSUtils.getRegionDirFromRootDir(rootDir, info));
8987
}
9088

9189
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
import org.apache.hadoop.hbase.TableName;
3232
import org.apache.hadoop.hbase.HConstants;
3333
import org.apache.hadoop.hbase.HRegionInfo;
34-
import org.apache.hadoop.hbase.regionserver.HRegion;
3534
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
3635
import org.apache.hadoop.hbase.util.FSUtils;
3736
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
@@ -486,7 +485,7 @@ public static Path getHFileFromBackReference(final Path rootDir, final Path link
486485
String linkName = createHFileLinkName(FSUtils.getTableName(tablePath),
487486
regionPath.getName(), hfileName);
488487
Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
489-
Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
488+
Path regionDir = new Path(linkTableDir, linkRegionName);
490489
return new Path(new Path(regionDir, familyPath.getName()), linkName);
491490
}
492491

hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ public boolean checkFileSystem() {
214214
return this.walFsOk;
215215
}
216216

217-
protected FileSystem getWALFileSystem() {
217+
public FileSystem getWALFileSystem() {
218218
return this.walFs;
219219
}
220220

@@ -691,6 +691,4 @@ public void archiveMetaLog(final ServerName serverName) {
691691
LOG.warn("Failed archiving meta log for server " + serverName, ie);
692692
}
693693
}
694-
695-
696694
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
import org.apache.hadoop.hbase.client.Scan;
4444
import org.apache.hadoop.hbase.client.Table;
4545
import org.apache.hadoop.hbase.exceptions.HBaseException;
46-
import org.apache.hadoop.hbase.regionserver.HRegion;
4746
import org.apache.hadoop.hbase.master.AssignmentManager;
4847
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
4948
import org.apache.hadoop.hbase.master.MasterFileSystem;
@@ -323,7 +322,7 @@ protected static void deleteFromFs(final MasterProcedureEnv env,
323322
for (HRegionInfo hri : regions) {
324323
LOG.debug("Archiving region " + hri.getRegionNameAsString() + " from FS");
325324
HFileArchiver.archiveRegion(fs, mfs.getRootDir(),
326-
tempTableDir, HRegion.getRegionDir(tempTableDir, hri.getEncodedName()));
325+
tempTableDir, new Path(tempTableDir, hri.getEncodedName()));
327326
}
328327
LOG.debug("Table '" + tableName + "' archived!");
329328
}

hbase-server/src/main/java/org/apache/hadoop/hbase/migration/NamespaceUpgrade.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,8 +319,7 @@ public void migrateMeta() throws IOException {
319319
}
320320

321321
// Since meta table name has changed rename meta region dir from its old encoding to new one
322-
Path oldMetaRegionDir = HRegion.getRegionDir(rootDir,
323-
new Path(newMetaDir, "1028785192").toString());
322+
Path oldMetaRegionDir = new Path(rootDir, new Path(newMetaDir, "1028785192").toString());
324323
if (fs.exists(oldMetaRegionDir)) {
325324
LOG.info("Migrating meta region " + oldMetaRegionDir + " to " + newMetaRegionDir);
326325
if (!fs.rename(oldMetaRegionDir, newMetaRegionDir)) {

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java

Lines changed: 50 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
import com.google.common.annotations.VisibleForTesting;
2222
import com.google.common.base.Optional;
2323
import com.google.common.base.Preconditions;
24+
import com.google.common.collect.Iterables;
2425
import com.google.common.collect.Lists;
2526
import com.google.common.collect.Maps;
27+
import com.google.common.collect.Sets;
2628
import com.google.common.io.Closeables;
2729
import com.google.protobuf.ByteString;
2830
import com.google.protobuf.Descriptors;
@@ -4173,7 +4175,7 @@ private void removeNonExistentColumnFamilyForReplay(
41734175
if (nonExistentList != null) {
41744176
for (byte[] family : nonExistentList) {
41754177
// Perhaps schema was changed between crash and replay
4176-
LOG.info("No family for " + Bytes.toString(family) + " omit from reply.");
4178+
LOG.info("No family for " + Bytes.toString(family) + " omit from replay.");
41774179
familyMap.remove(family);
41784180
}
41794181
}
@@ -4286,62 +4288,76 @@ protected long replayRecoveredEditsIfAny(Map<byte[], Long> maxSeqIdInStores,
42864288
minSeqIdForTheRegion = maxSeqIdInStore;
42874289
}
42884290
}
4289-
long seqid = minSeqIdForTheRegion;
4291+
long seqId = minSeqIdForTheRegion;
42904292

42914293
FileSystem walFS = getWalFileSystem();
4292-
Path regionDir = getWALRegionDir();
42934294
FileSystem rootFS = getFilesystem();
4294-
Path defaultRegionDir = getRegionDir(FSUtils.getRootDir(conf), getRegionInfo());
4295+
Path regionDir = FSUtils.getRegionDirFromRootDir(FSUtils.getRootDir(conf), getRegionInfo());
4296+
Path regionWALDir = getWALRegionDir();
4297+
Path wrongRegionWALDir = FSUtils.getWrongWALRegionDir(conf, getRegionInfo().getTable(),
4298+
getRegionInfo().getEncodedName());
42954299

4300+
// We made a mistake in HBASE-20734 so we need to do this dirty hack...
4301+
NavigableSet<Path> filesUnderWrongRegionWALDir =
4302+
WALSplitter.getSplitEditFilesSorted(walFS, wrongRegionWALDir);
4303+
seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
4304+
filesUnderWrongRegionWALDir, reporter, regionDir));
42964305
// This is to ensure backwards compatability with HBASE-20723 where recovered edits can appear
42974306
// under the root dir even if walDir is set.
4298-
NavigableSet<Path> filesUnderRootDir = null;
4299-
if (!regionDir.equals(defaultRegionDir)) {
4300-
filesUnderRootDir =
4301-
WALSplitter.getSplitEditFilesSorted(rootFS, defaultRegionDir);
4302-
seqid = Math.max(seqid,
4303-
replayRecoveredEditsForPaths(minSeqIdForTheRegion, rootFS, filesUnderRootDir, reporter,
4304-
defaultRegionDir));
4305-
}
4306-
NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(walFS, regionDir);
4307-
seqid = Math.max(seqid, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
4308-
files, reporter, regionDir));
4309-
4310-
if (seqid > minSeqIdForTheRegion) {
4307+
NavigableSet<Path> filesUnderRootDir = Sets.newTreeSet();
4308+
if (!regionWALDir.equals(regionDir)) {
4309+
filesUnderRootDir = WALSplitter.getSplitEditFilesSorted(rootFS, regionDir);
4310+
seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, rootFS,
4311+
filesUnderRootDir, reporter, regionDir));
4312+
}
4313+
NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(walFS, regionWALDir);
4314+
seqId = Math.max(seqId, replayRecoveredEditsForPaths(minSeqIdForTheRegion, walFS,
4315+
files, reporter, regionWALDir));
4316+
if (seqId > minSeqIdForTheRegion) {
43114317
// Then we added some edits to memory. Flush and cleanup split edit files.
4312-
internalFlushcache(null, seqid, stores.values(), status, false);
4318+
internalFlushcache(null, seqId, stores.values(), status, false);
43134319
}
4314-
// Now delete the content of recovered edits. We're done w/ them.
4315-
if (files.size() > 0 && this.conf.getBoolean("hbase.region.archive.recovered.edits", false)) {
4320+
// Now delete the content of recovered edits. We're done w/ them.
4321+
if (conf.getBoolean("hbase.region.archive.recovered.edits", false)) {
43164322
// For debugging data loss issues!
43174323
// If this flag is set, make use of the hfile archiving by making recovered.edits a fake
43184324
// column family. Have to fake out file type too by casting our recovered.edits as storefiles
4319-
String fakeFamilyName = WALSplitter.getRegionDirRecoveredEditsDir(regionDir).getName();
4320-
Set<StoreFile> fakeStoreFiles = new HashSet<>(files.size());
4321-
for (Path file: files) {
4322-
fakeStoreFiles.add(
4323-
new StoreFile(walFS, file, this.conf, null, null));
4325+
String fakeFamilyName = WALSplitter.getRegionDirRecoveredEditsDir(regionWALDir).getName();
4326+
Set<StoreFile> fakeStoreFiles = new HashSet<>();
4327+
for (Path file: Iterables.concat(files, filesUnderWrongRegionWALDir)) {
4328+
fakeStoreFiles.add(new StoreFile(walFS, file, conf, null, null));
4329+
}
4330+
for (Path file: filesUnderRootDir) {
4331+
fakeStoreFiles.add(new StoreFile(rootFS, file, conf, null, null));
43244332
}
43254333
getRegionWALFileSystem().removeStoreFiles(fakeFamilyName, fakeStoreFiles);
43264334
} else {
4327-
if (filesUnderRootDir != null) {
4328-
for (Path file : filesUnderRootDir) {
4329-
if (!rootFS.delete(file, false)) {
4330-
LOG.error("Failed delete of {} under root directory." + file);
4331-
} else {
4332-
LOG.debug("Deleted recovered.edits root directory file=" + file);
4333-
}
4335+
for (Path file : filesUnderRootDir) {
4336+
if (!rootFS.delete(file, false)) {
4337+
LOG.error("Failed delete of " + file + " from under the root directory");
4338+
} else {
4339+
LOG.debug("Deleted recovered.edits under root directory, file=" + file);
43344340
}
43354341
}
4336-
for (Path file: files) {
4342+
for (Path file : Iterables.concat(files, filesUnderWrongRegionWALDir)) {
43374343
if (!walFS.delete(file, false)) {
43384344
LOG.error("Failed delete of " + file);
43394345
} else {
43404346
LOG.debug("Deleted recovered.edits file=" + file);
43414347
}
43424348
}
43434349
}
4344-
return seqid;
4350+
4351+
// We have replayed all the recovered edits. Let's delete the wrong directories introduced
4352+
// in HBASE-20734, see HBASE-22617 for more details.
4353+
FileSystem walFs = getWalFileSystem();
4354+
if (walFs.exists(wrongRegionWALDir)) {
4355+
if (!walFs.delete(wrongRegionWALDir, true)) {
4356+
LOG.warn("Unable to delete " + wrongRegionWALDir);
4357+
}
4358+
}
4359+
4360+
return seqId;
43454361
}
43464362

43474363
private long replayRecoveredEditsForPaths(long minSeqIdForTheRegion, FileSystem fs,
@@ -7206,34 +7222,6 @@ public static void addRegionToMETA(final HRegion meta, final HRegion r) throws I
72067222
meta.put(row, HConstants.CATALOG_FAMILY, cells);
72077223
}
72087224

7209-
/**
7210-
* Computes the Path of the HRegion
7211-
*
7212-
* @param tabledir qualified path for table
7213-
* @param name ENCODED region name
7214-
* @return Path of HRegion directory
7215-
* @deprecated For tests only; to be removed.
7216-
*/
7217-
@Deprecated
7218-
public static Path getRegionDir(final Path tabledir, final String name) {
7219-
return new Path(tabledir, name);
7220-
}
7221-
7222-
/**
7223-
* Computes the Path of the HRegion
7224-
*
7225-
* @param rootdir qualified path of HBase root directory
7226-
* @param info HRegionInfo for the region
7227-
* @return qualified path of region directory
7228-
* @deprecated For tests only; to be removed.
7229-
*/
7230-
@Deprecated
7231-
@VisibleForTesting
7232-
public static Path getRegionDir(final Path rootdir, final HRegionInfo info) {
7233-
return new Path(
7234-
FSUtils.getTableDir(rootdir, info.getTable()), info.getEncodedName());
7235-
}
7236-
72377225
/**
72387226
* Determines if the specified row is within the row range specified by the
72397227
* specified HRegionInfo

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -640,19 +640,26 @@ Path commitDaughterRegion(final HRegionInfo regionInfo)
640640
/**
641641
* Create the region splits directory.
642642
*/
643-
void createSplitsDir() throws IOException {
643+
void createSplitsDir(HRegionInfo daughterA, HRegionInfo daughterB) throws IOException {
644644
Path splitdir = getSplitsDir();
645645
if (fs.exists(splitdir)) {
646646
LOG.info("The " + splitdir + " directory exists. Hence deleting it to recreate it");
647647
if (!deleteDir(splitdir)) {
648-
throw new IOException("Failed deletion of " + splitdir
649-
+ " before creating them again.");
648+
throw new IOException("Failed deletion of " + splitdir + " before creating them again.");
650649
}
651650
}
652651
// splitDir doesn't exists now. No need to do an exists() call for it.
653652
if (!createDir(splitdir)) {
654653
throw new IOException("Failed create of " + splitdir);
655654
}
655+
Path daughterATmpDir = getSplitsDir(daughterA);
656+
if (!createDir(daughterATmpDir)) {
657+
throw new IOException("Failed create of " + daughterATmpDir);
658+
}
659+
Path daughterBTmpDir = getSplitsDir(daughterB);
660+
if (!createDir(daughterBTmpDir)) {
661+
throw new IOException("Failed create of " + daughterBTmpDir);
662+
}
656663
}
657664

658665
/**

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransactionImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ public PairOfSameType<Region> stepsBeforePONR(final Server server,
351351
hri_b, std);
352352
}
353353

354-
this.parent.getRegionFileSystem().createSplitsDir();
354+
this.parent.getRegionFileSystem().createSplitsDir(hri_a, hri_b);
355355

356356
transition(SplitTransactionPhase.CREATE_SPLIT_DIR);
357357

hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -648,8 +648,9 @@ private void restoreReferenceFile(final Path familyDir, final HRegionInfo region
648648
if (linkPath != null) {
649649
in = HFileLink.buildFromHFileLinkPattern(conf, linkPath).open(fs);
650650
} else {
651-
linkPath = new Path(new Path(HRegion.getRegionDir(snapshotManifest.getSnapshotDir(),
652-
regionInfo.getEncodedName()), familyDir.getName()), hfileName);
651+
linkPath = new Path(new Path(new Path(snapshotManifest.getSnapshotDir(),
652+
regionInfo.getEncodedName()),
653+
familyDir.getName()), hfileName);
653654
in = fs.open(linkPath);
654655
}
655656
OutputStream out = fs.create(outPath);

hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@
8484
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
8585
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
8686
import org.apache.hadoop.hbase.protobuf.generated.FSProtos;
87-
import org.apache.hadoop.hbase.regionserver.HRegion;
8887
import org.apache.hadoop.hdfs.DistributedFileSystem;
8988
import org.apache.hadoop.io.IOUtils;
9089
import org.apache.hadoop.io.SequenceFile;
@@ -708,26 +707,36 @@ public static void waitOnSafeMode(final Configuration conf,
708707
* @return the region directory used to store WALs under the WALRootDir
709708
* @throws IOException if there is an exception determining the WALRootDir
710709
*/
711-
public static Path getWALRegionDir(final Configuration conf,
712-
final HRegionInfo regionInfo)
713-
throws IOException {
710+
public static Path getWALRegionDir(final Configuration conf, final HRegionInfo regionInfo)
711+
throws IOException {
714712
return new Path(getWALTableDir(conf, regionInfo.getTable()),
715713
regionInfo.getEncodedName());
716714
}
717715

716+
/**
717+
* Returns the WAL region directory based on the region info
718+
* @param conf configuration to determine WALRootDir
719+
* @param tableName the table name
720+
* @param encodedRegionName the encoded region name
721+
* @return the region directory used to store WALs under the WALRootDir
722+
* @throws IOException if there is an exception determining the WALRootDir
723+
*/
724+
public static Path getWALRegionDir(final Configuration conf, final TableName tableName,
725+
final String encodedRegionName) throws IOException {
726+
return new Path(getWALTableDir(conf, tableName), encodedRegionName);
727+
}
728+
718729
/**
719730
* Checks if meta region exists
720731
*
721732
* @param fs file system
722-
* @param rootdir root directory of HBase installation
733+
* @param rootDir root directory of HBase installation
723734
* @return true if exists
724735
* @throws IOException e
725736
*/
726737
@SuppressWarnings("deprecation")
727-
public static boolean metaRegionExists(FileSystem fs, Path rootdir)
728-
throws IOException {
729-
Path metaRegionDir =
730-
HRegion.getRegionDir(rootdir, HRegionInfo.FIRST_META_REGIONINFO);
738+
public static boolean metaRegionExists(FileSystem fs, Path rootDir) throws IOException {
739+
Path metaRegionDir = getRegionDirFromRootDir(rootDir, HRegionInfo.FIRST_META_REGIONINFO);
731740
return fs.exists(metaRegionDir);
732741
}
733742

@@ -861,8 +870,22 @@ public static Map<String, Integer> getTableFragmentation(
861870
*/
862871
public static Path getWALTableDir(final Configuration conf, final TableName tableName)
863872
throws IOException {
864-
return new Path(new Path(getWALRootDir(conf), tableName.getNamespaceAsString()),
873+
Path baseDir = new Path(getWALRootDir(conf), HConstants.BASE_NAMESPACE_DIR);
874+
return new Path(new Path(baseDir, tableName.getNamespaceAsString()),
875+
tableName.getQualifierAsString());
876+
}
877+
878+
/**
879+
* For backward compatibility with HBASE-20734, where we store recovered edits in a wrong
880+
* directory without BASE_NAMESPACE_DIR. See HBASE-22617 for more details.
881+
* @deprecated For compatibility, will be removed in 4.0.0.
882+
*/
883+
@Deprecated
884+
public static Path getWrongWALRegionDir(final Configuration conf, final TableName tableName,
885+
final String encodedRegionName) throws IOException {
886+
Path wrongTableDir = new Path(new Path(getWALRootDir(conf), tableName.getNamespaceAsString()),
865887
tableName.getQualifierAsString());
888+
return new Path(wrongTableDir, encodedRegionName);
866889
}
867890

868891
/**
@@ -1063,6 +1086,14 @@ protected boolean accept(Path p, @CheckForNull Boolean isDir) {
10631086
}
10641087
}
10651088

1089+
public static Path getRegionDirFromRootDir(Path rootDir, HRegionInfo region) {
1090+
return getRegionDirFromTableDir(getTableDir(rootDir, region.getTable()), region);
1091+
}
1092+
1093+
public static Path getRegionDirFromTableDir(Path tableDir, HRegionInfo region) {
1094+
return new Path(tableDir, ServerRegionReplicaUtil.getRegionInfoForFs(region).getEncodedName());
1095+
}
1096+
10661097
/**
10671098
* Given a particular table dir, return all the regiondirs inside it, excluding files such as
10681099
* .tableinfo

0 commit comments

Comments
 (0)