@@ -35,7 +35,6 @@
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
@@ -184,12 +183,12 @@ public int scan() throws IOException {
for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
if (this.services.isInMaintenanceMode()) {
// Stop cleaning if the master is in maintenance mode
LOG.debug("In maintenence mode, not cleaning");
LOG.debug("In maintenance mode, not cleaning");
break;
}

List<RegionInfo> parents = CatalogFamilyFormat.getMergeRegions(e.getValue().rawCells());
if (parents != null && cleanMergeRegion(e.getKey(), parents)) {
if (parents != null && cleanMergeRegion(this.services, e.getKey(), parents)) {
gcs++;
}
}
@@ -202,7 +201,7 @@ public int scan() throws IOException {
if (this.services.isInMaintenanceMode()) {
// Stop cleaning if the master is in maintenance mode
if (LOG.isDebugEnabled()) {
LOG.debug("In maintenence mode, not cleaning");
LOG.debug("In maintenance mode, not cleaning");
}
break;
}
@@ -249,30 +248,24 @@ public CatalogJanitorReport getLastReport() {
* @return true if we delete references in merged region on hbase:meta and archive the files on
* the file system
*/
private boolean cleanMergeRegion(final RegionInfo mergedRegion, List<RegionInfo> parents)
throws IOException {
static boolean cleanMergeRegion(MasterServices services, final RegionInfo mergedRegion,
List<RegionInfo> parents) throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("Cleaning merged region {}", mergedRegion);
}
FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
Path rootdir = this.services.getMasterFileSystem().getRootDir();
Path tabledir = CommonFSUtils.getTableDir(rootdir, mergedRegion.getTable());
TableDescriptor htd = getDescriptor(mergedRegion.getTable());
HRegionFileSystem regionFs = null;
try {
regionFs = HRegionFileSystem.openRegionFromFileSystem(this.services.getConfiguration(), fs,
tabledir, mergedRegion, true);
} catch (IOException e) {
LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName());
}
if (regionFs == null || !regionFs.hasReferences(htd)) {

Pair<Boolean, Boolean> result =
checkRegionReferences(services, mergedRegion.getTable(), mergedRegion);

if (hasNoReferences(result)) {
if (LOG.isDebugEnabled()) {
LOG.debug(
"Deleting parents ({}) from fs; merged child {} no longer holds references", parents
.stream().map(r -> RegionInfo.getShortNameToLog(r)).collect(Collectors.joining(", ")),
mergedRegion);
}
ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor();

ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
GCMultipleMergedRegionsProcedure mergeRegionProcedure =
new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), mergedRegion, parents);
pe.submitProcedure(mergeRegionProcedure);
@@ -281,7 +274,15 @@ private boolean cleanMergeRegion(final RegionInfo mergedRegion, List<RegionInfo>
mergedRegion);
}
return true;
} else {
if (LOG.isDebugEnabled()) {
LOG.debug(
"Deferring cleanup up of {} parents of merged region {}, because references "
+ "still exist in merged region or we encountered an exception in checking",
parents.size(), mergedRegion.getEncodedName());
}
}

return false;
}

@@ -333,8 +334,10 @@ static boolean cleanParent(MasterServices services, RegionInfo parent, Result ro
}
// Run checks on each daughter split.
PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(rowContent);
Pair<Boolean, Boolean> a = checkDaughterInFs(services, parent, daughters.getFirst());
Pair<Boolean, Boolean> b = checkDaughterInFs(services, parent, daughters.getSecond());
Pair<Boolean, Boolean> a =
checkRegionReferences(services, parent.getTable(), daughters.getFirst());
Pair<Boolean, Boolean> b =
checkRegionReferences(services, parent.getTable(), daughters.getSecond());
if (hasNoReferences(a) && hasNoReferences(b)) {
String daughterA =
daughters.getFirst() != null ? daughters.getFirst().getShortNameToLog() : "null";
@@ -387,59 +390,45 @@ private static boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
}

/**
* Checks if a daughter region -- either splitA or splitB -- still holds references to parent.
* @param parent Parent region
* @param daughter Daughter region
* @return A pair where the first boolean says whether or not the daughter region directory exists
* in the filesystem and then the second boolean says whether the daughter has references
* to the parent.
* Checks if a region still holds references to parent.
* @param tableName The table for the region
* @param region The region to check
* @return A pair where the first boolean says whether the region directory exists in the
* filesystem and then the second boolean says whether the region has references to a
* parent.
*/
private static Pair<Boolean, Boolean> checkDaughterInFs(MasterServices services,
final RegionInfo parent, final RegionInfo daughter) throws IOException {
if (daughter == null) {
private static Pair<Boolean, Boolean> checkRegionReferences(MasterServices services,
TableName tableName, RegionInfo region) throws IOException {
if (region == null) {
return new Pair<>(Boolean.FALSE, Boolean.FALSE);
}

FileSystem fs = services.getMasterFileSystem().getFileSystem();
Path rootdir = services.getMasterFileSystem().getRootDir();
Path tabledir = CommonFSUtils.getTableDir(rootdir, daughter.getTable());

Path daughterRegionDir = new Path(tabledir, daughter.getEncodedName());

HRegionFileSystem regionFs;
Path tabledir = CommonFSUtils.getTableDir(rootdir, tableName);
Path regionDir = new Path(tabledir, region.getEncodedName());

try {
if (!CommonFSUtils.isExists(fs, daughterRegionDir)) {
if (!CommonFSUtils.isExists(fs, regionDir)) {
return new Pair<>(Boolean.FALSE, Boolean.FALSE);
}
} catch (IOException ioe) {
LOG.error("Error trying to determine if daughter region exists, "
+ "assuming exists and has references", ioe);
LOG.error("Error trying to determine if region exists, assuming exists and has references",
ioe);
return new Pair<>(Boolean.TRUE, Boolean.TRUE);
Comment on lines +416 to 418

Contributor:

So this is the big difference between the two implementations, right?

Contributor Author:

Correct. Previously we'd treat this error as "let's assume it doesn't exist and go forward with cleanup". That assumption was inaccurate in at least some failure modes, so it was unsafe. The new assumption is "we don't know if the region exists, so let's not risk cleaning up". This is safer, but the downside is that we might wait longer to GC merged regions. I think that's not a problem at all.

Contributor:

> This is safer, but the downside is that we might wait longer to GC merged regions. I think that's not a problem at all.

+1
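The thread above is describing the semantics of the (exists, hasReferences) pair that checkRegionReferences returns when the filesystem check fails. A minimal sketch of that contract, using simplified stand-in types rather than the actual HBase classes (RegionStorage, Check, and the method bodies below are illustrative only):

```java
import java.io.IOException;

final class ReferenceCheckSketch {

  /** Simplified stand-in for a Pair<Boolean, Boolean> of (regionDirExists, hasReferences). */
  static final class Check {
    final boolean dirExists;
    final boolean hasReferences;

    Check(boolean dirExists, boolean hasReferences) {
      this.dirExists = dirExists;
      this.hasReferences = hasReferences;
    }
  }

  /** Simplified stand-in for the filesystem calls the janitor makes. */
  interface RegionStorage {
    boolean regionDirExists(String encodedRegionName) throws IOException;
    boolean hasReferenceFiles(String encodedRegionName) throws IOException;
  }

  static Check checkRegionReferences(RegionStorage storage, String encodedRegionName) {
    try {
      if (!storage.regionDirExists(encodedRegionName)) {
        // Nothing on disk: nothing left that could reference a parent.
        return new Check(false, false);
      }
      return new Check(true, storage.hasReferenceFiles(encodedRegionName));
    } catch (IOException e) {
      // The old behaviour effectively let cleanup proceed after an error here; the
      // safer assumption is that the region exists and still holds references.
      return new Check(true, true);
    }
  }

  static boolean hasNoReferences(Check c) {
    // Cleanup of the parents goes ahead only when the child directory is gone or is
    // known to hold no reference files; the (true, true) error result blocks it.
    return !c.dirExists || !c.hasReferences;
  }
}
```

The trade-off is exactly the one noted in the thread: an I/O error now defers GC of the merged/split parents to a later janitor run rather than risking deletion of files that are still referenced.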

}

boolean references = false;
TableDescriptor parentDescriptor = services.getTableDescriptors().get(parent.getTable());
TableDescriptor tableDescriptor = services.getTableDescriptors().get(tableName);
try {
regionFs = HRegionFileSystem.openRegionFromFileSystem(services.getConfiguration(), fs,
tabledir, daughter, true);

for (ColumnFamilyDescriptor family : parentDescriptor.getColumnFamilies()) {
references = regionFs.hasReferences(family.getNameAsString());
if (references) {
break;
}
}
HRegionFileSystem regionFs = HRegionFileSystem
.openRegionFromFileSystem(services.getConfiguration(), fs, tabledir, region, true);
boolean references = regionFs.hasReferences(tableDescriptor);
return new Pair<>(Boolean.TRUE, references);
} catch (IOException e) {
LOG.error("Error trying to determine referenced files from : " + daughter.getEncodedName()
+ ", to: " + parent.getEncodedName() + " assuming has references", e);
LOG.error("Error trying to determine if region {} has references, assuming it does",
region.getEncodedName(), e);
return new Pair<>(Boolean.TRUE, Boolean.TRUE);
}
return new Pair<>(Boolean.TRUE, references);
}

private TableDescriptor getDescriptor(final TableName tableName) throws IOException {
return this.services.getTableDescriptors().get(tableName);
}

private void updateAssignmentManagerMetrics() {
@@ -21,8 +21,12 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.util.ArrayList;
@@ -34,6 +38,7 @@
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -52,6 +57,7 @@
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.Reference;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.assignment.MockMasterServices;
import org.apache.hadoop.hbase.master.janitor.CatalogJanitor.SplitParentFirstComparator;
@@ -126,6 +132,100 @@ private RegionInfo createRegionInfo(TableName tableName, byte[] startKey, byte[]
.setSplit(split).build();
}

@Test
public void testCleanMerge() throws IOException {
TableDescriptor td = createTableDescriptorForCurrentMethod();
// Create regions.
RegionInfo merged =
createRegionInfo(td.getTableName(), Bytes.toBytes("aaa"), Bytes.toBytes("eee"));
RegionInfo parenta =
createRegionInfo(td.getTableName(), Bytes.toBytes("aaa"), Bytes.toBytes("ccc"));
RegionInfo parentb =
createRegionInfo(td.getTableName(), Bytes.toBytes("ccc"), Bytes.toBytes("eee"));

List<RegionInfo> parents = new ArrayList<>();
parents.add(parenta);
parents.add(parentb);

Path rootdir = this.masterServices.getMasterFileSystem().getRootDir();
Path tabledir = CommonFSUtils.getTableDir(rootdir, td.getTableName());
Path storedir =
HRegionFileSystem.getStoreHomedir(tabledir, merged, td.getColumnFamilies()[0].getName());

Path parentaRef = createMergeReferenceFile(storedir, merged, parenta);
Path parentbRef = createMergeReferenceFile(storedir, merged, parentb);

// references exist, should not clean
assertFalse(CatalogJanitor.cleanMergeRegion(masterServices, merged, parents));

masterServices.getMasterFileSystem().getFileSystem().delete(parentaRef, false);

// one reference still exists, should not clean
assertFalse(CatalogJanitor.cleanMergeRegion(masterServices, merged, parents));

masterServices.getMasterFileSystem().getFileSystem().delete(parentbRef, false);

// all references removed, should clean
assertTrue(CatalogJanitor.cleanMergeRegion(masterServices, merged, parents));
}

@Test
public void testDontCleanMergeIfFileSystemException() throws IOException {
TableDescriptor td = createTableDescriptorForCurrentMethod();
// Create regions.
RegionInfo merged =
createRegionInfo(td.getTableName(), Bytes.toBytes("aaa"), Bytes.toBytes("eee"));
RegionInfo parenta =
createRegionInfo(td.getTableName(), Bytes.toBytes("aaa"), Bytes.toBytes("ccc"));
RegionInfo parentb =
createRegionInfo(td.getTableName(), Bytes.toBytes("ccc"), Bytes.toBytes("eee"));

List<RegionInfo> parents = new ArrayList<>();
parents.add(parenta);
parents.add(parentb);

Path rootdir = this.masterServices.getMasterFileSystem().getRootDir();
Path tabledir = CommonFSUtils.getTableDir(rootdir, td.getTableName());
Path storedir =
HRegionFileSystem.getStoreHomedir(tabledir, merged, td.getColumnFamilies()[0].getName());
createMergeReferenceFile(storedir, merged, parenta);

MasterServices mockedMasterServices = spy(masterServices);
MasterFileSystem mockedMasterFileSystem = spy(masterServices.getMasterFileSystem());
FileSystem mockedFileSystem = spy(masterServices.getMasterFileSystem().getFileSystem());

when(mockedMasterServices.getMasterFileSystem()).thenReturn(mockedMasterFileSystem);
when(mockedMasterFileSystem.getFileSystem()).thenReturn(mockedFileSystem);

// throw on the first exists check
doThrow(new IOException("Some exception")).when(mockedFileSystem).exists(any());

assertFalse(CatalogJanitor.cleanMergeRegion(mockedMasterServices, merged, parents));

// throw on the second exists check (within HRegionFileSystem)
AtomicBoolean returned = new AtomicBoolean(false);
doAnswer(invocationOnMock -> {
if (!returned.get()) {
returned.set(true);
return masterServices.getMasterFileSystem().getFileSystem()
.exists(invocationOnMock.getArgument(0));
}
throw new IOException("Some exception");
}).when(mockedFileSystem).exists(any());

assertFalse(CatalogJanitor.cleanMergeRegion(mockedMasterServices, merged, parents));
}

private Path createMergeReferenceFile(Path storeDir, RegionInfo mergedRegion,
RegionInfo parentRegion) throws IOException {
Reference ref = Reference.createTopReference(mergedRegion.getStartKey());
long now = EnvironmentEdgeManager.currentTime();
// Reference name has this format: StoreFile#REF_NAME_PARSER
Path p = new Path(storeDir, Long.toString(now) + "." + parentRegion.getEncodedName());
FileSystem fs = this.masterServices.getMasterFileSystem().getFileSystem();
return ref.write(fs, p);
}

/**
* Test clearing a split parent.
*/