Skip to content

Commit 03e3c14

Browse files
committed
HBASE-21070 Fix SnapshotFileCache for HBase backed by S3
SnapshotFileCache depends on getting the last modified time of the snapshot directory, however, S3 FileSystem's do not update the last modified time of the top 'folder' when objects are added/removed.
1 parent 3f40df8 commit 03e3c14

File tree

2 files changed

+98
-71
lines changed

2 files changed

+98
-71
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java

Lines changed: 57 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
*/
1818
package org.apache.hadoop.hbase.master.snapshot;
1919

20-
import java.io.FileNotFoundException;
2120
import java.io.IOException;
2221
import java.util.Collection;
2322
import java.util.HashMap;
@@ -41,6 +40,7 @@
4140
import org.slf4j.Logger;
4241
import org.slf4j.LoggerFactory;
4342

43+
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
4444
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
4545

4646
/**
@@ -89,12 +89,13 @@ interface SnapshotFileInspector {
8989
private final FileSystem fs;
9090
private final SnapshotFileInspector fileInspector;
9191
private final Path snapshotDir;
92-
private final Set<String> cache = new HashSet<>();
92+
private Set<String> cache = new HashSet<>();
9393
/**
9494
* This is a helper map of information about the snapshot directories so we don't need to rescan
9595
* them if they haven't changed since the last time we looked.
9696
*/
97-
private final Map<String, SnapshotDirectoryInfo> snapshots = new HashMap<>();
97+
private Map<String, SnapshotDirectoryInfo> snapshots =
98+
new HashMap<String, SnapshotDirectoryInfo>();
9899
private final Timer refreshTimer;
99100

100101
private long lastModifiedTime = Long.MIN_VALUE;
@@ -214,67 +215,70 @@ public synchronized Iterable<FileStatus> getUnreferencedFiles(Iterable<FileStatu
214215

215216
private synchronized void refreshCache() throws IOException {
216217
// get the status of the snapshots directory and check if it is has changes
217-
FileStatus dirStatus;
218-
try {
219-
dirStatus = fs.getFileStatus(snapshotDir);
220-
} catch (FileNotFoundException e) {
221-
if (this.cache.size() > 0) {
222-
LOG.error("Snapshot directory: " + snapshotDir + " doesn't exist");
218+
// We need to check the internal folder as Object Stores do not always update
219+
// the parent directory's modification time.
220+
FileStatus[] snapshotsOnDisk = FSUtils.listStatus(fs, snapshotDir,
221+
path -> !path.getName().equals(SnapshotDescriptionUtils.SNAPSHOT_TMP_DIR_NAME));
222+
if (snapshotsOnDisk == null) {
223+
// remove all the remembered snapshots because we don't have any left
224+
if (LOG.isDebugEnabled() && this.snapshots.size() > 0) {
225+
LOG.debug("No snapshots on-disk under: {}, cache empty", snapshotDir);
223226
}
227+
this.snapshots.clear();
228+
this.cache.clear();
224229
return;
225230
}
226231

232+
long subDirLastModifiedTime = Long.MIN_VALUE;
233+
for (FileStatus fileStatus : snapshotsOnDisk) {
234+
if (fileStatus.getModificationTime() > subDirLastModifiedTime) {
235+
subDirLastModifiedTime = fileStatus.getModificationTime();
236+
}
237+
}
238+
227239
// if the snapshot directory wasn't modified since we last check, we are done
228-
if (dirStatus.getModificationTime() <= this.lastModifiedTime) return;
240+
if (subDirLastModifiedTime <= this.lastModifiedTime &&
241+
snapshotsOnDisk.length == this.snapshots.size()) {
242+
LOG.debug("Snapshot directory has not been modified since the last refresh, " +
243+
"skipping cache refresh");
244+
return;
245+
}
229246

230247
// directory was modified, so we need to reload our cache
231248
// there could be a slight race here where we miss the cache, check the directory modification
232249
// time, then someone updates the directory, causing us to not scan the directory again.
233250
// However, snapshot directories are only created once, so this isn't an issue.
251+
LOG.debug("Snapshot Directory: {} has changed since last run, refreshing SnapshotFileCache.",
252+
snapshotDir);
234253

235-
// 1. update the modified time
236-
this.lastModifiedTime = dirStatus.getModificationTime();
254+
// 1. Create a temp cache
255+
final Set<String> tempCache = new HashSet<String>();
256+
Map<String, SnapshotDirectoryInfo> known = new HashMap<String, SnapshotDirectoryInfo>();
237257

238-
// 2.clear the cache
239-
this.cache.clear();
240-
Map<String, SnapshotDirectoryInfo> known = new HashMap<>();
241-
242-
// 3. check each of the snapshot directories
243-
FileStatus[] snapshots = FSUtils.listStatus(fs, snapshotDir);
244-
if (snapshots == null) {
245-
// remove all the remembered snapshots because we don't have any left
246-
if (LOG.isDebugEnabled() && this.snapshots.size() > 0) {
247-
LOG.debug("No snapshots on-disk, cache empty");
248-
}
249-
this.snapshots.clear();
250-
return;
251-
}
252-
253-
// 3.1 iterate through the on-disk snapshots
254-
for (FileStatus snapshot : snapshots) {
258+
// 2. iterate through the on-disk snapshots
259+
for (FileStatus snapshot : snapshotsOnDisk) {
255260
String name = snapshot.getPath().getName();
256-
// its not the tmp dir,
257-
if (!name.equals(SnapshotDescriptionUtils.SNAPSHOT_TMP_DIR_NAME)) {
258-
SnapshotDirectoryInfo files = this.snapshots.remove(name);
259-
// 3.1.1 if we don't know about the snapshot or its been modified, we need to update the
260-
// files the latter could occur where I create a snapshot, then delete it, and then make a
261-
// new snapshot with the same name. We will need to update the cache the information from
262-
// that new snapshot, even though it has the same name as the files referenced have
263-
// probably changed.
264-
if (files == null || files.hasBeenModified(snapshot.getModificationTime())) {
265-
// get all files for the snapshot and create a new info
266-
Collection<String> storedFiles = fileInspector.filesUnderSnapshot(snapshot.getPath());
267-
files = new SnapshotDirectoryInfo(snapshot.getModificationTime(), storedFiles);
268-
}
269-
// 3.2 add all the files to cache
270-
this.cache.addAll(files.getFiles());
271-
known.put(name, files);
261+
SnapshotDirectoryInfo files = this.snapshots.remove(name);
262+
// 2.1 if we don't know about the snapshot or its been modified, we need to update the
263+
// files the latter could occur where I create a snapshot, then delete it, and then make a
264+
// new snapshot with the same name. We will need to update the cache the information from
265+
// that new snapshot, even though it has the same name as the files referenced have
266+
// probably changed.
267+
if (files == null || files.hasBeenModified(snapshot.getModificationTime())) {
268+
// get all files for the snapshot and create a new info
269+
Collection<String> storedFiles = fileInspector.filesUnderSnapshot(snapshot.getPath());
270+
files = new SnapshotDirectoryInfo(snapshot.getModificationTime(), storedFiles);
272271
}
272+
// 2.2 add all the files to cache
273+
tempCache.addAll(files.getFiles());
274+
known.put(name, files);
273275
}
274276

275-
// 4. set the snapshots we are tracking
276-
this.snapshots.clear();
277-
this.snapshots.putAll(known);
277+
this.lastModifiedTime = subDirLastModifiedTime;
278+
279+
// 3. Update the cache and snapshot objects
280+
this.cache = tempCache;
281+
this.snapshots = known;
278282
}
279283

280284
/**
@@ -305,6 +309,11 @@ public boolean isStopped() {
305309
return this.stop;
306310
}
307311

312+
@VisibleForTesting
313+
long getLastModifiedTime() {
314+
return lastModifiedTime;
315+
}
316+
308317
/**
309318
* Information about a snapshot directory
310319
*/

hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotFileCache.java

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,18 @@ public class TestSnapshotFileCache {
5858

5959
private static final Logger LOG = LoggerFactory.getLogger(TestSnapshotFileCache.class);
6060
private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
61+
// don't refresh the cache unless we tell it to
62+
private static final long PERIOD = Long.MAX_VALUE;
6163
private static FileSystem fs;
6264
private static Path rootDir;
65+
private static Path snapshotDir;
6366

6467
@BeforeClass
6568
public static void startCluster() throws Exception {
6669
UTIL.startMiniDFSCluster(1);
6770
fs = UTIL.getDFSCluster().getFileSystem();
6871
rootDir = UTIL.getDefaultRootDirPath();
72+
snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir);
6973
}
7074

7175
@AfterClass
@@ -76,48 +80,57 @@ public static void stopCluster() throws Exception {
7680
@After
7781
public void cleanupFiles() throws Exception {
7882
// cleanup the snapshot directory
79-
Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir);
8083
fs.delete(snapshotDir, true);
8184
}
8285

8386
@Test
8487
public void testLoadAndDelete() throws IOException {
85-
// don't refresh the cache unless we tell it to
86-
long period = Long.MAX_VALUE;
87-
SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, period, 10000000,
88+
SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, PERIOD, 10000000,
8889
"test-snapshot-file-cache-refresh", new SnapshotFiles());
8990

90-
createAndTestSnapshotV1(cache, "snapshot1a", false, true);
91+
createAndTestSnapshotV1(cache, "snapshot1a", false, true, false);
9192

92-
createAndTestSnapshotV2(cache, "snapshot2a", false, true);
93+
createAndTestSnapshotV2(cache, "snapshot2a", false, true, false);
9394
}
9495

9596
@Test
9697
public void testReloadModifiedDirectory() throws IOException {
97-
// don't refresh the cache unless we tell it to
98-
long period = Long.MAX_VALUE;
99-
SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, period, 10000000,
98+
SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, PERIOD, 10000000,
10099
"test-snapshot-file-cache-refresh", new SnapshotFiles());
101100

102-
createAndTestSnapshotV1(cache, "snapshot1", false, true);
101+
createAndTestSnapshotV1(cache, "snapshot1", false, true, false);
103102
// now delete the snapshot and add a file with a different name
104-
createAndTestSnapshotV1(cache, "snapshot1", false, false);
103+
createAndTestSnapshotV1(cache, "snapshot1", false, false, false);
105104

106-
createAndTestSnapshotV2(cache, "snapshot2", false, true);
105+
createAndTestSnapshotV2(cache, "snapshot2", false, true, false);
107106
// now delete the snapshot and add a file with a different name
108-
createAndTestSnapshotV2(cache, "snapshot2", false, false);
107+
createAndTestSnapshotV2(cache, "snapshot2", false, false, false);
109108
}
110109

111110
@Test
112111
public void testSnapshotTempDirReload() throws IOException {
113-
long period = Long.MAX_VALUE;
114-
// This doesn't refresh cache until we invoke it explicitly
115-
SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, period, 10000000,
112+
SnapshotFileCache cache =
113+
new SnapshotFileCache(fs, rootDir, PERIOD, 10000000, "test-snapshot-file-cache-refresh", new SnapshotFiles());
114+
115+
// Add a new non-tmp snapshot
116+
createAndTestSnapshotV1(cache, "snapshot0v1", false, false, false);
117+
createAndTestSnapshotV1(cache, "snapshot0v2", false, false, false);
118+
}
119+
120+
@Test
121+
public void testCacheUpdatedWhenLastModifiedOfSnapDirNotUpdated() throws IOException {
122+
SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, PERIOD, 10000000,
116123
"test-snapshot-file-cache-refresh", new SnapshotFiles());
117124

118125
// Add a new non-tmp snapshot
119-
createAndTestSnapshotV1(cache, "snapshot0v1", false, false);
120-
createAndTestSnapshotV1(cache, "snapshot0v2", false, false);
126+
createAndTestSnapshotV1(cache, "snapshot1v1", false, false, true);
127+
createAndTestSnapshotV1(cache, "snapshot1v2", false, false, true);
128+
129+
// Add a new tmp snapshot
130+
createAndTestSnapshotV2(cache, "snapshot2v1", true, false, true);
131+
132+
// Add another tmp snapshot
133+
createAndTestSnapshotV2(cache, "snapshot2v2", true, false, true);
121134
}
122135

123136
class SnapshotFiles implements SnapshotFileCache.SnapshotFileInspector {
@@ -130,23 +143,24 @@ public Collection<String> filesUnderSnapshot(final Path snapshotDir) throws IOEx
130143
}
131144

132145
private SnapshotMock.SnapshotBuilder createAndTestSnapshotV1(final SnapshotFileCache cache,
133-
final String name, final boolean tmp, final boolean removeOnExit) throws IOException {
146+
final String name, final boolean tmp, final boolean removeOnExit, boolean setFolderTime)
147+
throws IOException {
134148
SnapshotMock snapshotMock = new SnapshotMock(UTIL.getConfiguration(), fs, rootDir);
135149
SnapshotMock.SnapshotBuilder builder = snapshotMock.createSnapshotV1(name, name);
136-
createAndTestSnapshot(cache, builder, tmp, removeOnExit);
150+
createAndTestSnapshot(cache, builder, tmp, removeOnExit, setFolderTime);
137151
return builder;
138152
}
139153

140154
private void createAndTestSnapshotV2(final SnapshotFileCache cache, final String name,
141-
final boolean tmp, final boolean removeOnExit) throws IOException {
155+
final boolean tmp, final boolean removeOnExit, boolean setFolderTime) throws IOException {
142156
SnapshotMock snapshotMock = new SnapshotMock(UTIL.getConfiguration(), fs, rootDir);
143157
SnapshotMock.SnapshotBuilder builder = snapshotMock.createSnapshotV2(name, name);
144-
createAndTestSnapshot(cache, builder, tmp, removeOnExit);
158+
createAndTestSnapshot(cache, builder, tmp, removeOnExit, setFolderTime);
145159
}
146160

147161
private void createAndTestSnapshot(final SnapshotFileCache cache,
148162
final SnapshotMock.SnapshotBuilder builder,
149-
final boolean tmp, final boolean removeOnExit) throws IOException {
163+
final boolean tmp, final boolean removeOnExit, boolean setFolderTime) throws IOException {
150164
List<Path> files = new ArrayList<>();
151165
for (int i = 0; i < 3; ++i) {
152166
for (Path filePath: builder.addRegion()) {
@@ -157,6 +171,10 @@ private void createAndTestSnapshot(final SnapshotFileCache cache,
157171
// Finalize the snapshot
158172
builder.commit();
159173

174+
if (setFolderTime) {
175+
fs.setTimes(snapshotDir, cache.getLastModifiedTime(), -1);
176+
}
177+
160178
// Make sure that all files are still present
161179
for (Path path: files) {
162180
assertFalse("Cache didn't find " + path, contains(getNonSnapshotFiles(cache, path), path));

0 commit comments

Comments
 (0)