From 21ff17c412d8e25432db30aa8b61983640ce8d74 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 16 Jan 2020 11:39:20 +0000 Subject: [PATCH 1/6] HADOOP-16746 mkdirs and auth mode Not yet finished: * new BulkOperationState type of mkdirs for log * no longer (accidentally) removing auth flag passed in. * ignored test now works * other tests fail because they need a way to mark a dir as non-auth Change-Id: I975931b22756bc51235868b782aff20286d55681 --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 2 +- .../fs/s3a/s3guard/BulkOperationState.java | 4 ++ .../fs/s3a/s3guard/DynamoDBMetadataStore.java | 20 +++++-- ...ynamoDBMetadataStoreAuthoritativeMode.java | 59 ++++++++++++++++--- 4 files changed, 69 insertions(+), 16 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 615be2cfbdb95..94d10b08df5c3 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3524,7 +3524,7 @@ void finishedWrite(String key, long length, String eTag, String versionId, // information gleaned from addAncestors is preserved into the // subsequent put. stateToClose = S3Guard.initiateBulkWrite(metadataStore, - BulkOperationState.OperationType.Put, + BulkOperationState.OperationType.Mkdir, keyToPath(key)); activeState = stateToClose; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/BulkOperationState.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/BulkOperationState.java index db9abce265e3c..fcb3dce4d0b45 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/BulkOperationState.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/BulkOperationState.java @@ -98,5 +98,9 @@ public enum OperationType { * Listing update. */ Listing, + /** + * Mkdir operation. + */ + Mkdir, } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java index 1ea82dfae373e..1e59679f27da2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java @@ -899,19 +899,18 @@ private Collection completeAncestry( List sortedPaths = new ArrayList<>(pathsToCreate); sortedPaths.sort(PathOrderComparators.TOPMOST_PM_FIRST); // iterate through the paths. - for (DDBPathMetadata meta : sortedPaths) { - Preconditions.checkArgument(meta != null); - Path path = meta.getFileStatus().getPath(); + for (DDBPathMetadata entry : sortedPaths) { + Preconditions.checkArgument(entry != null); + Path path = entry.getFileStatus().getPath(); LOG.debug("Adding entry {}", path); if (path.isRoot()) { // this is a root entry: do not add it. break; } - // create the new entry - DDBPathMetadata entry = new DDBPathMetadata(meta); // add it to the ancestor state, failing if it is already there and // of a different type. DDBPathMetadata oldEntry = ancestorState.put(path, entry); + boolean addAncestors = true; if (oldEntry != null) { if (!oldEntry.getFileStatus().isDirectory() || !entry.getFileStatus().isDirectory()) { @@ -928,12 +927,18 @@ private Collection completeAncestry( // a directory is already present. Log and continue. LOG.debug("Directory at {} being updated with value {}", path, entry); + // and we skip the the subsequent parent scan as we've already been + // here + addAncestors = false; } } // add the entry to the ancestry map as an explicitly requested entry. ancestry.put(path, Pair.of(EntryOrigin.Requested, entry)); + // now scan up the ancestor tree to see if there are any + // immediately missing entries. Path parent = path.getParent(); - while (!parent.isRoot() && !ancestry.containsKey(parent)) { + while (addAncestors + && !parent.isRoot() && !ancestry.containsKey(parent)) { if (!ancestorState.findEntry(parent, true)) { // there is no entry in the ancestor state. // look in the store @@ -947,6 +952,9 @@ private Collection completeAncestry( md = itemToPathMetadata(item, username); LOG.debug("Found existing entry for parent: {}", md); newEntry = Pair.of(EntryOrigin.Retrieved, md); + // and we break, assuming that if there is an entry, its parents + // are valid too. + addAncestors = false; } else { // A directory entry was not found in the DB. Create one. LOG.debug("auto-create ancestor path {} for child path {}", diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java index 126176fbfd20c..5bedd43709768 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java @@ -148,6 +148,8 @@ public class ITestDynamoDBMetadataStoreAuthoritativeMode */ private final List toolsToClose = new ArrayList<>(); + private DynamoDBMetadataStore metastore; + /** * After all tests have run, close the filesystems. */ @@ -205,9 +207,10 @@ public void setup() throws Exception { S3_METADATA_STORE_IMPL); unguardedFS = (S3AFileSystem) FileSystem.newInstance(uri, unguardedConf); } + metastore = (DynamoDBMetadataStore) authFS.getMetadataStore(); auditor = new AuthoritativeAuditOperation( authFS.createStoreContext(), - (DynamoDBMetadataStore) authFS.getMetadataStore(), + metastore, true, true); @@ -272,7 +275,7 @@ public void testEmptyDirMarkerIsAuth() { } @Test - @Ignore("HADOOP-16697. Needs mkdir to be authoritative") +// @Ignore("HADOOP-16697. Needs mkdir to be authoritative") public void testMkDirAuth() throws Throwable { describe("create an empty dir and assert it is tagged as authoritative"); authFS.mkdirs(dir); @@ -282,10 +285,11 @@ public void testMkDirAuth() throws Throwable { @Test public void testListStatusMakesEmptyDirAuth() throws Throwable { describe("Verify listStatus marks an Empty dir as auth"); - authFS.mkdirs(dir); - expectNonauthRecursive(dir); + mkNonauthDir(dir); + // initial dir is non-auth + expectNonauthNonRecursive(dir); authFS.listStatus(dir); - // dir is auth; subdir is not + // dir is auth; expectAuthRecursive(dir); // Next list will not go to s3 assertListDoesNotUpdateAuth(dir); @@ -298,13 +302,15 @@ public void testListStatusMakesDirAuth() throws Throwable { mkAuthDir(dir); expectAuthRecursive(dir); + // create subdir as auth; parent dir entry is created on demand + // and marked as non-auth authFS.mkdirs(subdir); // dir is auth; subdir is not - expectAuthNonRecursive(dir); + expectAuthNonRecursive(subdir); expectNonauthRecursive(dir); - assertListDoesNotUpdateAuth(dir); + assertListDoesNotUpdateAuth(subdir); // Subdir list makes it auth - assertListUpdatesAuth(subdir); + assertListUpdatesAuth(dir); } @Test @@ -695,9 +701,31 @@ private void assertListDoesNotUpdateAuth(Path path) throws Exception { */ private void mkAuthDir(Path path) throws IOException { authFS.mkdirs(path); - authFS.listStatus(path); } + /** + * Create a non-auth directory, by creating (then deleting) a subdir. + * @param path dir + */ + private void mkNonauthDir(Path path) throws IOException { + authFS.mkdirs(dir); + // overwrite + S3Guard.putWithTtl(metastore, + nonAuthEmptyDirectoryMarker((S3AFileStatus) authFS.getFileStatus(dir)), + null, null ); + } + + /** + * Create an empty dir marker which, when passed to the + * DDB metastore, is considered authoritative. + * @param status file status + * @return path metadata. + */ + private PathMetadata nonAuthEmptyDirectoryMarker( + final S3AFileStatus status) { + return new DDBPathMetadata(status, Tristate.TRUE, + false, true, 0); + } /** * Performed a recursive audit of the directory * -require everything to be authoritative. @@ -729,4 +757,17 @@ private Path expectNonauthRecursive(Path path) throws Exception { .getPath(); } + /** + * Performed a recursive audit of the directory + * -expect a failure. + * @param path directory + * @return the path returned by the exception + */ + private Path expectNonauthNonRecursive(Path path) throws Exception { + return intercept( + AuthoritativeAuditOperation.NonAuthoritativeDirException.class, + () -> auditor.executeAudit(path, true, true)) + .getPath(); + } + } From 5866f7f5e0d759e105ed2d803210fb22e30a9248 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 16 Jan 2020 17:36:37 +0000 Subject: [PATCH 2/6] HADOOP-16746 mkdirs and auth mode More testing on newly discovered issue: listStatus marks a child dir as unauth, even if it was auth Change-Id: I7d8ab25f5e73e9ea4767e8456f7e0afc92dec28c --- .../apache/hadoop/fs/s3a/s3guard/S3Guard.java | 2 +- ...ynamoDBMetadataStoreAuthoritativeMode.java | 42 ++++++++++++++----- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java index 6a1da50556d28..fcbd765483edf 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java @@ -316,7 +316,7 @@ public static FileStatus[] dirListingUnion(MetadataStore ms, Path path, if (!isAuthoritative){ FileStatus status = dirMetaMap.get(s.getPath()); - if (status != null + if (status != null && !s.isDirectory() && s.getModificationTime() > status.getModificationTime()) { LOG.debug("Update ms with newer metadata of: {}", status); S3Guard.putWithTtl(ms, pathMetadata, timeProvider, operationState); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java index 5bedd43709768..b049d3295c49f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java @@ -275,7 +275,6 @@ public void testEmptyDirMarkerIsAuth() { } @Test -// @Ignore("HADOOP-16697. Needs mkdir to be authoritative") public void testMkDirAuth() throws Throwable { describe("create an empty dir and assert it is tagged as authoritative"); authFS.mkdirs(dir); @@ -302,13 +301,27 @@ public void testListStatusMakesDirAuth() throws Throwable { mkAuthDir(dir); expectAuthRecursive(dir); - // create subdir as auth; parent dir entry is created on demand - // and marked as non-auth authFS.mkdirs(subdir); - // dir is auth; subdir is not - expectAuthNonRecursive(subdir); - expectNonauthRecursive(dir); - assertListDoesNotUpdateAuth(subdir); + // dir and subdirs are auth + expectAuthRecursive(dir); + expectAuthRecursive(subdir); + // now mark the dir as nonauth + markDirNonauth(dir); + expectNonauthNonRecursive(dir); + expectAuthRecursive(subdir); + + // look at the MD & make sure that the dir and subdir are auth + final DirListingMetadata listing = metastore.listChildren(dir); + Assertions.assertThat(listing) + .describedAs("metadata of %s", dir) + .matches(d -> !d.isAuthoritative(), "is not auth"); + Assertions.assertThat(listing.getListing()) + .describedAs("listing of %s", dir) + .hasSize(1) + .allMatch(md -> ((DDBPathMetadata) md).isAuthoritativeDir(), + "is auth"); + + // Subdir list makes it auth assertListUpdatesAuth(dir); } @@ -708,10 +721,19 @@ private void mkAuthDir(Path path) throws IOException { * @param path dir */ private void mkNonauthDir(Path path) throws IOException { - authFS.mkdirs(dir); + authFS.mkdirs(path); // overwrite + markDirNonauth(path); + } + + /** + * Mark a directory as nonauth. + * @param path path to the directory + * @throws IOException failure + */ + private void markDirNonauth(final Path path) throws IOException { S3Guard.putWithTtl(metastore, - nonAuthEmptyDirectoryMarker((S3AFileStatus) authFS.getFileStatus(dir)), + nonAuthEmptyDirectoryMarker((S3AFileStatus) authFS.getFileStatus(path)), null, null ); } @@ -724,7 +746,7 @@ private void mkNonauthDir(Path path) throws IOException { private PathMetadata nonAuthEmptyDirectoryMarker( final S3AFileStatus status) { return new DDBPathMetadata(status, Tristate.TRUE, - false, true, 0); + false, false, 0); } /** * Performed a recursive audit of the directory From d02fd85bb56747a4e06d823ed3c3d07216a3a2c9 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 17 Jan 2020 17:48:44 +0000 Subject: [PATCH 3/6] HADOOP-16746. Authoritative Directories This fixes the problem wherein a listStatus of a parent dir would stamp on the isAuthoritative flag of every child entry. This was caused by us blindly overriding entries with new ones. This patch * avoids writing unchanged entries by building a list of those and passing it down to the meta store in the put(DirListMeta) call. * in DDB: filter out of those entries. * in non-auth mode, build up a list of entries to add, and write in a batch at the end. This is more efficient than the one-by-one operation which was being performed, especially as there is no BulkOperationState to cache previous work. * adds tests to verify no DDB writes take place on repeated lists * fixes up all calls of put() to handle the new list of unchanged entries * Fixes a bug in TestS3Guard which caused NPEs The code in S3Guard.dirListingUnion() is a bit ugly as it has two very different sequences (auth vs nonauth) intermingled. We should consider splitting up the two union mechanisms, so as to make it easier to understand how the different update/unions work. Change-Id: If5f2427f49a46cb7827f1634b6528ddd0e265778 --- .../fs/s3a/s3guard/DynamoDBMetadataStore.java | 13 ++- .../fs/s3a/s3guard/LocalMetadataStore.java | 4 +- .../hadoop/fs/s3a/s3guard/MetadataStore.java | 9 ++ .../fs/s3a/s3guard/NullMetadataStore.java | 2 + .../apache/hadoop/fs/s3a/s3guard/S3Guard.java | 95 ++++++++++++++----- .../s3a/impl/TestPartialDeleteFailures.java | 1 + .../s3guard/ITestDynamoDBMetadataStore.java | 14 ++- ...ynamoDBMetadataStoreAuthoritativeMode.java | 41 +++++++- .../ITestDynamoDBMetadataStoreScale.java | 3 +- .../fs/s3a/s3guard/ITestS3GuardFsck.java | 5 +- .../fs/s3a/s3guard/MetadataStoreTestBase.java | 13 ++- .../hadoop/fs/s3a/s3guard/TestS3Guard.java | 7 +- 12 files changed, 160 insertions(+), 47 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java index 1e59679f27da2..f08e4647f921c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java @@ -202,6 +202,7 @@ * same region. The region may also be set explicitly by setting the config * parameter {@code fs.s3a.s3guard.ddb.region} to the corresponding region. */ +@SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter") @InterfaceAudience.Private @InterfaceStability.Evolving public class DynamoDBMetadataStore implements MetadataStore, @@ -1447,6 +1448,7 @@ static S3AFileStatus makeDirStatus(Path f, String owner) { * {@link #processBatchWriteRequest(DynamoDBMetadataStore.AncestorState, PrimaryKey[], Item[])} * is only tried once. * @param meta Directory listing metadata. + * @param unchangedEntries unchanged child entry paths * @param operationState operational state for a bulk update * @throws IOException IO problem */ @@ -1454,6 +1456,7 @@ static S3AFileStatus makeDirStatus(Path f, String owner) { @Retries.RetryTranslated public void put( final DirListingMetadata meta, + final List unchangedEntries, @Nullable final BulkOperationState operationState) throws IOException { LOG.debug("Saving {} dir meta for {} to table {} in region {}: {}", meta.isAuthoritative() ? "auth" : "nonauth", @@ -1471,8 +1474,14 @@ public void put( final List metasToPut = fullPathsToPut(ddbPathMeta, ancestorState); - // next add all children of the directory - metasToPut.addAll(pathMetaToDDBPathMeta(meta.getListing())); + // next add all changed children of the directory + // ones that came from the previous listing are left as-is + final Collection children = meta.getListing() + .stream() + .filter(e -> !unchangedEntries.contains(e.getFileStatus().getPath())) + .collect(Collectors.toList()); + + metasToPut.addAll(pathMetaToDDBPathMeta(children)); // sort so highest-level entries are written to the store first. // if a sequence fails, no orphan entries will have been written. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java index e22253157f0fd..bd0b7200cb354 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java @@ -45,6 +45,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; @@ -341,13 +342,14 @@ public void put(PathMetadata meta, @Override public synchronized void put(DirListingMetadata meta, + final List unchangedEntries, final BulkOperationState operationState) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug("put dirMeta {}", meta.prettyPrint()); } LocalMetadataEntry entry = localCache.getIfPresent(standardize(meta.getPath())); - if(entry == null){ + if (entry == null) { localCache.put(standardize(meta.getPath()), new LocalMetadataEntry(meta)); } else { entry.setDirListingMetadata(meta); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java index 2ee27eb6f7f21..2f71884b9a36c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java @@ -22,6 +22,7 @@ import java.io.Closeable; import java.io.IOException; import java.util.Collection; +import java.util.List; import java.util.Map; import com.google.common.annotations.VisibleForTesting; @@ -265,11 +266,19 @@ void put(Collection metas, * missing metadata updates (create, delete) made to the same path by * another process. * + * To optimize updates and avoid overwriting existing entries which + * may contain extra data, entries in the list of unchangedEntries may + * be excluded. That is: the listing metadata has the full list of + * what it believes are children, but implementations can opt to ignore + * some. * @param meta Directory listing metadata. + * @param unchangedEntries list of entries in the dir listing which have + * not changed since the directory was list scanned on s3guard. * @param operationState operational state for a bulk update * @throws IOException if there is an error */ void put(DirListingMetadata meta, + final List unchangedEntries, @Nullable BulkOperationState operationState) throws IOException; /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java index 8002968fa37f0..666c233575ad6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.util.Collection; import java.util.HashMap; +import java.util.List; import java.util.Map; /** @@ -113,6 +114,7 @@ public void put(Collection meta, @Override public void put(DirListingMetadata meta, + final List unchangedEntries, final BulkOperationState operationState) throws IOException { } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java index fcbd765483edf..a0572517b2dae 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java @@ -23,6 +23,7 @@ import java.net.URI; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Locale; import java.util.Map; @@ -299,29 +300,68 @@ public static FileStatus[] dirListingUnion(MetadataStore ms, Path path, // Since the authoritative case is already handled outside this function, // we will basically start with the set of directory entries in the // DirListingMetadata, and add any that only exist in the backingStatuses. + // + // We try to avoid writing any more child entries than need be to :- + // (a) save time and money. + // (b) avoid overwriting the authoritative bit of children (HADOOP-16746). + // For auth mode updates, we supply the full listing and a list of which + // child entries have not been changed; the store gets to optimize its + // update however it chooses. + // + // for non-auth-mode S3Guard, we just build a list of entries to add and + // submit them in a batch; this is more efficient than trickling out the + // updates one-by-one. + + // track all unchanged entries; used so the metastore can identify entries + // it doesn't need to update + List unchangedEntries = new ArrayList<>(dirMeta.getListing().size()); + List nonAuthEntriesToAdd = new ArrayList<>(backingStatuses.size()); boolean changed = false; - final Map dirMetaMap = dirMeta.getListing().stream() - .collect(Collectors.toMap( - pm -> pm.getFileStatus().getPath(), PathMetadata::getFileStatus) - ); + final Map dirMetaMap = dirMeta.getListing().stream() + .collect(Collectors.toMap(pm -> pm.getFileStatus().getPath(), pm -> pm)); BulkOperationState operationState = ms.initiateBulkWrite( BulkOperationState.OperationType.Listing, path); for (S3AFileStatus s : backingStatuses) { - if (deleted.contains(s.getPath())) { + final Path statusPath = s.getPath(); + if (deleted.contains(statusPath)) { continue; } - final PathMetadata pathMetadata = new PathMetadata(s); - - if (!isAuthoritative){ - FileStatus status = dirMetaMap.get(s.getPath()); - if (status != null && !s.isDirectory() - && s.getModificationTime() > status.getModificationTime()) { - LOG.debug("Update ms with newer metadata of: {}", status); - S3Guard.putWithTtl(ms, pathMetadata, timeProvider, operationState); + final PathMetadata originalMD = dirMetaMap.get(statusPath); + + // this is built up to be whatever entry is added to the dirMeta + // collection + PathMetadata pathMetadata = originalMD; + + if (!isAuthoritative) { + // in non-auth listings, we compare the file status of the metastore + // list with those in the FS, and overwrite the MS entry if + // either of two conditions are met + // - there is no entry in the metadata. + // - the FS entry for a file (not a dir) is newer. + FileStatus status = originalMD != null + ? originalMD.getFileStatus() + : null; + if (status == null + ||(s.getModificationTime() > status.getModificationTime() + && !(s.isDirectory() && status.isDirectory()))) { + LOG.debug("Update ms with newer metadata of: {}", s); + // ensure it gets into the dirListing + pathMetadata = new PathMetadata(s); + // add to the list of entries to add later, + nonAuthEntriesToAdd.add(pathMetadata); } } + if (pathMetadata == null) { + // there's no entry in the listing already + pathMetadata = new PathMetadata(s); + } + // use an object reference equality test + if (pathMetadata == originalMD) { + // no change -add the path to the list of unchangedEntries + unchangedEntries.add(statusPath); + } // Minor race condition here. Multiple threads could add to this // mutable DirListingMetadata. Since it is backed by a @@ -338,15 +378,19 @@ public static FileStatus[] dirListingUnion(MetadataStore ms, Path path, // directory metadata should be updated. Treat it as a change. changed = changed || (!dirMeta.isAuthoritative() && isAuthoritative); - if (changed && isAuthoritative) { - LOG.debug("Marking the directory {} as authoritative", path); - final MetastoreInstrumentation instrumentation - = ms.getInstrumentation(); - if (instrumentation != null) { - instrumentation.directoryMarkedAuthoritative(); + if (changed) { + if (isAuthoritative) { + // in an authoritative update, we pass in the full list of entries, + // but do declare which have not changed. + LOG.debug("Marking the directory {} as authoritative", path); + ms.getInstrumentation().directoryMarkedAuthoritative(); + dirMeta.setAuthoritative(true); // This is the full directory contents + // write the updated dir entry and any changed children. + S3Guard.putWithTtl(ms, dirMeta, unchangedEntries, timeProvider, operationState); + } else { + // non-auth, just push out the updated entry list + putWithTtl(ms, nonAuthEntriesToAdd, timeProvider, operationState); } - dirMeta.setAuthoritative(true); // This is the full directory contents - S3Guard.putWithTtl(ms, dirMeta, timeProvider, operationState); } IOUtils.cleanupWithLogger(LOG, operationState); @@ -433,7 +477,7 @@ public static void makeDirsOrdered(MetadataStore ms, List dirs, children.add(new PathMetadata(prevStatus)); } dirMeta = new DirListingMetadata(f, children, authoritative); - S3Guard.putWithTtl(ms, dirMeta, timeProvider, null); + S3Guard.putWithTtl(ms, dirMeta, Collections.emptyList(), timeProvider, null); } pathMetas.add(new PathMetadata(status)); @@ -662,10 +706,13 @@ public String toString() { * directory and its children. * @param ms metastore * @param dirMeta directory + * @param unchangedEntries list of unchanged entries from the listing * @param timeProvider nullable time provider * @throws IOException failure. */ - public static void putWithTtl(MetadataStore ms, DirListingMetadata dirMeta, + public static void putWithTtl(MetadataStore ms, + DirListingMetadata dirMeta, + final List unchangedEntries, final ITtlTimeProvider timeProvider, @Nullable final BulkOperationState operationState) throws IOException { @@ -673,7 +720,7 @@ public static void putWithTtl(MetadataStore ms, DirListingMetadata dirMeta, dirMeta.setLastUpdated(now); dirMeta.getListing() .forEach(pm -> pm.setLastUpdated(now)); - ms.put(dirMeta, operationState); + ms.put(dirMeta, unchangedEntries, operationState); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestPartialDeleteFailures.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestPartialDeleteFailures.java index 5fd1d528ba8dd..244d2eed324c7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestPartialDeleteFailures.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestPartialDeleteFailures.java @@ -310,6 +310,7 @@ public void put(final Collection metas, @Override public void put(final DirListingMetadata meta, + final List unchangedEntries, final BulkOperationState operationState) { created.add(meta.getPath()); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java index 89c401b787578..7def997b17661 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java @@ -28,6 +28,7 @@ import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -132,6 +133,8 @@ public ITestDynamoDBMetadataStore() { private static String testDynamoDBTableName; + private static final List UNCHANGED_ENTRIES = Collections.emptyList(); + /** * Create a path under the test path provided by * the FS contract. @@ -575,7 +578,8 @@ private void doTestBatchWrite(int numDelete, int numPut, Collection pathsToDelete = null; if (oldMetas != null) { // put all metadata of old paths and verify - ms.put(new DirListingMetadata(oldDir, oldMetas, false), putState); + ms.put(new DirListingMetadata(oldDir, oldMetas, false), UNCHANGED_ENTRIES, + putState); assertEquals("Child count", 0, ms.listChildren(newDir).withoutTombstones().numEntries()); Assertions.assertThat(ms.listChildren(oldDir).getListing()) @@ -942,13 +946,13 @@ public void testAncestorOverwriteConflict() throws Throwable { grandchildPath, new ArrayList<>(), false); intercept(PathIOException.class, E_INCONSISTENT_UPDATE, - () -> ddbms.put(grandchildListing, bulkWrite)); + () -> ddbms.put(grandchildListing, UNCHANGED_ENTRIES, bulkWrite)); // but a directory update under another path is fine DirListingMetadata grandchild2Listing = new DirListingMetadata( grandchild2Path, new ArrayList<>(), false); - ddbms.put(grandchild2Listing, bulkWrite); + ddbms.put(grandchild2Listing, UNCHANGED_ENTRIES, bulkWrite); // and it creates a new entry for its parent verifyInAncestor(bulkWrite, child2, true); } @@ -1079,7 +1083,7 @@ private void testGetEmptyDirFlagCanSetTrueOrUnknown(boolean auth) assertEquals(auth, dlm.isAuthoritative()); // Test with non-authoritative listing, empty dir - ms.put(dlm, null); + ms.put(dlm, UNCHANGED_ENTRIES, null); final PathMetadata pmdResultEmpty = ms.get(dirToPut, true); if(auth){ assertEquals(Tristate.TRUE, pmdResultEmpty.isEmptyDirectory()); @@ -1089,7 +1093,7 @@ private void testGetEmptyDirFlagCanSetTrueOrUnknown(boolean auth) // Test with non-authoritative listing, non-empty dir dlm.put(new PathMetadata(basicFileStatus(fileToPut, 1, false))); - ms.put(dlm, null); + ms.put(dlm, UNCHANGED_ENTRIES, null); final PathMetadata pmdResultNotEmpty = ms.get(dirToPut, true); assertEquals(Tristate.FALSE, pmdResultNotEmpty.isEmptyDirectory()); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java index b049d3295c49f..2dacf93302249 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java @@ -56,6 +56,7 @@ import static org.apache.hadoop.fs.s3a.S3AUtils.applyLocatedFiles; import static org.apache.hadoop.fs.s3a.Statistic.OBJECT_LIST_REQUESTS; import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_AUTHORITATIVE_DIRECTORIES_UPDATED; +import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_RECORD_WRITES; import static org.apache.hadoop.fs.s3a.s3guard.AuthoritativeAuditOperation.ERROR_PATH_NOT_AUTH_IN_FS; import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.authoritativeEmptyDirectoryMarker; import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Authoritative.CHECK_FLAG; @@ -322,8 +323,18 @@ public void testListStatusMakesDirAuth() throws Throwable { "is auth"); - // Subdir list makes it auth + // directory list makes the dir auth and leaves the child auth assertListUpdatesAuth(dir); + + // and afterwards, a followup list does not write anything to DDB + // (as the dir is auth, its not going to go near the FS to update...) + expectOperationUpdatesDDB(0, () -> authFS.listStatus(dir)); + // mark the dir nonauth again + markDirNonauth(dir); + // and only one record is written to DDB, the dir marker as auth + // the subdir is not overwritten + expectOperationUpdatesDDB(1, () -> authFS.listStatus(dir)); + } @Test @@ -557,7 +568,7 @@ private String f(String flag) { @Test public void testAuditS3GuardTool() throws Throwable { describe("Test the s3guard audit CLI"); - authFS.mkdirs(methodAuthPath); + mkNonauthDir(methodAuthPath); final String path = methodAuthPath.toString(); // this is non-auth, so the scan is rejected expectExecResult(EXIT_NOT_ACCEPTABLE, @@ -664,7 +675,8 @@ protected void touchFile(final Path file) throws IOException { } /** - * Invoke an operation expecting the meta store to be updated{@code updates} + * Invoke an operation expecting the meta store to have its + * directoryMarkedAuthoritative count to be be updated {@code updates} * times and S3 LIST requests made {@code lists} times. * @param Return type * @param updates Expected count @@ -688,6 +700,25 @@ private T expectAuthoritativeUpdate( return call; } + /** + * Invoke an operation expecting {@code writes} records written to DDB. + * @param Return type + * @param writes Expected count + * @param fn Function to invoke + * @return Result of the function call + * @throws Exception Failure + */ + private T expectOperationUpdatesDDB( + int writes, + Callable fn) + throws Exception { + S3ATestUtils.MetricDiff writeDiff = new S3ATestUtils.MetricDiff(authFS, + S3GUARD_METADATASTORE_RECORD_WRITES); + final T call = fn.call(); + writeDiff.assertDiffEquals(writes); + return call; + } + /** * Assert that a listStatus call increments the * "s3guard_metadatastore_authoritative_directories_updated" counter. @@ -722,7 +753,7 @@ private void mkAuthDir(Path path) throws IOException { */ private void mkNonauthDir(Path path) throws IOException { authFS.mkdirs(path); - // overwrite + // overwrite entry with a nonauth one markDirNonauth(path); } @@ -734,7 +765,7 @@ private void mkNonauthDir(Path path) throws IOException { private void markDirNonauth(final Path path) throws IOException { S3Guard.putWithTtl(metastore, nonAuthEmptyDirectoryMarker((S3AFileStatus) authFS.getFileStatus(path)), - null, null ); + null, null); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java index 9614838c2b61d..f2f37f21ea5a7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java @@ -21,6 +21,7 @@ import javax.annotation.Nullable; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; @@ -377,7 +378,7 @@ public void test_070_putDirMarker() throws Throwable { execute("list", OPERATIONS_PER_THREAD, expectThrottling(), - () -> ddbms.put(children, state)); + () -> ddbms.put(children, Collections.emptyList(), state)); } finally { retryingDelete(path); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardFsck.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardFsck.java index a6e7c668f98b1..46bc30ddf6634 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardFsck.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardFsck.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.net.URI; +import java.util.Collections; import java.util.List; import java.util.UUID; @@ -271,8 +272,8 @@ public void testIAuthoritativeDirectoryContentMismatch() throws Exception { final DirListingMetadata dlmIc = metadataStore.listChildren(cwdIncorrect); dlmC.setAuthoritative(true); dlmIc.setAuthoritative(true); - metadataStore.put(dlmC, null); - metadataStore.put(dlmIc, null); + metadataStore.put(dlmC, Collections.emptyList(), null); + metadataStore.put(dlmIc, Collections.emptyList(), null); // add a file raw so the listing will be different. touchRawAndWaitRaw(fileIc2); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java index 5c955e555b4bf..d141e35e10f4a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -74,6 +75,8 @@ public abstract class MetadataStoreTestBase extends HadoopTestBase { private final long accessTime = 0; private static ITtlTimeProvider ttlTimeProvider; + private static final List UNCHANGED_ENTRIES = Collections.emptyList(); + /** * Each test should override this. Will use a new Configuration instance. * @return Contract which specifies the MetadataStore under test plus config. @@ -558,7 +561,7 @@ public void testListChildrenAuthoritative() throws IOException { dirMeta.setAuthoritative(true); dirMeta.put(new PathMetadata( makeFileStatus("/a1/b1/file_new", 100))); - ms.put(dirMeta, null); + ms.put(dirMeta, UNCHANGED_ENTRIES, null); dirMeta = ms.listChildren(strToPath("/a1/b1")); assertListingsEqual(dirMeta.getListing(), "/a1/b1/file1", "/a1/b1/file2", @@ -759,7 +762,7 @@ public void testPruneUnsetsAuthoritative() throws Exception { if (!allowMissing()) { DirListingMetadata parentDirMd = ms.listChildren(strToPath(parentDir)); parentDirMd.setAuthoritative(true); - ms.put(parentDirMd, null); + ms.put(parentDirMd, UNCHANGED_ENTRIES, null); } ms.prune(MetadataStore.PruneMode.ALL_BY_MODTIME, time); @@ -798,7 +801,7 @@ public void testPrunePreservesAuthoritative() throws Exception { // set parent dir as authoritative DirListingMetadata parentDirMd = ms.listChildren(strToPath(parentDir)); parentDirMd.setAuthoritative(true); - ms.put(parentDirMd, null); + ms.put(parentDirMd, UNCHANGED_ENTRIES, null); // prune the ms ms.prune(MetadataStore.PruneMode.ALL_BY_MODTIME, time); @@ -830,7 +833,7 @@ public void testPutDirListingMetadataPutsFileMetadata() } DirListingMetadata dirMeta = new DirListingMetadata(strToPath(dirPath), metas, authoritative); - ms.put(dirMeta, null); + ms.put(dirMeta, UNCHANGED_ENTRIES, null); if (!allowMissing()) { assertDirectorySize(dirPath, filenames.length); @@ -1011,7 +1014,7 @@ protected void putListStatusFiles(String dirPath, boolean authoritative, } DirListingMetadata dirMeta = new DirListingMetadata(strToPath(dirPath), metas, authoritative); - ms.put(dirMeta, null); + ms.put(dirMeta, UNCHANGED_ENTRIES, null); } protected void createNewDirs(String... dirs) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java index 116fd772dc0ae..2190c0c66288c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Locale; import java.util.UUID; @@ -57,9 +58,11 @@ public class TestS3Guard extends Assert { */ @Test public void testDirListingUnion() throws Exception { + final Configuration conf = new Configuration(false); MetadataStore ms = new LocalMetadataStore(); Path dirPath = new Path("s3a://bucket/dir"); + ms.initialize(conf, new S3Guard.TtlTimeProvider(conf)); // Two files in metadata store listing PathMetadata m1 = makePathMeta("s3a://bucket/dir/ms-file1", false); @@ -96,12 +99,12 @@ public void testPutWithTtlDirListingMeta() throws Exception { when(timeProvider.getNow()).thenReturn(100L); // act - S3Guard.putWithTtl(ms, dlm, timeProvider, null); + S3Guard.putWithTtl(ms, dlm, Collections.emptyList(), timeProvider, null); // assert assertEquals("last update in " + dlm, 100L, dlm.getLastUpdated()); verify(timeProvider, times(1)).getNow(); - verify(ms, times(1)).put(dlm, null); + verify(ms, times(1)).put(dlm, Collections.emptyList(), null); } @Test From 08308b32ba96098c6a41b9b54ccdeb30ad95ad62 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 20 Jan 2020 14:50:38 +0000 Subject: [PATCH 4/6] HADOOP-16746. finishing out testing of mkdirs logic. * Out of date entries only propagate from S3 to DDB *when there's a DDB entry*. This is the current policy; there's now a constant to change behaviour and more discussion about the details. So the next person maintaining it will understand what is going on better. * Only writes the auth bit to an empty dir when the path is auth, rather than do it whenever an empty dir is recorded in S3Guard. ITestRestrictedReadAccess showed that problem; there's been a bit of tuning in that test to make it more robust to configs and previous test runs. Tune ITestDynamoDBMetadataStoreAuthoritativeMode * remove superfluous test case * more asserts about directory states during rename * enable another ignored test case Change-Id: I309376f38e9983c901a53a26f89cbf372c7a01da --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 13 ++- .../apache/hadoop/fs/s3a/s3guard/S3Guard.java | 29 +++++-- .../hadoop/fs/s3a/ITestS3GuardWriteBack.java | 13 ++- .../s3a/auth/ITestRestrictedReadAccess.java | 17 ++-- ...ynamoDBMetadataStoreAuthoritativeMode.java | 80 ++++++++----------- 5 files changed, 86 insertions(+), 66 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 94d10b08df5c3..ddb336ebda3ce 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3533,13 +3533,20 @@ void finishedWrite(String key, long length, String eTag, String versionId, S3AFileStatus status = createUploadFileStatus(p, isDir, length, getDefaultBlockSize(p), username, eTag, versionId); - if (!isDir) { + boolean authoritative = false; + if (isDir) { + // this is a directory marker so put it as such. + status.setIsEmptyDirectory(Tristate.TRUE); + // and maybe mark as auth + authoritative = allowAuthoritative(p); + } + if (!authoritative) { + // for files and non-auth directories S3Guard.putAndReturn(metadataStore, status, ttlTimeProvider, activeState); } else { - // this is a directory marker so put it as such. - status.setIsEmptyDirectory(Tristate.TRUE); + // authoritative directory S3Guard.putAuthDirectoryMarker(metadataStore, status, ttlTimeProvider, activeState); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java index a0572517b2dae..b0febdb3e60a4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java @@ -63,6 +63,7 @@ @InterfaceAudience.Private @InterfaceStability.Unstable public final class S3Guard { + private static final Logger LOG = LoggerFactory.getLogger(S3Guard.class); @InterfaceAudience.Private @@ -76,6 +77,17 @@ public final class S3Guard { DynamoDBClientFactory.DefaultDynamoDBClientFactory.class; private static final S3AFileStatus[] EMPTY_LISTING = new S3AFileStatus[0]; + + /** + * Hard-coded policy : {@value}. + * If true, when merging an S3 LIST with S3Guard in non-auth mode, + * only updated entries are added; new entries are left out. + * This policy choice reduces the amount of data stored in Dynamo, + * and hence the complexity of the merge in a non-auth listing. + */ + @VisibleForTesting + public static final boolean DIR_MERGE_UPDATES_ALL_RECORDS_NONAUTH = true; + // Utility class. All static functions. private S3Guard() { } @@ -204,6 +216,7 @@ public static void putAuthDirectoryMarker( final PathMetadata fileMeta = authoritativeEmptyDirectoryMarker(status); putWithTtl(ms, fileMeta, timeProvider, operationState); } finally { + ms.getInstrumentation().directoryMarkedAuthoritative(); ms.getInstrumentation().entryAdded((System.nanoTime() - startTimeNano)); } } @@ -338,14 +351,18 @@ public static FileStatus[] dirListingUnion(MetadataStore ms, Path path, // in non-auth listings, we compare the file status of the metastore // list with those in the FS, and overwrite the MS entry if // either of two conditions are met - // - there is no entry in the metadata. - // - the FS entry for a file (not a dir) is newer. - FileStatus status = originalMD != null + // - there is no entry in the metadata and + // DIR_MERGE_UPDATES_ALL_RECORDS_NONAUTH is compiled to true + // - there is an entry in the metastore the FS entry is newer. + FileStatus mdStatus = originalMD != null ? originalMD.getFileStatus() : null; - if (status == null - ||(s.getModificationTime() > status.getModificationTime() - && !(s.isDirectory() && status.isDirectory()))) { + boolean shouldUpdate = mdStatus != null + && s.getModificationTime() > mdStatus.getModificationTime(); + if (DIR_MERGE_UPDATES_ALL_RECORDS_NONAUTH && mdStatus == null) { + shouldUpdate = true; + } + if (shouldUpdate) { LOG.debug("Update ms with newer metadata of: {}", s); // ensure it gets into the dirListing pathMetadata = new PathMetadata(s); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java index 4ec7f4666de6d..c5e4cbcd45929 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardWriteBack.java @@ -24,7 +24,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata; -import org.junit.Assume; +import org.apache.hadoop.fs.s3a.s3guard.S3Guard; + import org.junit.Test; import java.io.IOException; @@ -32,6 +33,7 @@ import java.util.Arrays; import static org.apache.hadoop.fs.s3a.Constants.*; +import static org.junit.Assume.assumeTrue; /** * Test cases that validate S3Guard's behavior for writing things like @@ -39,6 +41,13 @@ */ public class ITestS3GuardWriteBack extends AbstractS3ATestBase { + @Override + public void setup() throws Exception { + assumeTrue("dirListingUnion always writes back records", + !S3Guard.DIR_MERGE_UPDATES_ALL_RECORDS_NONAUTH); + super.setup(); + } + /** * In listStatus(), when S3Guard is enabled, the full listing for a * directory is "written back" to the MetadataStore before the listing is @@ -49,7 +58,7 @@ public class ITestS3GuardWriteBack extends AbstractS3ATestBase { */ @Test public void testListStatusWriteBack() throws Exception { - Assume.assumeTrue(getFileSystem().hasMetadataStore()); + assumeTrue(getFileSystem().hasMetadataStore()); Path directory = path("ListStatusWriteBack"); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java index 050bd6e89f16d..a8e7a57057605 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestRestrictedReadAccess.java @@ -53,13 +53,13 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; import static org.apache.hadoop.fs.s3a.Constants.ASSUMED_ROLE_ARN; +import static org.apache.hadoop.fs.s3a.Constants.AUTHORITATIVE_PATH; import static org.apache.hadoop.fs.s3a.Constants.METADATASTORE_AUTHORITATIVE; import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL; import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume; import static org.apache.hadoop.fs.s3a.S3ATestUtils.assumeS3GuardState; import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.isS3GuardTestPropertySet; import static org.apache.hadoop.fs.s3a.S3ATestUtils.lsR; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBucketOverrides; @@ -221,13 +221,10 @@ public ITestRestrictedReadAccess( public Configuration createConfiguration() { Configuration conf = super.createConfiguration(); String bucketName = getTestBucketName(conf); - // is s3guard enabled? - boolean guardedTestRun = isS3GuardTestPropertySet(conf); - // in a guarded test run, except for the special case of raw, - // all DDB settings are left alone. removeBaseAndBucketOverrides(bucketName, conf, - METADATASTORE_AUTHORITATIVE); + METADATASTORE_AUTHORITATIVE, + AUTHORITATIVE_PATH); removeBucketOverrides(bucketName, conf, S3_METADATA_STORE_IMPL); if (!s3guard) { @@ -317,8 +314,10 @@ public void initNoReadAccess() throws Throwable { verifyS3GuardSettings(realFS, "real filesystem"); // avoiding the parameterization to steer clear of accidentally creating - // patterns - basePath = path("testNoReadAccess-" + name); + // patterns; a timestamp is used to ensure tombstones from previous runs + // do not interfere + basePath = path("testNoReadAccess-" + name + + "-" + System.currentTimeMillis() / 1000); // define the paths and create them. describe("Creating test directories and files"); @@ -628,7 +627,7 @@ public void checkLocatedFileStatusNonexistentPath() throws Throwable { * Do some cleanup to see what happens with delete calls. * Cleanup happens in test teardown anyway; doing it here * just makes use of the delete calls to see how delete failures - * change with permissions and S3Guard stettings. + * change with permissions and S3Guard settings. */ public void checkDeleteOperations() throws Throwable { describe("Testing delete operations"); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java index 2dacf93302249..1d53494666228 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java @@ -55,8 +55,7 @@ import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static org.apache.hadoop.fs.s3a.S3AUtils.applyLocatedFiles; import static org.apache.hadoop.fs.s3a.Statistic.OBJECT_LIST_REQUESTS; -import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_AUTHORITATIVE_DIRECTORIES_UPDATED; -import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_RECORD_WRITES; +import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_AUTHORITATIVE_DIRECTORIES_UPDATED;import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_RECORD_WRITES; import static org.apache.hadoop.fs.s3a.s3guard.AuthoritativeAuditOperation.ERROR_PATH_NOT_AUTH_IN_FS; import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.authoritativeEmptyDirectoryMarker; import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Authoritative.CHECK_FLAG; @@ -140,8 +139,14 @@ public class ITestDynamoDBMetadataStoreAuthoritativeMode */ private AuthoritativeAuditOperation auditor; + /** + * Path {@code $methodAuthPath/dir}. + */ private Path dir; + /** + * Path {@code $methodAuthPath/dir/file}. + */ private Path dirFile; /** @@ -149,6 +154,9 @@ public class ITestDynamoDBMetadataStoreAuthoritativeMode */ private final List toolsToClose = new ArrayList<>(); + /** + * The metastore of the auth filesystem. + */ private DynamoDBMetadataStore metastore; /** @@ -322,7 +330,6 @@ public void testListStatusMakesDirAuth() throws Throwable { .allMatch(md -> ((DDBPathMetadata) md).isAuthoritativeDir(), "is auth"); - // directory list makes the dir auth and leaves the child auth assertListUpdatesAuth(dir); @@ -334,7 +341,6 @@ public void testListStatusMakesDirAuth() throws Throwable { // and only one record is written to DDB, the dir marker as auth // the subdir is not overwritten expectOperationUpdatesDDB(1, () -> authFS.listStatus(dir)); - } @Test @@ -352,7 +358,6 @@ public void testAddFileMarksNonAuth() throws Throwable { * marker is added. This must be auth. */ @Test - @Ignore("HADOOP-16697. Needs mkdir to be authoritative") public void testDeleteSingleFileLeavesMarkersAlone() throws Throwable { describe("Deleting a file with no peers makes no changes to ancestors"); mkAuthDir(methodAuthPath); @@ -375,6 +380,16 @@ public void testDeleteMultipleFileLeavesMarkersAlone() throws Throwable { expectAuthRecursive(methodAuthPath); } + @Test + public void testDeleteEmptyDirLeavesParentAuth() throws Throwable { + describe("Deleting a directory retains the auth status " + + "of the parent directory"); + mkAuthDir(dir); + mkAuthDir(dirFile); + expectAuthRecursive(dir); + authFS.delete(dirFile, false); + expectAuthRecursive(dir); + } /** * Assert the number of pruned files matches expectations. @@ -447,50 +462,23 @@ public void testRenameFile() throws Throwable { @Test public void testRenameDirMarksDestAsAuth() throws Throwable { describe("renaming a dir must mark dest tree as auth"); - final Path d = methodAuthPath; - final Path source = new Path(d, "source"); - final Path dest = new Path(d, "dest"); + final Path base = methodAuthPath; + mkAuthDir(base); + final Path source = new Path(base, "source"); + final Path dest = new Path(base, "dest"); mkAuthDir(source); - Path f = new Path(source, "subdir/file"); + expectAuthRecursive(base); + Path subdir = new Path(source, "subdir"); + Path f = new Path(subdir, "file"); touchFile(f); + expectNonauthRecursive(base); + // list the source directories so everything is + // marked as auth + authFS.listStatus(source); + authFS.listStatus(subdir); + expectAuthRecursive(base); authFS.rename(source, dest); - expectNonauthRecursive(d); - expectAuthRecursive(dest); - } - - @Test - public void testRenameWithNonEmptySubDir() throws Throwable { - final Path renameTestDir = methodAuthPath; - final Path srcDir = new Path(renameTestDir, "src1"); - final Path srcSubDir = new Path(srcDir, "sub"); - final Path finalDir = new Path(renameTestDir, "dest"); - FileSystem fs = authFS; - rm(fs, renameTestDir, true, false); - - fs.mkdirs(srcDir); - fs.mkdirs(finalDir); - writeTextFile(fs, new Path(srcDir, "source.txt"), - "this is the file in src dir", false); - writeTextFile(fs, new Path(srcSubDir, "subfile.txt"), - "this is the file in src/sub dir", false); - - assertPathExists("not created in src dir", - new Path(srcDir, "source.txt")); - assertPathExists("not created in src/sub dir", - new Path(srcSubDir, "subfile.txt")); - - boolean rename = fs.rename(srcDir, finalDir); - Assertions.assertThat(rename) - .describedAs("rename(%s, %s)", srcDir, finalDir) - .isTrue(); - - // POSIX rename behavior - assertPathExists("not renamed into dest dir", - new Path(finalDir, "source.txt")); - assertPathExists("not renamed into dest/sub dir", - new Path(finalDir, "sub/subfile.txt")); - assertPathDoesNotExist("not deleted", - new Path(srcDir, "source.txt")); + expectAuthRecursive(base); } @Test From cc007255fcff2084012c398a02c0807b48c61ba4 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 20 Jan 2020 16:28:56 +0000 Subject: [PATCH 5/6] HADOOP-16746. review work test tweaks * remove unused imports * in MetaStoreTestBase name empty list EMPTY_LIST To make clear that is what it is Change-Id: If53bf1c4810d397fe1b8d3d7b4c7c9ceb1344173 --- .../ITestDynamoDBMetadataStoreAuthoritativeMode.java | 4 +--- .../hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java | 12 ++++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java index 1d53494666228..59a21f6950b0f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java @@ -45,8 +45,6 @@ import org.apache.hadoop.fs.s3a.Tristate; import org.apache.hadoop.io.IOUtils; -import static org.apache.hadoop.fs.contract.ContractTestUtils.rm; -import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile; import static org.apache.hadoop.fs.s3a.Constants.AUTHORITATIVE_PATH; import static org.apache.hadoop.fs.s3a.Constants.METADATASTORE_AUTHORITATIVE; import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_KEY; @@ -736,7 +734,7 @@ private void mkAuthDir(Path path) throws IOException { } /** - * Create a non-auth directory, by creating (then deleting) a subdir. + * Create a non-auth directory. * @param path dir */ private void mkNonauthDir(Path path) throws IOException { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java index d141e35e10f4a..47551f3374fd8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java @@ -75,7 +75,7 @@ public abstract class MetadataStoreTestBase extends HadoopTestBase { private final long accessTime = 0; private static ITtlTimeProvider ttlTimeProvider; - private static final List UNCHANGED_ENTRIES = Collections.emptyList(); + private static final List EMPTY_LIST = Collections.emptyList(); /** * Each test should override this. Will use a new Configuration instance. @@ -561,7 +561,7 @@ public void testListChildrenAuthoritative() throws IOException { dirMeta.setAuthoritative(true); dirMeta.put(new PathMetadata( makeFileStatus("/a1/b1/file_new", 100))); - ms.put(dirMeta, UNCHANGED_ENTRIES, null); + ms.put(dirMeta, EMPTY_LIST, null); dirMeta = ms.listChildren(strToPath("/a1/b1")); assertListingsEqual(dirMeta.getListing(), "/a1/b1/file1", "/a1/b1/file2", @@ -762,7 +762,7 @@ public void testPruneUnsetsAuthoritative() throws Exception { if (!allowMissing()) { DirListingMetadata parentDirMd = ms.listChildren(strToPath(parentDir)); parentDirMd.setAuthoritative(true); - ms.put(parentDirMd, UNCHANGED_ENTRIES, null); + ms.put(parentDirMd, EMPTY_LIST, null); } ms.prune(MetadataStore.PruneMode.ALL_BY_MODTIME, time); @@ -801,7 +801,7 @@ public void testPrunePreservesAuthoritative() throws Exception { // set parent dir as authoritative DirListingMetadata parentDirMd = ms.listChildren(strToPath(parentDir)); parentDirMd.setAuthoritative(true); - ms.put(parentDirMd, UNCHANGED_ENTRIES, null); + ms.put(parentDirMd, EMPTY_LIST, null); // prune the ms ms.prune(MetadataStore.PruneMode.ALL_BY_MODTIME, time); @@ -833,7 +833,7 @@ public void testPutDirListingMetadataPutsFileMetadata() } DirListingMetadata dirMeta = new DirListingMetadata(strToPath(dirPath), metas, authoritative); - ms.put(dirMeta, UNCHANGED_ENTRIES, null); + ms.put(dirMeta, EMPTY_LIST, null); if (!allowMissing()) { assertDirectorySize(dirPath, filenames.length); @@ -1014,7 +1014,7 @@ protected void putListStatusFiles(String dirPath, boolean authoritative, } DirListingMetadata dirMeta = new DirListingMetadata(strToPath(dirPath), metas, authoritative); - ms.put(dirMeta, UNCHANGED_ENTRIES, null); + ms.put(dirMeta, EMPTY_LIST, null); } protected void createNewDirs(String... dirs) From 46d434416f6383d903fd4e000f76bbc55f8638cc Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 23 Jan 2020 19:04:06 +0000 Subject: [PATCH 6/6] HADOOP-16746: split auth mode and nonauth mode union algorithms apart Move from to intermingled dirListingUnion algorithms in the same method, the auth and nonauth merge/update operations have been split into their own methods. There is a bit of duplication -but at least now the different operations are isolated enough it's possible to understand them. TestS3Guard has been extended to help test of some of this. We can't verify that existing entries don't get overwritten, but the unit tests are at least checking both algorithms. Change-Id: I24f3787e31dc3739d0f71b800ed3732b2fd15b94 --- .../apache/hadoop/fs/s3a/s3guard/S3Guard.java | 177 ++++++++++----- ...ynamoDBMetadataStoreAuthoritativeMode.java | 3 +- .../hadoop/fs/s3a/s3guard/TestS3Guard.java | 206 ++++++++++++++++-- 3 files changed, 301 insertions(+), 85 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java index b0febdb3e60a4..56c9a249186ca 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java @@ -63,7 +63,6 @@ @InterfaceAudience.Private @InterfaceStability.Unstable public final class S3Guard { - private static final Logger LOG = LoggerFactory.getLogger(S3Guard.class); @InterfaceAudience.Private @@ -77,7 +76,6 @@ public final class S3Guard { DynamoDBClientFactory.DefaultDynamoDBClientFactory.class; private static final S3AFileStatus[] EMPTY_LISTING = new S3AFileStatus[0]; - /** * Hard-coded policy : {@value}. * If true, when merging an S3 LIST with S3Guard in non-auth mode, @@ -86,7 +84,7 @@ public final class S3Guard { * and hence the complexity of the merge in a non-auth listing. */ @VisibleForTesting - public static final boolean DIR_MERGE_UPDATES_ALL_RECORDS_NONAUTH = true; + public static final boolean DIR_MERGE_UPDATES_ALL_RECORDS_NONAUTH = false; // Utility class. All static functions. private S3Guard() { } @@ -305,8 +303,6 @@ public static FileStatus[] dirListingUnion(MetadataStore ms, Path path, false); } - Set deleted = dirMeta.listTombstones(); - // Since we treat the MetadataStore as a "fresher" or "consistent" view // of metadata, we always use its metadata first. @@ -325,57 +321,62 @@ public static FileStatus[] dirListingUnion(MetadataStore ms, Path path, // submit them in a batch; this is more efficient than trickling out the // updates one-by-one. + BulkOperationState operationState = ms.initiateBulkWrite( + BulkOperationState.OperationType.Listing, + path); + if (isAuthoritative) { + authoritativeUnion(ms, path, backingStatuses, dirMeta, + timeProvider, operationState); + } else { + nonAuthoritativeUnion(ms, path, backingStatuses, dirMeta, + timeProvider, operationState); + } + IOUtils.cleanupWithLogger(LOG, operationState); + + return dirMetaToStatuses(dirMeta); + } + + /** + * Perform the authoritative union operation. + * Here all updated/missing entries are added back; we take care + * not to overwrite unchanged entries as that will lose their + * isAuthoritative bit (HADOOP-16746). + * @param ms MetadataStore to use. + * @param path path to directory + * @param backingStatuses Directory listing from the backing store. + * @param dirMeta Directory listing from MetadataStore. May be null. + * @param timeProvider Time provider to use when updating entries + * @param operationState ongoing operation + * @throws IOException if metadata store update failed + */ + private static void authoritativeUnion( + final MetadataStore ms, + final Path path, + final List backingStatuses, + final DirListingMetadata dirMeta, + final ITtlTimeProvider timeProvider, + final BulkOperationState operationState) throws IOException { // track all unchanged entries; used so the metastore can identify entries // it doesn't need to update List unchangedEntries = new ArrayList<>(dirMeta.getListing().size()); - List nonAuthEntriesToAdd = new ArrayList<>(backingStatuses.size()); - boolean changed = false; + boolean changed = !dirMeta.isAuthoritative(); + Set deleted = dirMeta.listTombstones(); final Map dirMetaMap = dirMeta.getListing().stream() .collect(Collectors.toMap(pm -> pm.getFileStatus().getPath(), pm -> pm)); - BulkOperationState operationState = ms.initiateBulkWrite( - BulkOperationState.OperationType.Listing, - path); for (S3AFileStatus s : backingStatuses) { final Path statusPath = s.getPath(); if (deleted.contains(statusPath)) { continue; } - final PathMetadata originalMD = dirMetaMap.get(statusPath); - - // this is built up to be whatever entry is added to the dirMeta + // this is built up to be whatever entry is to be added to the dirMeta // collection - PathMetadata pathMetadata = originalMD; - - if (!isAuthoritative) { - // in non-auth listings, we compare the file status of the metastore - // list with those in the FS, and overwrite the MS entry if - // either of two conditions are met - // - there is no entry in the metadata and - // DIR_MERGE_UPDATES_ALL_RECORDS_NONAUTH is compiled to true - // - there is an entry in the metastore the FS entry is newer. - FileStatus mdStatus = originalMD != null - ? originalMD.getFileStatus() - : null; - boolean shouldUpdate = mdStatus != null - && s.getModificationTime() > mdStatus.getModificationTime(); - if (DIR_MERGE_UPDATES_ALL_RECORDS_NONAUTH && mdStatus == null) { - shouldUpdate = true; - } - if (shouldUpdate) { - LOG.debug("Update ms with newer metadata of: {}", s); - // ensure it gets into the dirListing - pathMetadata = new PathMetadata(s); - // add to the list of entries to add later, - nonAuthEntriesToAdd.add(pathMetadata); - } - } + PathMetadata pathMetadata = dirMetaMap.get(statusPath); + if (pathMetadata == null) { - // there's no entry in the listing already + // there's no entry in the listing, so create one. pathMetadata = new PathMetadata(s); - } - // use an object reference equality test - if (pathMetadata == originalMD) { + } else { // no change -add the path to the list of unchangedEntries unchangedEntries.add(statusPath); } @@ -387,31 +388,87 @@ public static FileStatus[] dirListingUnion(MetadataStore ms, Path path, // Any FileSystem has similar race conditions, but we could persist // a stale entry longer. We could expose an atomic // DirListingMetadata#putIfNotPresent() - boolean updated = dirMeta.put(pathMetadata); - changed = changed || updated; + changed |= dirMeta.put(pathMetadata); } - // If dirMeta is not authoritative, but isAuthoritative is true the - // directory metadata should be updated. Treat it as a change. - changed = changed || (!dirMeta.isAuthoritative() && isAuthoritative); - if (changed) { - if (isAuthoritative) { - // in an authoritative update, we pass in the full list of entries, - // but do declare which have not changed. - LOG.debug("Marking the directory {} as authoritative", path); - ms.getInstrumentation().directoryMarkedAuthoritative(); - dirMeta.setAuthoritative(true); // This is the full directory contents - // write the updated dir entry and any changed children. - S3Guard.putWithTtl(ms, dirMeta, unchangedEntries, timeProvider, operationState); + // in an authoritative update, we pass in the full list of entries, + // but do declare which have not changed to avoid needless and potentially + // destructive overwrites. + LOG.debug("Marking the directory {} as authoritative", path); + ms.getInstrumentation().directoryMarkedAuthoritative(); + dirMeta.setAuthoritative(true); // This is the full directory contents + // write the updated dir entry and any changed children. + S3Guard.putWithTtl(ms, dirMeta, unchangedEntries, timeProvider, operationState); + } + } + + /** + * Perform the authoritative union operation. + * @param ms MetadataStore to use. + * @param path path to directory + * @param backingStatuses Directory listing from the backing store. + * @param dirMeta Directory listing from MetadataStore. May be null. + * @param timeProvider Time provider to use when updating entries + * @param operationState ongoing operation + * @throws IOException if metadata store update failed + */ + private static void nonAuthoritativeUnion( + final MetadataStore ms, + final Path path, + final List backingStatuses, + final DirListingMetadata dirMeta, + final ITtlTimeProvider timeProvider, + final BulkOperationState operationState) throws IOException { + List entriesToAdd = new ArrayList<>(backingStatuses.size()); + Set deleted = dirMeta.listTombstones(); + + final Map dirMetaMap = dirMeta.getListing().stream() + .collect(Collectors.toMap(pm -> pm.getFileStatus().getPath(), pm -> pm)); + for (S3AFileStatus s : backingStatuses) { + final Path statusPath = s.getPath(); + if (deleted.contains(statusPath)) { + continue; + } + + // this is the record in dynamo + PathMetadata pathMetadata = dirMetaMap.get(statusPath); + + // in non-auth listings, we compare the file status of the metastore + // list with those in the FS, and overwrite the MS entry if + // either of two conditions are met + // - there is no entry in the metastore and + // DIR_MERGE_UPDATES_ALL_RECORDS_NONAUTH is compiled to true + // - there is an entry in the metastore the FS entry is newer. + boolean shouldUpdate; + if (pathMetadata != null) { + // entry is in DDB; check modification time + shouldUpdate = s.getModificationTime() > (pathMetadata.getFileStatus()) + .getModificationTime(); + // create an updated record. + pathMetadata = new PathMetadata(s); } else { - // non-auth, just push out the updated entry list - putWithTtl(ms, nonAuthEntriesToAdd, timeProvider, operationState); + // entry is not present. Create for insertion into dirMeta + pathMetadata = new PathMetadata(s); + // use hard-coded policy about updating + shouldUpdate = DIR_MERGE_UPDATES_ALL_RECORDS_NONAUTH; + } + if (shouldUpdate) { + // we do want to update DDB and the listing with a new entry. + LOG.debug("Update ms with newer metadata of: {}", s); + // ensure it gets into the dirListing + // add to the list of entries to add later, + entriesToAdd.add(pathMetadata); } + // add the entry to the union; no-op if it was already there. + dirMeta.put(pathMetadata); } - IOUtils.cleanupWithLogger(LOG, operationState); - return dirMetaToStatuses(dirMeta); + if (!entriesToAdd.isEmpty()) { + // non-auth, just push out the updated entry list + LOG.debug("Adding {} entries under directory {}", entriesToAdd.size(), path); + putWithTtl(ms, entriesToAdd, timeProvider, operationState); + } } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java index 59a21f6950b0f..dec0b07950756 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreAuthoritativeMode.java @@ -53,7 +53,8 @@ import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static org.apache.hadoop.fs.s3a.S3AUtils.applyLocatedFiles; import static org.apache.hadoop.fs.s3a.Statistic.OBJECT_LIST_REQUESTS; -import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_AUTHORITATIVE_DIRECTORIES_UPDATED;import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_RECORD_WRITES; +import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_AUTHORITATIVE_DIRECTORIES_UPDATED; +import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_RECORD_WRITES; import static org.apache.hadoop.fs.s3a.s3guard.AuthoritativeAuditOperation.ERROR_PATH_NOT_AUTH_IN_FS; import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.authoritativeEmptyDirectoryMarker; import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Authoritative.CHECK_FLAG; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java index 2190c0c66288c..8f97179155c0d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java @@ -18,6 +18,7 @@ package org.apache.hadoop.fs.s3a.s3guard; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -27,7 +28,10 @@ import java.util.UUID; import java.util.concurrent.TimeUnit; +import org.assertj.core.api.Assertions; +import org.junit.After; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,40 +56,180 @@ */ public class TestS3Guard extends Assert { + public static final String MS_FILE_1 = "s3a://bucket/dir/ms-file1"; + + public static final String MS_FILE_2 = "s3a://bucket/dir/ms-file2"; + + public static final String S3_FILE_3 = "s3a://bucket/dir/s3-file3"; + + public static final String S3_DIR_4 = "s3a://bucket/dir/s3-dir4"; + + public static final Path DIR_PATH = new Path("s3a://bucket/dir"); + + private MetadataStore ms; + + private ITtlTimeProvider timeProvider; + + @Before + public void setUp() throws Exception { + final Configuration conf = new Configuration(false); + ms = new LocalMetadataStore(); + ms.initialize(conf, new S3Guard.TtlTimeProvider(conf)); + timeProvider = new S3Guard.TtlTimeProvider( + DEFAULT_METADATASTORE_METADATA_TTL); + + } + + @After + public void tearDown() throws Exception { + if (ms != null) { + ms.destroy(); + } + } + /** * Basic test to ensure results from S3 and MetadataStore are merged * correctly. */ @Test - public void testDirListingUnion() throws Exception { - final Configuration conf = new Configuration(false); - MetadataStore ms = new LocalMetadataStore(); - - Path dirPath = new Path("s3a://bucket/dir"); - ms.initialize(conf, new S3Guard.TtlTimeProvider(conf)); + public void testDirListingUnionNonauth() throws Exception { // Two files in metadata store listing - PathMetadata m1 = makePathMeta("s3a://bucket/dir/ms-file1", false); - PathMetadata m2 = makePathMeta("s3a://bucket/dir/ms-file2", false); - DirListingMetadata dirMeta = new DirListingMetadata(dirPath, + PathMetadata m1 = makePathMeta(MS_FILE_1, false); + PathMetadata m2 = makePathMeta(MS_FILE_2, false); + DirListingMetadata dirMeta = new DirListingMetadata(DIR_PATH, Arrays.asList(m1, m2), false); - // Two other files in s3 + // Two other entries in s3 + final S3AFileStatus s1Status = makeFileStatus(S3_FILE_3, false); + final S3AFileStatus s2Status = makeFileStatus(S3_DIR_4, true); List s3Listing = Arrays.asList( - makeFileStatus("s3a://bucket/dir/s3-file3", false), - makeFileStatus("s3a://bucket/dir/s3-file4", false) - ); + s1Status, + s2Status); + + FileStatus[] result = S3Guard.dirListingUnion(ms, DIR_PATH, s3Listing, + dirMeta, false, timeProvider); + + assertEquals("listing length", 4, result.length); + assertContainsPaths(result, MS_FILE_1, MS_FILE_2, S3_FILE_3, S3_DIR_4); + + // check the MS doesn't contain the s3 entries as nonauth + // unions should block them + assertNoRecord(ms, S3_FILE_3); + assertNoRecord(ms, S3_DIR_4); + + // for entries which do exist, when updated in S3, the metastore is updated + S3AFileStatus f1Status2 = new S3AFileStatus( + 200, System.currentTimeMillis(), new Path(MS_FILE_1), + 1, null, "tag2", "ver2"); + FileStatus[] result2 = S3Guard.dirListingUnion(ms, DIR_PATH, + Arrays.asList(f1Status2), + dirMeta, false, timeProvider); + // the listing returns the new status + Assertions.assertThat(find(result2, MS_FILE_1)) + .describedAs("Entry in listing results for %s", MS_FILE_1) + .isSameAs(f1Status2); + // as does a query of the MS + final PathMetadata updatedMD = verifyRecord(ms, MS_FILE_1); + Assertions.assertThat(updatedMD.getFileStatus()) + .describedAs("Entry in metastore for %s: %s", MS_FILE_1, updatedMD) + .isEqualTo(f1Status2); + } + + /** + * Auth mode unions are different. + */ + @Test + public void testDirListingUnionAuth() throws Exception { + + // Two files in metadata store listing + PathMetadata m1 = makePathMeta(MS_FILE_1, false); + PathMetadata m2 = makePathMeta(MS_FILE_2, false); + DirListingMetadata dirMeta = new DirListingMetadata(DIR_PATH, + Arrays.asList(m1, m2), true); + + // Two other entries in s3 + S3AFileStatus s1Status = makeFileStatus(S3_FILE_3, false); + S3AFileStatus s2Status = makeFileStatus(S3_DIR_4, true); + List s3Listing = Arrays.asList( + s1Status, + s2Status); ITtlTimeProvider timeProvider = new S3Guard.TtlTimeProvider( DEFAULT_METADATASTORE_METADATA_TTL); - FileStatus[] result = S3Guard.dirListingUnion(ms, dirPath, s3Listing, - dirMeta, false, timeProvider); + FileStatus[] result = S3Guard.dirListingUnion(ms, DIR_PATH, s3Listing, + dirMeta, true, timeProvider); assertEquals("listing length", 4, result.length); - assertContainsPath(result, "s3a://bucket/dir/ms-file1"); - assertContainsPath(result, "s3a://bucket/dir/ms-file2"); - assertContainsPath(result, "s3a://bucket/dir/s3-file3"); - assertContainsPath(result, "s3a://bucket/dir/s3-file4"); + assertContainsPaths(result, MS_FILE_1, MS_FILE_2, S3_FILE_3, S3_DIR_4); + + // now verify an auth scan added the records + PathMetadata file3Meta = verifyRecord(ms, S3_FILE_3); + PathMetadata dir4Meta = verifyRecord(ms, S3_DIR_4); + + // we can't check auth flag handling because local FS doesn't have one + // so do just check the dir status still all good. + Assertions.assertThat(dir4Meta) + .describedAs("Metastore entry for dir %s", dir4Meta) + .matches(m -> m.getFileStatus().isDirectory()); + + DirListingMetadata dirMeta2 = new DirListingMetadata(DIR_PATH, + Arrays.asList(m1, m2, file3Meta, dir4Meta), true); + // now s1 status is updated on S3 + S3AFileStatus s1Status2 = new S3AFileStatus( + 200, System.currentTimeMillis(), new Path(S3_FILE_3), + 1, null, "tag2", "ver2"); + + // but the result of the listing contains the old entry + // because auth mode doesn't pick up changes in S3 which + // didn't go through s3guard + FileStatus[] result2 = S3Guard.dirListingUnion(ms, DIR_PATH, + Arrays.asList(s1Status2), + dirMeta2, true, timeProvider); + Assertions.assertThat(find(result2, S3_FILE_3)) + .describedAs("Entry in listing results for %s", S3_FILE_3) + .isSameAs(file3Meta.getFileStatus()); + } + + /** + * Assert there is no record in the store. + * @param ms metastore + * @param path path + * @throws IOException IOError + */ + private void assertNoRecord(MetadataStore ms, String path) + throws IOException { + Assertions.assertThat(lookup(ms, path)) + .describedAs("Metastore entry for %s", path) + .isNull(); + } + + /** + * Assert there is arecord in the store, then return it. + * @param ms metastore + * @param path path + * @return the record. + * @throws IOException IO Error + */ + private PathMetadata verifyRecord(MetadataStore ms, String path) + throws IOException { + final PathMetadata md = lookup(ms, path); + Assertions.assertThat(md) + .describedAs("Metastore entry for %s", path) + .isNotNull(); + return md; + } + + /** + * Look up a record. + * @param ms store + * @param path path + * @return the record or null + * @throws IOException IO Error + */ + private PathMetadata lookup(final MetadataStore ms, final String path) + throws IOException { + return ms.get(new Path(path)); } @Test @@ -293,18 +437,32 @@ public void testLogS3GuardDisabled() throws Exception { localLogger, "FOO_BAR_LEVEL", "bucket")); } + void assertContainsPaths(FileStatus[] statuses, String...pathStr) { + for (String s :pathStr) { + assertContainsPath(statuses, s); + } + } + void assertContainsPath(FileStatus[] statuses, String pathStr) { - assertTrue("listing doesn't contain " + pathStr, - containsPath(statuses, pathStr)); + find(statuses, pathStr); } - boolean containsPath(FileStatus[] statuses, String pathStr) { + /** + * Look up an entry or raise an assertion + * @param statuses list of statuses + * @param pathStr path to search + * @return the entry if found + */ + private FileStatus find(FileStatus[] statuses, String pathStr) { for (FileStatus s : statuses) { if (s.getPath().toString().equals(pathStr)) { - return true; + return s; } } - return false; + // no match, fail meaningfully + Assertions.assertThat(statuses) + .anyMatch(s -> s.getPath().toString().equals(pathStr)); + return null; } private PathMetadata makePathMeta(String pathStr, boolean isDir) {