From f72690156611c11830a5b7ba3b546fa153b1fcc9 Mon Sep 17 00:00:00 2001 From: Ben Roling Date: Thu, 2 May 2019 10:19:00 -0500 Subject: [PATCH 01/13] Squashed commit of the following: commit ae876ab2df46c68ddd923edf8dd1d314191fcc94 Merge: 2e0254ea7a4 6a427456eb5 Author: Ben Roling Date: Thu May 2 10:14:10 2019 -0500 Merge branch 'trunk' into HADOOP-16085-squashed-2 commit 2e0254ea7a4650736854a0bcbd13efaa1622fc7f Author: Ben Roling Date: Thu Apr 18 12:13:40 2019 -0500 Remove unused import commit d1275e4a381b700baeaf309ea51b244b10549b5b Merge: 450ba663eee df76cdc8959 Author: Ben Roling Date: Thu Apr 18 12:10:01 2019 -0500 Merge branch 'trunk' into HADOOP-16085-squashed commit 450ba663eeefe48fdf178071ffe763f98d9b7834 Author: Ben Roling Date: Thu Apr 18 11:45:41 2019 -0500 Improvements to TestObjectChangeDetectionAttributes, AbstractS3AMockTest commit 408af6ca1561fdd221da5ddd1a0b50cca829111f Author: Ben Roling Date: Thu Apr 18 10:29:05 2019 -0500 Use HttpStatus code constant instead of magic number commit 5f0532b5666cd5b855e219d4ba02cbbfe19c33c2 Author: Ben Roling Date: Thu Apr 18 10:02:50 2019 -0500 Update core-default.xml commit 3488b20da09bc6ceb8bcfca140aefdbc8ea6cca4 Author: Ben Roling Date: Wed Apr 17 16:14:43 2019 -0500 Fix runaround of creating FileStatus and then calling fromFileStatus() commit 90d5c9c16277bed3deb7c6ff760d875c1123fe83 Author: Ben Roling Date: Wed Apr 17 15:45:51 2019 -0500 Fix minor nits commit 3ff59e48785f72c969b5aaacf720743da42a8f3f Author: Ben Roling Date: Wed Apr 17 15:07:02 2019 -0500 Mutate S3AFileStatus instead of creating new instance commit 13fab97cf2ee58b3ee25beb2aa5f89540890329d Author: Ben Roling Date: Wed Apr 17 14:30:55 2019 -0500 Rename S3LocatedFileStatus to S3ALocatedFileStatus commit bee4e5291f216bc3e35ca7765873dcd8d6355648 Author: Ben Roling Date: Wed Apr 17 14:25:17 2019 -0500 Stop pretending to support group and permission attributes on S3AFileStatus commit 807e13b293bd9fee426adc0d67a2572a421cd105 Author: Ben Roling Date: Wed 
Apr 17 14:20:14 2019 -0500 Add serialVersionUID to S3LocatedFileStatus commit 9974cec858f542b3b85f28e7cac8578cc211f7b3 Author: Ben Roling Date: Mon Apr 8 13:34:38 2019 -0500 Fix missed group or owner tweak commit 708c00167f8d9f416bcba120b9d1122f4e42947e Author: Ben Roling Date: Mon Apr 8 12:58:14 2019 -0500 Fix S3AFileStatus group handling ITestS3AConfiguration.testUsernameFromUGI was failing, expecting the user to be copied into the group. Strict copying of user into group causes TestLocalMetadataStore.testPutNew() to fail since it expects the group to be preserved from the original FileStatus. This change copies user into group when group is null/empty. With this change, all existing tests pass. commit 5239a9f20ab81aa6437062e9a59d0207f139636a Author: Ben Roling Date: Thu Apr 4 16:38:31 2019 -0500 Skip tests that require versionId when bucket doesn't have versioning enabled commit 4c6331ed84660e464fbeae5ba71ab19a3362c107 Author: Ben Roling Date: Mon Apr 1 13:58:24 2019 -0500 Fix broken TestObjectChangeDetectionAttributes commit 8a19c42ababcf1d3915ea359ebb9a6ea8560ed04 Author: Ben Roling Date: Mon Apr 1 10:08:33 2019 -0500 Squashed commit of the following: commit 9f4ad8845d8ee040ffd009d61ebec2d302feb27f Author: Ben Roling Date: Mon Apr 1 09:29:35 2019 -0500 Add test for 412 response commit dc0a3fb1930d831434b576cfe236cd3196fd2024 Author: Ben Roling Date: Thu Mar 28 16:53:46 2019 -0500 Update tests that started failing due to HADOOP-15999 commit 5e1f3e343168913dff7c3d169c994e78f001e4dd Author: Ben Roling Date: Thu Mar 28 15:49:26 2019 -0500 Speed up ITestS3ARemoteFileChanged commit 1b6be405b72fa00465829298b6ccb33615befdcb Author: Ben Roling Date: Thu Mar 28 14:23:53 2019 -0500 Skip invalid test when object versioning enabled commit 8597d2eb719c817b40c7a02dacba2e5b0eeedd65 Merge: 2d235f89d12 b5db2383832 Author: Ben Roling Date: Thu Mar 28 11:54:50 2019 -0500 Merge remote-tracking branch 'apache/trunk' into HADOOP-16085 commit 2d235f89d125e47e0122cf9cde92f2903bdf7350 
Author: Ben Roling Date: Thu Mar 28 11:51:26 2019 -0500 Fix typo commit dc83cef506975f541ecfa1e9ddc0837dc8ed3165 Author: Ben Roling Date: Thu Mar 28 10:28:09 2019 -0500 Generalize TestObjectETag to cover versionId and test overwrite commit 0d71f327f17ec74c5d0f1d867948f4a6bad35743 Author: Ben Roling Date: Thu Mar 28 08:45:42 2019 -0500 Fix trailing whitespace commit 324be6d2b09521292df74a581bf38d27b58d9ace Author: Ben Roling Date: Wed Mar 27 22:00:57 2019 -0500 S3GuardTool updates to correct ETag or versionId metadata commit 2a2bba772c830411de554ccd1f39a4779273142d Author: Ben Roling Date: Wed Mar 27 21:27:27 2019 -0500 Clarify log message commit 6e62a3aed4ef316d7015757ab2cc73070e1b470f Author: Ben Roling Date: Wed Mar 27 21:17:48 2019 -0500 Documentation updates per PR feedback commit 1ff8bef49f86c863300166d26a331ab8817f90ed Author: Ben Roling Date: Wed Mar 27 16:05:59 2019 -0500 check version.required on CopyResult commit e296275971f3b7f1f74fd646b10f50a20a8673fd Author: Ben Roling Date: Wed Mar 27 16:04:50 2019 -0500 Minor javadoc improvements from PR review commit 3e9ea192305603d5a4bc096d04aa9b7fad3f9421 Author: Ben Roling Date: Wed Mar 27 13:15:58 2019 -0500 Skip tests that aren't applicable with change.detection.source=versionId commit ddbf68bb1ad67c72f87eed29149929af26694377 Author: Ben Roling Date: Wed Mar 27 11:56:38 2019 -0500 Add tests of case where no version metadata is present commit 21d37ddec4554e24640f4aad0f640568bb62ade6 Author: Ben Roling Date: Wed Mar 27 09:25:46 2019 -0500 Fix compiler deprecation warning commit b8e1569b1c30b591459ade7c22d01a5758e7f22d Author: Ben Roling Date: Wed Mar 27 09:19:46 2019 -0500 Fix license issue commit 33bb5f9c2b4167db1149057834af2d4dbf8e418a Author: Ben Roling Date: Wed Mar 27 09:19:32 2019 -0500 Fix findbugs issue commit 5b7fadbe3f3d10219c0068aff1b44a5e3d088913 Author: Ben Roling Date: Wed Mar 27 09:00:39 2019 -0500 Fix checkstyle issues commit 6110a11d22d9eb07ed7ca065461a5b7e12e0db37 Author: Ben Roling Date: Wed 
Mar 27 08:28:37 2019 -0500 Remove trailing whitespace commit d82069bb2b21c2a5df5f04bd8ab7004b239e56da Author: Ben Roling Date: Tue Mar 26 16:05:01 2019 -0500 Improve S3Guard doc commit ca2f0e9352da8d11997b9eb56fdebfe52972d4d2 Author: Ben Roling Date: Tue Mar 26 14:29:03 2019 -0500 Fix ITestS3ARemoteFileChanged commit 1e4fa85cc153575d60c25189f7ddeb15e3a44533 Author: Ben Roling Date: Tue Mar 26 11:37:48 2019 -0500 Increase local metastore cache timeout commit 34b0c8028ee8a711b1bf15ce127bef58ae231ff1 Author: Ben Roling Date: Tue Mar 26 11:35:34 2019 -0500 Fix isEmptyDir inconsistency commit bbf8365815c8e60da04158a805d49f02bcf48e62 Author: Ben Roling Date: Mon Mar 25 16:55:24 2019 -0500 TestPathMetadataDynamoDBTranslation tests null etag, versonId commit 2ae7d1621ee893b9e1282d8741509dadf31d9225 Author: Ben Roling Date: Mon Mar 25 16:54:49 2019 -0500 Add constants in TestDirListingMetadata commit 068a55d62201284393f15448acbb8b4b439eefd5 Author: Ben Roling Date: Mon Mar 25 15:43:45 2019 -0500 Add copy exception handling commit 0eca6f38be44879011df7099a89ec338851f9c36 Author: Ben Roling Date: Mon Mar 25 12:43:51 2019 -0500 Don't process response from copy commit ad9e1524c7ec337a21b47145701187e0e279a3ec Author: Ben Roling Date: Mon Feb 25 16:41:54 2019 -0600 HADOOP-16085-003.patch Rebase of previous work after merge of HADOOP-15625. 
--- .../src/main/resources/core-default.xml | 18 +- hadoop-tools/hadoop-aws/pom.xml | 5 + .../org/apache/hadoop/fs/s3a/Listing.java | 75 ++-- .../fs/s3a/RemoteFileChangedException.java | 20 + .../apache/hadoop/fs/s3a/S3AFileStatus.java | 50 ++- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 291 +++++++++---- .../apache/hadoop/fs/s3a/S3AInputStream.java | 3 +- .../hadoop/fs/s3a/S3ALocatedFileStatus.java | 63 +++ .../org/apache/hadoop/fs/s3a/S3AUtils.java | 21 +- .../hadoop/fs/s3a/S3ObjectAttributes.java | 16 +- .../hadoop/fs/s3a/WriteOperationHelper.java | 31 +- .../fs/s3a/impl/ChangeDetectionPolicy.java | 104 ++++- .../hadoop/fs/s3a/impl/ChangeTracker.java | 124 +++++- .../fs/s3a/s3guard/DDBPathMetadata.java | 8 +- .../fs/s3a/s3guard/DescendantsIterator.java | 6 +- .../fs/s3a/s3guard/DirListingMetadata.java | 5 +- .../fs/s3a/s3guard/DynamoDBMetadataStore.java | 51 ++- .../fs/s3a/s3guard/LocalMetadataStore.java | 3 +- .../MetadataStoreListFilesIterator.java | 12 +- .../hadoop/fs/s3a/s3guard/PathMetadata.java | 26 +- .../PathMetadataDynamoDBTranslation.java | 19 +- .../apache/hadoop/fs/s3a/s3guard/S3Guard.java | 34 +- .../hadoop/fs/s3a/s3guard/S3GuardTool.java | 17 +- .../site/markdown/tools/hadoop-aws/s3guard.md | 140 +++++++ .../tools/hadoop-aws/troubleshooting_s3a.md | 4 +- .../hadoop/fs/s3a/AbstractS3AMockTest.java | 17 +- .../hadoop/fs/s3a/ITestS3ADelayedFNF.java | 9 + .../hadoop/fs/s3a/ITestS3AInconsistency.java | 9 + .../fs/s3a/ITestS3ARemoteFileChanged.java | 309 ++++++++++++-- .../fs/s3a/ITestS3GuardListConsistency.java | 2 +- .../s3a/ITestS3GuardOutOfBandOperations.java | 9 + .../hadoop/fs/s3a/MockS3AFileSystem.java | 2 +- .../apache/hadoop/fs/s3a/S3ATestUtils.java | 25 +- .../org/apache/hadoop/fs/s3a/TestListing.java | 29 +- .../fs/s3a/TestStreamChangeTracker.java | 194 ++++++++- .../s3guard/AbstractS3GuardToolTestBase.java | 2 +- .../s3guard/ITestDynamoDBMetadataStore.java | 5 +- .../ITestDynamoDBMetadataStoreScale.java | 3 +- 
.../fs/s3a/s3guard/ITestS3GuardToolLocal.java | 37 ++ .../fs/s3a/s3guard/MetadataStoreTestBase.java | 77 ++-- .../s3a/s3guard/TestDirListingMetadata.java | 16 +- .../s3a/s3guard/TestLocalMetadataStore.java | 11 +- .../TestObjectChangeDetectionAttributes.java | 384 ++++++++++++++++++ .../TestPathMetadataDynamoDBTranslation.java | 93 +++-- .../hadoop/fs/s3a/s3guard/TestS3Guard.java | 13 +- .../AbstractITestS3AMetadataStoreScale.java | 6 +- .../fs/s3a/select/ITestS3SelectCLI.java | 8 + .../fs/s3a/select/ITestS3SelectLandsat.java | 8 + .../fs/s3a/select/ITestS3SelectMRJob.java | 10 + .../filecache/TestS3AResourceScope.java | 2 +- 50 files changed, 2029 insertions(+), 397 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ALocatedFileStatus.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestObjectChangeDetectionAttributes.java diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 651ce33b29ab4..641955eecf5e9 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -1904,15 +1904,15 @@ fs.s3a.change.detection.mode server - Determines how change detection is applied to alert to S3 objects - rewritten while being read. Value 'server' indicates to apply the attribute - constraint directly on GetObject requests to S3. Value 'client' means to do a - client-side comparison of the attribute value returned in the response. Value - 'server' would not work with third-party S3 implementations that do not - support these constraints on GetObject. Values 'server' and 'client' generate - RemoteObjectChangedException when a mismatch is detected. Value 'warn' works - like 'client' but generates only a warning. Value 'none' will ignore change - detection completely. 
+ Determines how change detection is applied to alert to inconsistent S3 + objects read during or after an overwrite. Value 'server' indicates to apply + the attribute constraint directly on GetObject requests to S3. Value 'client' + means to do a client-side comparison of the attribute value returned in the + response. Value 'server' would not work with third-party S3 implementations + that do not support these constraints on GetObject. Values 'server' and + 'client' generate RemoteObjectChangedException when a mismatch is detected. + Value 'warn' works like 'client' but generates only a warning. Value 'none' + will ignore change detection completely. diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index 3bfe776a8e704..f8f5ee2f38f4a 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -406,6 +406,11 @@ hadoop-common provided + + org.apache.httpcomponents + httpcore + provided + org.apache.hadoop hadoop-common diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java index b016eadcfbb46..e002e3ed5b30b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java @@ -68,7 +68,7 @@ public Listing(S3AFileSystem owner) { * @return the file status iterator */ ProvidedFileStatusIterator createProvidedFileStatusIterator( - FileStatus[] fileStatuses, + S3AFileStatus[] fileStatuses, PathFilter filter, FileStatusAcceptor acceptor) { return new ProvidedFileStatusIterator(fileStatuses, filter, acceptor); @@ -114,7 +114,7 @@ FileStatusListingIterator createFileStatusListingIterator( S3ListRequest request, PathFilter filter, Listing.FileStatusAcceptor acceptor, - RemoteIterator providedStatus) throws IOException { + RemoteIterator providedStatus) throws IOException { return new 
FileStatusListingIterator( new ObjectListingIterator(listPath, request), filter, @@ -129,7 +129,7 @@ FileStatusListingIterator createFileStatusListingIterator( */ @VisibleForTesting LocatedFileStatusIterator createLocatedFileStatusIterator( - RemoteIterator statusIterator) { + RemoteIterator statusIterator) { return new LocatedFileStatusIterator(statusIterator); } @@ -143,7 +143,7 @@ LocatedFileStatusIterator createLocatedFileStatusIterator( */ @VisibleForTesting TombstoneReconcilingIterator createTombstoneReconcilingIterator( - RemoteIterator iterator, Set tombstones) { + RemoteIterator iterator, Set tombstones) { return new TombstoneReconcilingIterator(iterator, tombstones); } @@ -189,19 +189,19 @@ interface FileStatusAcceptor { * iterator returned. */ static final class SingleStatusRemoteIterator - implements RemoteIterator { + implements RemoteIterator { /** * The status to return; set to null after the first iteration. */ - private LocatedFileStatus status; + private S3ALocatedFileStatus status; /** * Constructor. * @param status status value: may be null, in which case * the iterator is empty. */ - public SingleStatusRemoteIterator(LocatedFileStatus status) { + public SingleStatusRemoteIterator(S3ALocatedFileStatus status) { this.status = status; } @@ -226,9 +226,9 @@ public boolean hasNext() throws IOException { * to the constructor. */ @Override - public LocatedFileStatus next() throws IOException { + public S3ALocatedFileStatus next() throws IOException { if (hasNext()) { - LocatedFileStatus s = this.status; + S3ALocatedFileStatus s = this.status; status = null; return s; } else { @@ -247,16 +247,16 @@ public LocatedFileStatus next() throws IOException { * There is no remote data to fetch. 
*/ static class ProvidedFileStatusIterator - implements RemoteIterator { - private final ArrayList filteredStatusList; + implements RemoteIterator { + private final ArrayList filteredStatusList; private int index = 0; - ProvidedFileStatusIterator(FileStatus[] fileStatuses, PathFilter filter, + ProvidedFileStatusIterator(S3AFileStatus[] fileStatuses, PathFilter filter, FileStatusAcceptor acceptor) { Preconditions.checkArgument(fileStatuses != null, "Null status list!"); filteredStatusList = new ArrayList<>(fileStatuses.length); - for (FileStatus status : fileStatuses) { + for (S3AFileStatus status : fileStatuses) { if (filter.accept(status.getPath()) && acceptor.accept(status)) { filteredStatusList.add(status); } @@ -270,7 +270,7 @@ public boolean hasNext() throws IOException { } @Override - public FileStatus next() throws IOException { + public S3AFileStatus next() throws IOException { if (!hasNext()) { throw new NoSuchElementException(); } @@ -305,7 +305,7 @@ public FileStatus next() throws IOException { * Thread safety: None. */ class FileStatusListingIterator - implements RemoteIterator { + implements RemoteIterator { /** Source of objects. */ private final ObjectListingIterator source; @@ -316,10 +316,10 @@ class FileStatusListingIterator /** request batch size. */ private int batchSize; /** Iterator over the current set of results. */ - private ListIterator statusBatchIterator; + private ListIterator statusBatchIterator; - private final Set providedStatus; - private Iterator providedStatusIterator; + private final Set providedStatus; + private Iterator providedStatusIterator; /** * Create an iterator over file status entries. 
@@ -335,13 +335,13 @@ class FileStatusListingIterator FileStatusListingIterator(ObjectListingIterator source, PathFilter filter, FileStatusAcceptor acceptor, - RemoteIterator providedStatus) throws IOException { + RemoteIterator providedStatus) throws IOException { this.source = source; this.filter = filter; this.acceptor = acceptor; this.providedStatus = new HashSet<>(); for (; providedStatus != null && providedStatus.hasNext();) { - final FileStatus status = providedStatus.next(); + final S3AFileStatus status = providedStatus.next(); if (filter.accept(status.getPath()) && acceptor.accept(status)) { this.providedStatus.add(status); } @@ -384,8 +384,8 @@ private boolean sourceHasNext() throws IOException { @Override @Retries.RetryTranslated - public FileStatus next() throws IOException { - final FileStatus status; + public S3AFileStatus next() throws IOException { + final S3AFileStatus status; if (sourceHasNext()) { status = statusBatchIterator.next(); // We remove from provided list the file status listed by S3 so that @@ -441,7 +441,7 @@ private boolean buildNextStatusBatch(S3ListResult objects) { // counters for debug logs int added = 0, ignored = 0; // list to fill in with results. Initial size will be list maximum. 
- List stats = new ArrayList<>( + List stats = new ArrayList<>( objects.getObjectSummaries().size() + objects.getCommonPrefixes().size()); // objects @@ -453,8 +453,9 @@ private boolean buildNextStatusBatch(S3ListResult objects) { } // Skip over keys that are ourselves and old S3N _$folder$ files if (acceptor.accept(keyPath, summary) && filter.accept(keyPath)) { - FileStatus status = createFileStatus(keyPath, summary, - owner.getDefaultBlockSize(keyPath), owner.getUsername()); + S3AFileStatus status = createFileStatus(keyPath, summary, + owner.getDefaultBlockSize(keyPath), owner.getUsername(), + null, null); LOG.debug("Adding: {}", status); stats.add(status); added++; @@ -468,7 +469,7 @@ private boolean buildNextStatusBatch(S3ListResult objects) { for (String prefix : objects.getCommonPrefixes()) { Path keyPath = owner.keyToQualifiedPath(prefix); if (acceptor.accept(keyPath, prefix) && filter.accept(keyPath)) { - FileStatus status = new S3AFileStatus(Tristate.FALSE, keyPath, + S3AFileStatus status = new S3AFileStatus(Tristate.FALSE, keyPath, owner.getUsername()); LOG.debug("Adding directory: {}", status); added++; @@ -679,14 +680,14 @@ public boolean accept(FileStatus status) { * return a remote iterator of {@link LocatedFileStatus} instances. */ class LocatedFileStatusIterator - implements RemoteIterator { - private final RemoteIterator statusIterator; + implements RemoteIterator { + private final RemoteIterator statusIterator; /** * Constructor. 
* @param statusIterator an iterator over the remote status entries */ - LocatedFileStatusIterator(RemoteIterator statusIterator) { + LocatedFileStatusIterator(RemoteIterator statusIterator) { this.statusIterator = statusIterator; } @@ -696,7 +697,7 @@ public boolean hasNext() throws IOException { } @Override - public LocatedFileStatus next() throws IOException { + public S3ALocatedFileStatus next() throws IOException { return owner.toLocatedFileStatus(statusIterator.next()); } } @@ -708,16 +709,16 @@ public LocatedFileStatus next() throws IOException { * remain in the source iterator. */ static class TombstoneReconcilingIterator implements - RemoteIterator { - private LocatedFileStatus next = null; - private final RemoteIterator iterator; + RemoteIterator { + private S3ALocatedFileStatus next = null; + private final RemoteIterator iterator; private final Set tombstones; /** * @param iterator Source iterator to filter * @param tombstones set of tombstone markers to filter out of results */ - TombstoneReconcilingIterator(RemoteIterator + TombstoneReconcilingIterator(RemoteIterator iterator, Set tombstones) { this.iterator = iterator; if (tombstones != null) { @@ -729,7 +730,7 @@ static class TombstoneReconcilingIterator implements private boolean fetch() throws IOException { while (next == null && iterator.hasNext()) { - LocatedFileStatus candidate = iterator.next(); + S3ALocatedFileStatus candidate = iterator.next(); if (!tombstones.contains(candidate.getPath())) { next = candidate; return true; @@ -745,9 +746,9 @@ public boolean hasNext() throws IOException { return fetch(); } - public LocatedFileStatus next() throws IOException { + public S3ALocatedFileStatus next() throws IOException { if (hasNext()) { - LocatedFileStatus result = next; + S3ALocatedFileStatus result = next; next = null; fetch(); return result; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/RemoteFileChangedException.java 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/RemoteFileChangedException.java index cfa5935bbf3e3..1df2d7ee20389 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/RemoteFileChangedException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/RemoteFileChangedException.java @@ -32,6 +32,9 @@ @InterfaceStability.Unstable public class RemoteFileChangedException extends PathIOException { + public static final String PRECONDITIONS_FAILED = + "Constraints of request were unsatisfiable"; + /** * Constructs a RemoteFileChangedException. * @@ -46,4 +49,21 @@ public RemoteFileChangedException(String path, super(path, message); setOperation(operation); } + + /** + * Constructs a RemoteFileChangedException. + * + * @param path the path accessed when the change was detected + * @param operation the operation (e.g. open, re-open) performed when the + * change was detected + * @param message a message providing more details about the condition + * @param cause inner cause. + */ + public RemoteFileChangedException(String path, + String operation, + String message, + Throwable cause) { + super(path, message, cause); + setOperation(operation); + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java index ca6ca908beca6..b45e67ed7efd9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java @@ -21,6 +21,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; /** * File status for an S3A "file". 
@@ -32,6 +33,8 @@ @InterfaceStability.Evolving public class S3AFileStatus extends FileStatus { private Tristate isEmptyDirectory; + private String eTag; + private String versionId; /** * Create a directory status. @@ -69,15 +72,17 @@ public S3AFileStatus(Tristate isemptydir, * @param path path * @param blockSize block size * @param owner owner + * @param eTag eTag of the S3 object if available, else null + * @param versionId versionId of the S3 object if available, else null */ public S3AFileStatus(long length, long modification_time, Path path, - long blockSize, String owner) { - super(length, false, 1, blockSize, modification_time, 0, - null, null, null, null, + long blockSize, String owner, String eTag, String versionId) { + super(length, false, 1, blockSize, modification_time, + 0, null, owner, owner, null, path, false, true, false); isEmptyDirectory = Tristate.FALSE; - setOwner(owner); - setGroup(owner); + this.eTag = eTag; + this.versionId = versionId; } /** @@ -86,16 +91,19 @@ public S3AFileStatus(long length, long modification_time, Path path, * @param source FileStatus to convert to S3AFileStatus * @param isEmptyDirectory TRUE/FALSE if known to be / not be an empty * directory, UNKNOWN if that information was not computed. 
+ * @param eTag eTag of the S3 object if available, else null + * @param versionId versionId of the S3 object if available, else null * @return a new S3AFileStatus */ public static S3AFileStatus fromFileStatus(FileStatus source, - Tristate isEmptyDirectory) { + Tristate isEmptyDirectory, String eTag, String versionId) { if (source.isDirectory()) { return new S3AFileStatus(isEmptyDirectory, source.getPath(), source.getOwner()); } else { return new S3AFileStatus(source.getLen(), source.getModificationTime(), - source.getPath(), source.getBlockSize(), source.getOwner()); + source.getPath(), source.getBlockSize(), source.getOwner(), + eTag, versionId); } } @@ -109,6 +117,28 @@ public Tristate isEmptyDirectory() { return isEmptyDirectory; } + /** + * Update isEmptyDirectory attribute. + * @param isEmptyDirectory new isEmptyDirectory value + */ + public void setIsEmptyDirectory(Tristate isEmptyDirectory) { + this.isEmptyDirectory = isEmptyDirectory; + } + + /** + * @return the S3 object eTag when available, else null. + */ + public String getETag() { + return eTag; + } + + /** + * @return the S3 object versionId when available, else null. + */ + public String getVersionId() { + return versionId; + } + /** Compare if this object is equal to another object. * @param o the object to be compared. * @return true if two file status has the same path name; false if not. 
@@ -150,8 +180,10 @@ public long getModificationTime(){ @Override public String toString() { - return super.toString() + - String.format(" isEmptyDirectory=%s", isEmptyDirectory().name()); + return super.toString() + + String.format(" isEmptyDirectory=%s", isEmptyDirectory().name() + + String.format(" eTag=%s", eTag) + + String.format(" versionId=%s", versionId)); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 7c82aa6b90d6e..fead648d9b0d7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -74,6 +74,7 @@ import com.amazonaws.services.s3.transfer.TransferManager; import com.amazonaws.services.s3.transfer.TransferManagerConfiguration; import com.amazonaws.services.s3.transfer.Upload; +import com.amazonaws.services.s3.transfer.model.CopyResult; import com.amazonaws.services.s3.transfer.model.UploadResult; import com.amazonaws.event.ProgressListener; import com.google.common.annotations.VisibleForTesting; @@ -115,6 +116,7 @@ import org.apache.hadoop.fs.s3a.commit.CommitConstants; import org.apache.hadoop.fs.s3a.commit.PutTracker; import org.apache.hadoop.fs.s3a.commit.MagicCommitIntegration; +import org.apache.hadoop.fs.s3a.impl.ChangeTracker; import org.apache.hadoop.fs.s3a.select.SelectBinding; import org.apache.hadoop.fs.s3a.select.SelectConstants; import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata; @@ -696,11 +698,12 @@ public S3AInputPolicy getInputPolicy() { } /** - * Get the change detection policy for this FS instance. + * Get the change detection policy for this FS instance. Only public to allow + * access in tests in other packages. 
* @return the change detection policy */ @VisibleForTesting - ChangeDetectionPolicy getChangeDetectionPolicy() { + public ChangeDetectionPolicy getChangeDetectionPolicy() { return changeDetectionPolicy; } @@ -877,7 +880,7 @@ private FSDataInputStream open( throws IOException { entryPoint(INVOCATION_OPEN); - final FileStatus fileStatus = getFileStatus(path); + final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path); if (fileStatus.isDirectory()) { throw new FileNotFoundException("Can't open " + path + " because it is a directory"); @@ -910,7 +913,10 @@ private FSDataInputStream open( return new FSDataInputStream( new S3AInputStream( readContext, - createObjectAttributes(path), + createObjectAttributes( + path, + fileStatus.getETag(), + fileStatus.getVersionId()), fileStatus.getLen(), s3)); } @@ -943,13 +949,20 @@ private S3AReadOpContext createReadContext( /** * Create the attributes of an object for a get/select request. * @param f path path of the request. + * @param eTag the eTag of the S3 object + * @param versionId S3 object version ID * @return attributes to use when building the query. 
*/ - private S3ObjectAttributes createObjectAttributes(final Path f) { + private S3ObjectAttributes createObjectAttributes( + final Path f, + final String eTag, + final String versionId) { return new S3ObjectAttributes(bucket, pathToKey(f), getServerSideEncryptionAlgorithm(), - encryptionSecrets.getEncryptionKey()); + encryptionSecrets.getEncryptionKey(), + eTag, + versionId); } /** @@ -1213,19 +1226,27 @@ private boolean innerRename(Path source, Path dest) if (srcStatus.isFile()) { LOG.debug("rename: renaming file {} to {}", src, dst); long length = srcStatus.getLen(); + S3ObjectAttributes objectAttributes = + createObjectAttributes(srcStatus.getPath(), + srcStatus.getETag(), srcStatus.getVersionId()); + S3AReadOpContext readContext = createReadContext(srcStatus, inputPolicy, + changeDetectionPolicy, readAhead); if (dstStatus != null && dstStatus.isDirectory()) { String newDstKey = maybeAddTrailingSlash(dstKey); String filename = srcKey.substring(pathToKey(src.getParent()).length()+1); newDstKey = newDstKey + filename; - copyFile(srcKey, newDstKey, length); + CopyResult copyResult = copyFile(srcKey, newDstKey, length, + objectAttributes, readContext); S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, src, keyToQualifiedPath(newDstKey), length, getDefaultBlockSize(dst), - username); + username, copyResult.getETag(), copyResult.getVersionId()); } else { - copyFile(srcKey, dstKey, srcStatus.getLen()); + CopyResult copyResult = copyFile(srcKey, dstKey, srcStatus.getLen(), + objectAttributes, readContext); S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, src, dst, - length, getDefaultBlockSize(dst), username); + length, getDefaultBlockSize(dst), username, + copyResult.getETag(), copyResult.getVersionId()); } innerDelete(srcStatus, false); } else { @@ -1248,10 +1269,10 @@ private boolean innerRename(Path source, Path dest) } Path parentPath = keyToQualifiedPath(srcKey); - RemoteIterator iterator = listFilesAndEmptyDirectories( - parentPath, true); + 
RemoteIterator iterator = + listFilesAndEmptyDirectories(parentPath, true); while (iterator.hasNext()) { - LocatedFileStatus status = iterator.next(); + S3ALocatedFileStatus status = iterator.next(); long length = status.getLen(); String key = pathToKey(status.getPath()); if (status.isDirectory() && !key.endsWith("/")) { @@ -1261,7 +1282,13 @@ private boolean innerRename(Path source, Path dest) .add(new DeleteObjectsRequest.KeyVersion(key)); String newDstKey = dstKey + key.substring(srcKey.length()); - copyFile(key, newDstKey, length); + S3ObjectAttributes objectAttributes = + createObjectAttributes(status.getPath(), + status.getETag(), status.getVersionId()); + S3AReadOpContext readContext = createReadContext(status, inputPolicy, + changeDetectionPolicy, readAhead); + CopyResult copyResult = copyFile(key, newDstKey, length, + objectAttributes, readContext); if (hasMetadataStore()) { // with a metadata store, the object entries need to be updated, @@ -1273,7 +1300,8 @@ private boolean innerRename(Path source, Path dest) childDst, username); } else { S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, childSrc, - childDst, length, getDefaultBlockSize(childDst), username); + childDst, length, getDefaultBlockSize(childDst), username, + copyResult.getETag(), copyResult.getVersionId()); } // Ancestor directories may not be listed, so we explicitly add them S3Guard.addMoveAncestors(metadataStore, srcPaths, dstMetas, @@ -1802,7 +1830,8 @@ PutObjectResult putObjectDirect(PutObjectRequest putObjectRequest) PutObjectResult result = s3.putObject(putObjectRequest); incrementPutCompletedStatistics(true, len); // update metadata - finishedWrite(putObjectRequest.getKey(), len); + finishedWrite(putObjectRequest.getKey(), len, + result.getETag(), result.getVersionId()); return result; } catch (AmazonClientException e) { incrementPutCompletedStatistics(false, len); @@ -2160,7 +2189,7 @@ public FileStatus[] innerListStatus(Path f) throws FileNotFoundException, LOG.debug("List 
status for path: {}", path); entryPoint(INVOCATION_LIST_STATUS); - List result; + List result; final FileStatus fileStatus = getFileStatus(path); if (fileStatus.isDirectory()) { @@ -2419,11 +2448,11 @@ S3AFileStatus innerGetFileStatus(final Path f, } } - FileStatus msStatus = pm.getFileStatus(); + S3AFileStatus msStatus = pm.getFileStatus(); if (needEmptyDirectoryFlag && msStatus.isDirectory()) { if (pm.isEmptyDirectory() != Tristate.UNKNOWN) { // We have a definitive true / false from MetadataStore, we are done. - return S3AFileStatus.fromFileStatus(msStatus, pm.isEmptyDirectory()); + return msStatus; } else { DirListingMetadata children = S3Guard.listChildrenWithTtl(metadataStore, path, ttlTimeProvider); @@ -2434,7 +2463,7 @@ S3AFileStatus innerGetFileStatus(final Path f, } } else { // Either this is not a directory, or we don't care if it is empty - return S3AFileStatus.fromFileStatus(msStatus, pm.isEmptyDirectory()); + return msStatus; } // If the metadata store has no children for it and it's not listed in @@ -2443,7 +2472,8 @@ S3AFileStatus innerGetFileStatus(final Path f, try { s3FileStatus = s3GetFileStatus(path, key, tombstones); } catch (FileNotFoundException e) { - return S3AFileStatus.fromFileStatus(msStatus, Tristate.TRUE); + return S3AFileStatus.fromFileStatus(msStatus, Tristate.TRUE, + null, null); } // entry was found, save in S3Guard return S3Guard.putAndReturn(metadataStore, s3FileStatus, instrumentation); @@ -2482,7 +2512,9 @@ private S3AFileStatus s3GetFileStatus(final Path path, String key, dateToLong(meta.getLastModified()), path, getDefaultBlockSize(path), - username); + username, + meta.getETag(), + meta.getVersionId()); } } catch (AmazonServiceException e) { if (e.getStatusCode() != 404) { @@ -2509,7 +2541,9 @@ private S3AFileStatus s3GetFileStatus(final Path path, String key, dateToLong(meta.getLastModified()), path, getDefaultBlockSize(path), - username); + username, + meta.getETag(), + meta.getVersionId()); } } catch 
(AmazonServiceException e) { if (e.getStatusCode() != 404) { @@ -2734,7 +2768,8 @@ UploadResult executePut(PutObjectRequest putObjectRequest, UploadResult result = waitForUploadCompletion(key, info); listener.uploadCompleted(); // post-write actions - finishedWrite(key, info.getLength()); + finishedWrite(key, info.getLength(), + result.getETag(), result.getVersionId()); return result; } @@ -2899,12 +2934,16 @@ public List listAWSPolicyRules( * @param srcKey source object path * @param dstKey destination object path * @param size object size + * @param srcAttributes S3 attributes of the source object + * @param readContext the read context + * @return the result of the copy * @throws AmazonClientException on failures inside the AWS SDK * @throws InterruptedIOException the operation was interrupted * @throws IOException Other IO problems */ @Retries.RetryMixed - private void copyFile(String srcKey, String dstKey, long size) + private CopyResult copyFile(String srcKey, String dstKey, long size, + S3ObjectAttributes srcAttributes, S3AReadOpContext readContext) throws IOException, InterruptedIOException { LOG.debug("copyFile {} -> {} ", srcKey, dstKey); @@ -2918,22 +2957,48 @@ private void copyFile(String srcKey, String dstKey, long size) } }; - once("copyFile(" + srcKey + ", " + dstKey + ")", srcKey, + return once("copyFile(" + srcKey + ", " + dstKey + ")", srcKey, () -> { ObjectMetadata srcom = getObjectMetadata(srcKey); ObjectMetadata dstom = cloneObjectMetadata(srcom); setOptionalObjectMetadata(dstom); + + ChangeTracker changeTracker = new ChangeTracker( + keyToQualifiedPath(srcKey).toString(), + changeDetectionPolicy, + readContext.instrumentation.newInputStreamStatistics() + .getVersionMismatchCounter(), + srcAttributes); + + changeTracker.processMetadata(srcom, "copy", 0); + CopyObjectRequest copyObjectRequest = new CopyObjectRequest(bucket, srcKey, bucket, dstKey); + changeTracker.maybeApplyConstraint(copyObjectRequest); + 
setOptionalCopyObjectRequestParameters(copyObjectRequest); copyObjectRequest.setCannedAccessControlList(cannedACL); copyObjectRequest.setNewObjectMetadata(dstom); Copy copy = transfers.copy(copyObjectRequest); copy.addProgressListener(progressListener); try { - copy.waitForCopyResult(); + CopyOutcome copyOutcome = copyOutcome(copy); + InterruptedException interruptedException = + copyOutcome.getInterruptedException(); + if (interruptedException != null) { + throw interruptedException; + } + RuntimeException runtimeException = + copyOutcome.getRuntimeException(); + if (runtimeException != null) { + changeTracker.processException(runtimeException, "copy"); + throw runtimeException; + } + CopyResult result = copyOutcome.getCopyResult(); + changeTracker.processResponse(result); incrementWriteOperations(); instrumentation.filesCopied(1, size); + return result; } catch (InterruptedException e) { throw new InterruptedIOException("Interrupted copying " + srcKey + " to " + dstKey + ", cancelling"); @@ -2941,6 +3006,43 @@ private void copyFile(String srcKey, String dstKey, long size) }); } + private static CopyOutcome copyOutcome(Copy copy) { + try { + CopyResult result = copy.waitForCopyResult(); + return new CopyOutcome(result, null, null); + } catch (RuntimeException e) { + return new CopyOutcome(null, null, e); + } catch (InterruptedException e) { + return new CopyOutcome(null, e, null); + } + } + + private static final class CopyOutcome { + private final CopyResult copyResult; + private final InterruptedException interruptedException; + private final RuntimeException runtimeException; + + private CopyOutcome(CopyResult copyResult, + InterruptedException interruptedException, + RuntimeException runtimeException) { + this.copyResult = copyResult; + this.interruptedException = interruptedException; + this.runtimeException = runtimeException; + } + + public CopyResult getCopyResult() { + return copyResult; + } + + public InterruptedException getInterruptedException() { + 
return interruptedException; + } + + public RuntimeException getRuntimeException() { + return runtimeException; + } + } + /** * Set the optional parameters when initiating the request (encryption, * headers, storage, etc). @@ -3044,6 +3146,8 @@ private Optional generateSSECustomerKey() { * * @param key key written to * @param length total length of file written + * @param eTag eTag of the written object + * @param versionId S3 object versionId of the written object * @throws MetadataPersistenceException if metadata about the write could * not be saved to the metadata store and * fs.s3a.metadatastore.fail.on.write.error=true @@ -3051,7 +3155,7 @@ private Optional generateSSECustomerKey() { @InterfaceAudience.Private @Retries.RetryTranslated("Except if failOnMetadataWriteError=false, in which" + " case RetryExceptionsSwallowed") - void finishedWrite(String key, long length) + void finishedWrite(String key, long length, String eTag, String versionId) throws MetadataPersistenceException { LOG.debug("Finished write to {}, len {}", key, length); Path p = keyToQualifiedPath(key); @@ -3064,7 +3168,7 @@ void finishedWrite(String key, long length) S3Guard.addAncestors(metadataStore, p, username); S3AFileStatus status = createUploadFileStatus(p, S3AUtils.objectRepresentsDirectory(key, length), length, - getDefaultBlockSize(p), username); + getDefaultBlockSize(p), username, eTag, versionId); S3Guard.putAndReturn(metadataStore, status, instrumentation); } } catch (IOException e) { @@ -3436,26 +3540,41 @@ public EtagChecksum getFileChecksum(Path f, final long length) @Retries.OnceTranslated public RemoteIterator listFiles(Path f, boolean recursive) throws FileNotFoundException, IOException { - return innerListFiles(f, recursive, - new Listing.AcceptFilesOnly(qualify(f))); + return toLocatedFileStatusIterator(innerListFiles(f, recursive, + new Listing.AcceptFilesOnly(qualify(f)))); + } + + private static RemoteIterator toLocatedFileStatusIterator( + RemoteIterator iterator) { + 
return new RemoteIterator() { + @Override + public boolean hasNext() throws IOException { + return iterator.hasNext(); + } + + @Override + public LocatedFileStatus next() throws IOException { + return iterator.next(); + } + }; } @Retries.OnceTranslated - public RemoteIterator listFilesAndEmptyDirectories(Path f, - boolean recursive) throws IOException { + public RemoteIterator listFilesAndEmptyDirectories( + Path f, boolean recursive) throws IOException { return innerListFiles(f, recursive, new Listing.AcceptAllButS3nDirs()); } @Retries.OnceTranslated - private RemoteIterator innerListFiles(Path f, boolean + private RemoteIterator innerListFiles(Path f, boolean recursive, Listing.FileStatusAcceptor acceptor) throws IOException { entryPoint(INVOCATION_LIST_FILES); Path path = qualify(f); LOG.debug("listFiles({}, {})", path, recursive); try { // lookup dir triggers existence check - final FileStatus fileStatus = getFileStatus(path); + final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path); if (fileStatus.isFile()) { // simple case: File LOG.debug("Path is a file"); @@ -3467,7 +3586,7 @@ private RemoteIterator innerListFiles(Path f, boolean String delimiter = recursive ? null : "/"; LOG.debug("Requesting all entries under {} with delimiter '{}'", key, delimiter); - final RemoteIterator cachedFilesIterator; + final RemoteIterator cachedFilesIterator; final Set tombstones; if (recursive) { final PathMetadata pm = metadataStore.get(path, true); @@ -3539,52 +3658,55 @@ public RemoteIterator listLocatedStatus(final Path f, entryPoint(INVOCATION_LIST_LOCATED_STATUS); Path path = qualify(f); LOG.debug("listLocatedStatus({}, {}", path, filter); - return once("listLocatedStatus", path.toString(), - () -> { - // lookup dir triggers existence check - final FileStatus fileStatus = getFileStatus(path); - if (fileStatus.isFile()) { - // simple case: File - LOG.debug("Path is a file"); - return new Listing.SingleStatusRemoteIterator( - filter.accept(path) ? 
toLocatedFileStatus(fileStatus) : null); - } else { - // directory: trigger a lookup - final String key = maybeAddTrailingSlash(pathToKey(path)); - final Listing.FileStatusAcceptor acceptor = - new Listing.AcceptAllButSelfAndS3nDirs(path); - DirListingMetadata meta = - S3Guard.listChildrenWithTtl(metadataStore, path, - ttlTimeProvider); - final RemoteIterator cachedFileStatusIterator = - listing.createProvidedFileStatusIterator( - S3Guard.dirMetaToStatuses(meta), filter, acceptor); - return (allowAuthoritative && meta != null - && meta.isAuthoritative()) - ? listing.createLocatedFileStatusIterator( - cachedFileStatusIterator) - : listing.createLocatedFileStatusIterator( - listing.createFileStatusListingIterator(path, - createListObjectsRequest(key, "/"), - filter, - acceptor, - cachedFileStatusIterator)); - } - }); + RemoteIterator iterator = + once("listLocatedStatus", path.toString(), + () -> { + // lookup dir triggers existence check + final S3AFileStatus fileStatus = + (S3AFileStatus) getFileStatus(path); + if (fileStatus.isFile()) { + // simple case: File + LOG.debug("Path is a file"); + return new Listing.SingleStatusRemoteIterator( + filter.accept(path) ? toLocatedFileStatus(fileStatus) : null); + } else { + // directory: trigger a lookup + final String key = maybeAddTrailingSlash(pathToKey(path)); + final Listing.FileStatusAcceptor acceptor = + new Listing.AcceptAllButSelfAndS3nDirs(path); + DirListingMetadata meta = + S3Guard.listChildrenWithTtl(metadataStore, path, + ttlTimeProvider); + final RemoteIterator cachedFileStatusIterator = + listing.createProvidedFileStatusIterator( + S3Guard.dirMetaToStatuses(meta), filter, acceptor); + return (allowAuthoritative && meta != null + && meta.isAuthoritative()) + ? 
listing.createLocatedFileStatusIterator( + cachedFileStatusIterator) + : listing.createLocatedFileStatusIterator( + listing.createFileStatusListingIterator(path, + createListObjectsRequest(key, "/"), + filter, + acceptor, + cachedFileStatusIterator)); + } + }); + return toLocatedFileStatusIterator(iterator); } /** - * Build a {@link LocatedFileStatus} from a {@link FileStatus} instance. + * Build a {@link S3ALocatedFileStatus} from a {@link FileStatus} instance. * @param status file status * @return a located status with block locations set up from this FS. * @throws IOException IO Problems. */ - LocatedFileStatus toLocatedFileStatus(FileStatus status) + S3ALocatedFileStatus toLocatedFileStatus(S3AFileStatus status) throws IOException { - return new LocatedFileStatus(status, + return new S3ALocatedFileStatus(status, status.isFile() ? getFileBlockLocations(status, 0, status.getLen()) - : null); + : null, status.getETag(), status.getVersionId()); } /** @@ -3743,17 +3865,36 @@ private FSDataInputStream select(final Path source, // so the operation will fail if it is not there or S3Guard believes it has // been deleted. // validation of the file status are delegated to the binding. 
- final FileStatus fileStatus = getFileStatus(path); + final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path); // readahead range can be dynamically set long ra = options.getLong(READAHEAD_RANGE, readAhead); + S3ObjectAttributes objectAttributes = createObjectAttributes( + path, fileStatus.getETag(), fileStatus.getVersionId()); + S3AReadOpContext readContext = createReadContext(fileStatus, inputPolicy, + changeDetectionPolicy, ra); + + if (!fileStatus.isDirectory()) { + // check that the object metadata lines up with what is expected + // based on the object attributes (which may contain an eTag or versionId) + // from S3Guard + ChangeTracker changeTracker = + new ChangeTracker(uri.toString(), + changeDetectionPolicy, + readContext.instrumentation.newInputStreamStatistics() + .getVersionMismatchCounter(), + objectAttributes); + ObjectMetadata objectMetadata = getObjectMetadata(path); + changeTracker.processMetadata(objectMetadata, "select", 0); + } + // build and execute the request return selectBinding.select( - createReadContext(fileStatus, inputPolicy, changeDetectionPolicy, ra), + readContext, expression, options, generateSSECustomerKey(), - createObjectAttributes(path)); + objectAttributes); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java index cbe796bdcdd03..6075b52314340 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java @@ -153,7 +153,8 @@ public S3AInputStream(S3AReadOpContext ctx, this.serverSideEncryptionKey = s3Attributes.getServerSideEncryptionKey(); this.changeTracker = new ChangeTracker(uri, ctx.getChangeDetectionPolicy(), - streamStatistics.getVersionMismatchCounter()); + streamStatistics.getVersionMismatchCounter(), + s3Attributes); 
setInputPolicy(ctx.getInputPolicy()); setReadahead(ctx.getReadahead()); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ALocatedFileStatus.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ALocatedFileStatus.java new file mode 100644 index 0000000000000..d3ca2610e225b --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ALocatedFileStatus.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.LocatedFileStatus; + +import static com.google.common.base.Preconditions.checkNotNull; + +/** + * {@link LocatedFileStatus} extended to also carry ETag and object version ID. + */ +public class S3ALocatedFileStatus extends LocatedFileStatus { + + private static final long serialVersionUID = 3597192103662929338L; + + private final String eTag; + private final String versionId; + + public S3ALocatedFileStatus(S3AFileStatus status, BlockLocation[] locations, + String eTag, String versionId) { + super(checkNotNull(status), locations); + this.eTag = eTag; + this.versionId = versionId; + } + + public String getETag() { + return eTag; + } + + public String getVersionId() { + return versionId; + } + + // equals() and hashCode() overridden to avoid FindBugs warning. + // Base implementation is equality on Path only, which is still appropriate. + + @Override + public boolean equals(Object o) { + return super.equals(o); + } + + @Override + public int hashCode() { + return super.hashCode(); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index 8d204d7c56014..fe4fd0ffd6a70 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -530,16 +530,20 @@ public static String stringify(AmazonS3Exception e) { * @param summary summary from AWS * @param blockSize block size to declare. 
* @param owner owner of the file + * @param eTag S3 object eTag or null if unavailable + * @param versionId S3 object versionId or null if unavailable * @return a status entry */ public static S3AFileStatus createFileStatus(Path keyPath, S3ObjectSummary summary, long blockSize, - String owner) { + String owner, + String eTag, + String versionId) { long size = summary.getSize(); return createFileStatus(keyPath, objectRepresentsDirectory(summary.getKey(), size), - size, summary.getLastModified(), blockSize, owner); + size, summary.getLastModified(), blockSize, owner, eTag, versionId); } /** @@ -552,22 +556,27 @@ public static S3AFileStatus createFileStatus(Path keyPath, * @param size file length * @param blockSize block size for file status * @param owner Hadoop username + * @param eTag S3 object eTag or null if unavailable + * @param versionId S3 object versionId or null if unavailable * @return a status entry */ public static S3AFileStatus createUploadFileStatus(Path keyPath, - boolean isDir, long size, long blockSize, String owner) { + boolean isDir, long size, long blockSize, String owner, + String eTag, String versionId) { Date date = isDir ? null : new Date(); - return createFileStatus(keyPath, isDir, size, date, blockSize, owner); + return createFileStatus(keyPath, isDir, size, date, blockSize, owner, + eTag, versionId); } /* Date 'modified' is ignored when isDir is true. 
*/ private static S3AFileStatus createFileStatus(Path keyPath, boolean isDir, - long size, Date modified, long blockSize, String owner) { + long size, Date modified, long blockSize, String owner, + String eTag, String versionId) { if (isDir) { return new S3AFileStatus(Tristate.UNKNOWN, keyPath, owner); } else { return new S3AFileStatus(size, dateToLong(modified), keyPath, blockSize, - owner); + owner, eTag, versionId); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java index d67e3e1e8cbc6..2e62ff6728206 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java @@ -34,16 +34,22 @@ public class S3ObjectAttributes { private final String key; private final S3AEncryptionMethods serverSideEncryptionAlgorithm; private final String serverSideEncryptionKey; + private final String eTag; + private final String versionId; public S3ObjectAttributes( String bucket, String key, S3AEncryptionMethods serverSideEncryptionAlgorithm, - String serverSideEncryptionKey) { + String serverSideEncryptionKey, + String eTag, + String versionId) { this.bucket = bucket; this.key = key; this.serverSideEncryptionAlgorithm = serverSideEncryptionAlgorithm; this.serverSideEncryptionKey = serverSideEncryptionKey; + this.eTag = eTag; + this.versionId = versionId; } public String getBucket() { @@ -61,4 +67,12 @@ public S3AEncryptionMethods getServerSideEncryptionAlgorithm() { public String getServerSideEncryptionKey() { return serverSideEncryptionKey; } + + public String getETag() { + return eTag; + } + + public String getVersionId() { + return versionId; + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java index ea091720c2705..54386addd7970 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java @@ -226,7 +226,8 @@ public String initiateMultiPartUpload(String destKey) throws IOException { /** * Finalize a multipart PUT operation. * This completes the upload, and, if that works, calls - * {@link S3AFileSystem#finishedWrite(String, long)} to update the filesystem. + * {@link S3AFileSystem#finishedWrite(String, long, String, String)} + * to update the filesystem. * Retry policy: retrying, translated. * @param destKey destination of the commit * @param uploadId multipart operation Id @@ -247,20 +248,22 @@ private CompleteMultipartUploadResult finalizeMultipartUpload( throw new IOException( "No upload parts in multipart upload to " + destKey); } - CompleteMultipartUploadResult uploadResult = invoker.retry("Completing multipart commit", destKey, - true, - retrying, - () -> { - // a copy of the list is required, so that the AWS SDK doesn't - // attempt to sort an unmodifiable list. - return owner.getAmazonS3Client().completeMultipartUpload( - new CompleteMultipartUploadRequest(bucket, - destKey, - uploadId, - new ArrayList<>(partETags))); - } + CompleteMultipartUploadResult uploadResult = + invoker.retry("Completing multipart commit", destKey, + true, + retrying, + () -> { + // a copy of the list is required, so that the AWS SDK doesn't + // attempt to sort an unmodifiable list. 
+ return owner.getAmazonS3Client().completeMultipartUpload( + new CompleteMultipartUploadRequest(bucket, + destKey, + uploadId, + new ArrayList<>(partETags))); + } ); - owner.finishedWrite(destKey, length); + owner.finishedWrite(destKey, length, uploadResult.getETag(), + uploadResult.getVersionId()); return uploadResult; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java index f3d8bc20c824b..7972183796aa3 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java @@ -20,9 +20,12 @@ import java.util.Locale; +import com.amazonaws.services.s3.model.CopyObjectRequest; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.transfer.model.CopyResult; import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.fs.s3a.S3ObjectAttributes; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -200,6 +203,28 @@ public static ChangeDetectionPolicy createPolicy(final Mode mode, public abstract String getRevisionId(ObjectMetadata objectMetadata, String uri); + /** + * Like {{@link #getRevisionId(ObjectMetadata, String)}}, but retrieves the + * revision identifier from {@link S3ObjectAttributes}. + * + * @param s3Attributes the object attributes + * @return the revisionId string as interpreted by this policy, or potentially + * null if the attribute is unavailable (such as when the policy says to use + * versionId but object versioning is not enabled for the bucket). 
+ */ + public abstract String getRevisionId(S3ObjectAttributes s3Attributes); + + /** + * Like {{@link #getRevisionId(ObjectMetadata, String)}}, but retrieves the + * revision identifier from {@link CopyResult}. + * + * @param copyResult the copy result + * @return the revisionId string as interpreted by this policy, or potentially + * null if the attribute is unavailable (such as when the policy says to use + * versionId but object versioning is not enabled for the bucket). + */ + public abstract String getRevisionId(CopyResult copyResult); + /** * Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId} * as a server-side qualification on the {@code GetObjectRequest}. @@ -210,6 +235,16 @@ public abstract String getRevisionId(ObjectMetadata objectMetadata, public abstract void applyRevisionConstraint(GetObjectRequest request, String revisionId); + /** + * Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId} + * as a server-side qualification on the {@code CopyObjectRequest}. + * + * @param request the request + * @param revisionId the revision id + */ + public abstract void applyRevisionConstraint(CopyObjectRequest request, + String revisionId); + /** * Takes appropriate action based on {@link #getMode() mode} when a change has * been detected. @@ -242,7 +277,8 @@ public ImmutablePair onChangeDetected( if (timesAlreadyDetected == 0) { // only warn on the first detection to avoid a noisy log LOG.warn( - String.format("%s change detected on %s %s at %d. Expected %s got %s", + String.format( + "%s change detected on %s %s at %d. 
Expected %s got %s", getSource(), operation, uri, position, revisionId, newRevisionId)); return new ImmutablePair<>(true, null); @@ -277,11 +313,32 @@ public String getRevisionId(ObjectMetadata objectMetadata, String uri) { return objectMetadata.getETag(); } + @Override + public String getRevisionId(S3ObjectAttributes s3Attributes) { + return s3Attributes.getETag(); + } + + @Override + public String getRevisionId(CopyResult copyResult) { + return copyResult.getETag(); + } + @Override public void applyRevisionConstraint(GetObjectRequest request, String revisionId) { - LOG.debug("Restricting request to etag {}", revisionId); - request.withMatchingETagConstraint(revisionId); + if (revisionId != null) { + LOG.debug("Restricting get request to etag {}", revisionId); + request.withMatchingETagConstraint(revisionId); + } + } + + @Override + public void applyRevisionConstraint(CopyObjectRequest request, + String revisionId) { + if (revisionId != null) { + LOG.debug("Restricting copy request to etag {}", revisionId); + request.withMatchingETagConstraint(revisionId); + } } @Override @@ -323,11 +380,32 @@ public String getRevisionId(ObjectMetadata objectMetadata, String uri) { return versionId; } + @Override + public String getRevisionId(S3ObjectAttributes s3Attributes) { + return s3Attributes.getVersionId(); + } + + @Override + public String getRevisionId(CopyResult copyResult) { + return copyResult.getVersionId(); + } + @Override public void applyRevisionConstraint(GetObjectRequest request, String revisionId) { - LOG.debug("Restricting request to version {}", revisionId); - request.withVersionId(revisionId); + if (revisionId != null) { + LOG.debug("Restricting get request to version {}", revisionId); + request.withVersionId(revisionId); + } + } + + @Override + public void applyRevisionConstraint(CopyObjectRequest request, + String revisionId) { + if (revisionId != null) { + LOG.debug("Restricting copy request to version {}", revisionId); + 
request.withSourceVersionId(revisionId); + } } @Override @@ -361,12 +439,28 @@ public String getRevisionId(final ObjectMetadata objectMetadata, return null; } + @Override + public String getRevisionId(final S3ObjectAttributes s3ObjectAttributes) { + return null; + } + + @Override + public String getRevisionId(CopyResult copyResult) { + return null; + } + @Override public void applyRevisionConstraint(final GetObjectRequest request, final String revisionId) { } + @Override + public void applyRevisionConstraint(CopyObjectRequest request, + String revisionId) { + + } + @Override public String toString() { return "NoChangeDetection"; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java index f76602b953259..7278307e5f1f8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java @@ -20,11 +20,13 @@ import java.util.concurrent.atomic.AtomicLong; +import com.amazonaws.AmazonServiceException; +import com.amazonaws.services.s3.model.CopyObjectRequest; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.S3Object; +import com.amazonaws.services.s3.transfer.model.CopyResult; import com.google.common.annotations.VisibleForTesting; -import org.apache.hadoop.fs.s3a.NoVersionAttributeException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,14 +34,18 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.fs.s3a.NoVersionAttributeException; import org.apache.hadoop.fs.s3a.RemoteFileChangedException; +import org.apache.hadoop.fs.s3a.S3ObjectAttributes; import static 
com.google.common.base.Preconditions.checkNotNull; +import static org.apache.http.HttpStatus.SC_PRECONDITION_FAILED; /** - * Change tracking for input streams: the revision ID/etag - * the previous request is recorded and when the next request comes in, - * it is compared. + * Change tracking for input streams: the version ID or etag of the object is + * tracked and compared on open/re-open. An initial version ID or etag may or + * may not be available, depending on usage (e.g. if S3Guard is utilized). + * * Self-contained for testing and use in different streams. */ @InterfaceAudience.Private @@ -49,7 +55,7 @@ public class ChangeTracker { private static final Logger LOG = LoggerFactory.getLogger(ChangeTracker.class); - public static final String CHANGE_REPORTED_BY_S3 = "reported by S3"; + public static final String CHANGE_REPORTED_BY_S3 = "Change reported by S3"; /** Policy to use. */ private final ChangeDetectionPolicy policy; @@ -76,13 +82,20 @@ public class ChangeTracker { * @param uri URI of object being tracked * @param policy policy to track. * @param versionMismatches reference to the version mismatch counter + * @param s3ObjectAttributes attributes of the object, potentially including + * an eTag or versionId to match depending on {@code policy} */ public ChangeTracker(final String uri, final ChangeDetectionPolicy policy, - final AtomicLong versionMismatches) { + final AtomicLong versionMismatches, + final S3ObjectAttributes s3ObjectAttributes) { this.policy = checkNotNull(policy); this.uri = uri; this.versionMismatches = versionMismatches; + this.revisionId = policy.getRevisionId(s3ObjectAttributes); + if (revisionId != null) { + LOG.debug("Revision ID for object at {}: {}", uri, revisionId); + } } public String getRevisionId() { @@ -115,6 +128,23 @@ public boolean maybeApplyConstraint( return false; } + /** + * Apply any revision control set by the policy if it is to be + * enforced on the server. 
+ * @param request request to modify + * @return true iff a constraint was added. + */ + public boolean maybeApplyConstraint( + final CopyObjectRequest request) { + + if (policy.getMode() == ChangeDetectionPolicy.Mode.Server + && revisionId != null) { + policy.applyRevisionConstraint(request, revisionId); + return true; + } + return false; + } + /** * Process the response from the server for validation against the @@ -135,29 +165,97 @@ public void processResponse(final S3Object object, // object was not returned. versionMismatches.incrementAndGet(); throw new RemoteFileChangedException(uri, operation, - String.format("%s change " - + CHANGE_REPORTED_BY_S3 + String.format(CHANGE_REPORTED_BY_S3 + " while reading" + " at position %s." - + " Version %s was unavailable", - getSource(), + + " %s %s was unavailable", pos, + getSource(), getRevisionId())); } else { throw new PathIOException(uri, "No data returned from GET request"); } } - final ObjectMetadata metadata = object.getObjectMetadata(); + processMetadata(object.getObjectMetadata(), operation, pos); + } + + /** + * Process the response from the server for validation against the + * change policy. + * @param copyResult result of a copy operation + * @throws PathIOException raised on failure + * @throws RemoteFileChangedException if the remote file has changed. + */ + public void processResponse(final CopyResult copyResult) + throws PathIOException { + // ETag (sometimes, depending on encryption and/or multipart) is not the + // same on the copied object as the original. Version Id seems to never + // be the same on the copy. As such, there isn't really anything that + // can be verified on the response, except that a revision ID is present + // if required. 
+ String newRevisionId = policy.getRevisionId(copyResult); + LOG.debug("Copy result {}: {}", policy.getSource(), newRevisionId); + if (newRevisionId == null && policy.isRequireVersion()) { + throw new NoVersionAttributeException(uri, String.format( + "Change detection policy requires %s", + policy.getSource())); + } + } + + /** + * Process an exception generated against the change policy. + * If the exception indicates the file has changed, this method throws + * {@code RemoteFileChangedException} with the original exception as the + * cause. + * @param e the exception + * @param operation the operation performed when the exception was + * generated (e.g. "copy", "read", "select"). + * @throws RemoteFileChangedException if the remote file has changed. + */ + public void processException(Exception e, String operation) throws + RemoteFileChangedException { + if (e instanceof AmazonServiceException) { + AmazonServiceException serviceException = (AmazonServiceException) e; + if (serviceException.getStatusCode() == SC_PRECONDITION_FAILED) { + versionMismatches.incrementAndGet(); + throw new RemoteFileChangedException(uri, operation, String.format( + RemoteFileChangedException.PRECONDITIONS_FAILED + + " on %s." + + " Version %s was unavailable", + getSource(), + getRevisionId()), + serviceException); + } + } + } + + /** + * Process metadata response from server for validation against the change + * policy. + * @param metadata metadata returned from server + * @param operation operation in progress + * @param pos offset of read + * @throws PathIOException raised on failure + * @throws RemoteFileChangedException if the remote file has changed. 
+ */ + public void processMetadata(final ObjectMetadata metadata, + final String operation, + final long pos) throws PathIOException { final String newRevisionId = policy.getRevisionId(metadata, uri); + processNewRevision(newRevisionId, operation, pos); + } + + private void processNewRevision(final String newRevisionId, + final String operation, final long pos) throws PathIOException { if (newRevisionId == null && policy.isRequireVersion()) { throw new NoVersionAttributeException(uri, String.format( "Change detection policy requires %s", policy.getSource())); } if (revisionId == null) { - // revisionId is null on first (re)open. Pin it so change can be detected - // if object has been updated + // revisionId may be null on first (re)open. Pin it so change can be + // detected if object has been updated LOG.debug("Setting revision ID for object at {}: {}", uri, newRevisionId); revisionId = newRevisionId; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DDBPathMetadata.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DDBPathMetadata.java index 78568dc4bbbf9..e3a529ac14f2d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DDBPathMetadata.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DDBPathMetadata.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.s3a.s3guard; -import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.Tristate; /** @@ -36,18 +36,18 @@ public DDBPathMetadata(PathMetadata pmd) { this.setLastUpdated(pmd.getLastUpdated()); } - public DDBPathMetadata(FileStatus fileStatus) { + public DDBPathMetadata(S3AFileStatus fileStatus) { super(fileStatus); this.isAuthoritativeDir = false; } - public DDBPathMetadata(FileStatus fileStatus, Tristate isEmptyDir, + public DDBPathMetadata(S3AFileStatus fileStatus, Tristate isEmptyDir, boolean isDeleted) { super(fileStatus, 
isEmptyDir, isDeleted); this.isAuthoritativeDir = false; } - public DDBPathMetadata(FileStatus fileStatus, Tristate isEmptyDir, + public DDBPathMetadata(S3AFileStatus fileStatus, Tristate isEmptyDir, boolean isDeleted, boolean isAuthoritativeDir, long lastUpdated) { super(fileStatus, isEmptyDir, isDeleted); this.isAuthoritativeDir = isAuthoritativeDir; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java index dcee35824ed0f..88a46745b11bf 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java @@ -28,9 +28,9 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.S3AFileStatus; /** * {@code DescendantsIterator} is a {@link RemoteIterator} that implements @@ -83,7 +83,7 @@ */ @InterfaceAudience.Private @InterfaceStability.Evolving -public class DescendantsIterator implements RemoteIterator { +public class DescendantsIterator implements RemoteIterator { private final MetadataStore metadataStore; private final Queue queue = new LinkedList<>(); @@ -121,7 +121,7 @@ public boolean hasNext() throws IOException { } @Override - public FileStatus next() throws IOException { + public S3AFileStatus next() throws IOException { if (!hasNext()) { throw new NoSuchElementException("No more descendants."); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java index 88f24aa9841e7..1059dd148623b 100644 --- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java @@ -34,6 +34,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.Tristate; /** @@ -61,7 +62,7 @@ public class DirListingMetadata extends ExpirableMetadata { * Create a directory listing metadata container. * * @param path Path of the directory. If this path has a host component, then - * all paths added later via {@link #put(FileStatus)} must also have + * all paths added later via {@link #put(S3AFileStatus)} must also have * the same host. * @param listing Entries in the directory. * @param isAuthoritative true iff listing is the full contents of the @@ -225,7 +226,7 @@ public void remove(Path childPath) { * @return true if the status was added or replaced with a new value. False * if the same FileStatus value was already present. 
*/ - public boolean put(FileStatus childFileStatus) { + public boolean put(S3AFileStatus childFileStatus) { Preconditions.checkNotNull(childFileStatus, "childFileStatus must be non-null"); Path childPath = childStatusToPathKey(childFileStatus); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java index 769d3d4c4c376..289abf258a9cf 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java @@ -88,6 +88,7 @@ import org.apache.hadoop.fs.s3a.Constants; import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.Retries; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.S3AInstrumentation; import org.apache.hadoop.fs.s3a.S3AUtils; @@ -129,6 +130,14 @@ * This attribute is meaningful only to file items. *

  • optional long attribute revealing block size of the file. * This attribute is meaningful only to file items.
  • + *
  • optional string attribute tracking the s3 eTag of the file. + * May be absent if the metadata was entered with a version of S3Guard + * before this was tracked. + * This attribute is meaningful only to file items.
  • + *
  • optional string attribute tracking the s3 versionId of the file. + * May be absent if the metadata was entered with a version of S3Guard + * before this was tracked. + * This attribute is meaningful only to file items.
  • * * * The DynamoDB partition key is the parent, and the range key is the child. @@ -155,20 +164,20 @@ * This is persisted to a single DynamoDB table as: * *
    - * =========================================================================
    - * | parent                 | child | is_dir | mod_time | len |     ...    |
    - * =========================================================================
    - * | /bucket                | dir1  | true   |          |     |            |
    - * | /bucket/dir1           | dir2  | true   |          |     |            |
    - * | /bucket/dir1           | dir3  | true   |          |     |            |
    - * | /bucket/dir1/dir2      | file1 |        |   100    | 111 |            |
    - * | /bucket/dir1/dir2      | file2 |        |   200    | 222 |            |
    - * | /bucket/dir1/dir3      | dir4  | true   |          |     |            |
    - * | /bucket/dir1/dir3      | dir5  | true   |          |     |            |
    - * | /bucket/dir1/dir3/dir4 | file3 |        |   300    | 333 |            |
    - * | /bucket/dir1/dir3/dir5 | file4 |        |   400    | 444 |            |
    - * | /bucket/dir1/dir3      | dir6  | true   |          |     |            |
    - * =========================================================================
    + * ====================================================================================
    + * | parent                 | child | is_dir | mod_time | len | etag | ver_id |  ...  |
    + * ====================================================================================
    + * | /bucket                | dir1  | true   |          |     |      |        |       |
    + * | /bucket/dir1           | dir2  | true   |          |     |      |        |       |
    + * | /bucket/dir1           | dir3  | true   |          |     |      |        |       |
    + * | /bucket/dir1/dir2      | file1 |        |   100    | 111 | abc  |  mno   |       |
    + * | /bucket/dir1/dir2      | file2 |        |   200    | 222 | def  |  pqr   |       |
    + * | /bucket/dir1/dir3      | dir4  | true   |          |     |      |        |       |
    + * | /bucket/dir1/dir3      | dir5  | true   |          |     |      |        |       |
    + * | /bucket/dir1/dir3/dir4 | file3 |        |   300    | 333 | ghi  |  stu   |       |
    + * | /bucket/dir1/dir3/dir5 | file4 |        |   400    | 444 | jkl  |  vwx   |       |
    + * | /bucket/dir1/dir3      | dir6  | true   |          |     |      |        |       |
    + * ====================================================================================
      * 
    * * This choice of schema is efficient for read access patterns. @@ -625,9 +634,8 @@ private DDBPathMetadata innerGet(Path path, boolean wantEmptyDirectoryFlag) * @param path path to dir * @return new FileStatus */ - private FileStatus makeDirStatus(String owner, Path path) { - return new FileStatus(0, true, 1, 0, 0, 0, null, - owner, null, path); + private S3AFileStatus makeDirStatus(String owner, Path path) { + return new S3AFileStatus(Tristate.UNKNOWN, path, owner); } @Override @@ -710,7 +718,7 @@ Collection completeAncestry( while (!parent.isRoot() && !ancestry.containsKey(parent)) { LOG.debug("auto-create ancestor path {} for child path {}", parent, path); - final FileStatus status = makeDirStatus(parent, username); + final S3AFileStatus status = makeDirStatus(parent, username); ancestry.put(parent, new DDBPathMetadata(status, Tristate.FALSE, false)); parent = parent.getParent(); @@ -915,7 +923,7 @@ Collection fullPathsToPut(DDBPathMetadata meta) while (path != null && !path.isRoot()) { final Item item = getConsistentItem(path); if (!itemExists(item)) { - final FileStatus status = makeDirStatus(path, username); + final S3AFileStatus status = makeDirStatus(path, username); metasToPut.add(new DDBPathMetadata(status, Tristate.FALSE, false, meta.isAuthoritativeDir(), meta.getLastUpdated())); path = path.getParent(); @@ -938,9 +946,8 @@ private boolean itemExists(Item item) { } /** Create a directory FileStatus using current system time as mod time. 
*/ - static FileStatus makeDirStatus(Path f, String owner) { - return new FileStatus(0, true, 1, 0, System.currentTimeMillis(), 0, - null, owner, owner, f); + static S3AFileStatus makeDirStatus(Path f, String owner) { + return new S3AFileStatus(Tristate.UNKNOWN, f, owner); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java index b8f9635dcd283..9276388679866 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.Tristate; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -231,7 +232,7 @@ public void move(Collection pathsToDelete, public void put(PathMetadata meta) throws IOException { Preconditions.checkNotNull(meta); - FileStatus status = meta.getFileStatus(); + S3AFileStatus status = meta.getFileStatus(); Path path = standardize(status.getPath()); synchronized (this) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java index 378d10980c835..e4e76c50d6ce5 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java @@ -33,9 +33,9 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.fs.FileStatus; import 
org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.S3AFileStatus; /** * {@code MetadataStoreListFilesIterator} is a {@link RemoteIterator} that @@ -85,14 +85,14 @@ @InterfaceAudience.Private @InterfaceStability.Evolving public class MetadataStoreListFilesIterator implements - RemoteIterator { + RemoteIterator { public static final Logger LOG = LoggerFactory.getLogger( MetadataStoreListFilesIterator.class); private final boolean allowAuthoritative; private final MetadataStore metadataStore; private final Set tombstones = new HashSet<>(); - private Iterator leafNodesIterator = null; + private Iterator leafNodesIterator = null; public MetadataStoreListFilesIterator(MetadataStore ms, PathMetadata meta, boolean allowAuthoritative) throws IOException { @@ -104,7 +104,7 @@ public MetadataStoreListFilesIterator(MetadataStore ms, PathMetadata meta, private void prefetch(PathMetadata meta) throws IOException { final Queue queue = new LinkedList<>(); - final Collection leafNodes = new ArrayList<>(); + final Collection leafNodes = new ArrayList<>(); if (meta != null) { final Path path = meta.getFileStatus().getPath(); @@ -121,7 +121,7 @@ private void prefetch(PathMetadata meta) throws IOException { while(!queue.isEmpty()) { PathMetadata nextMetadata = queue.poll(); - FileStatus nextStatus = nextMetadata.getFileStatus(); + S3AFileStatus nextStatus = nextMetadata.getFileStatus(); if (nextStatus.isFile()) { // All files are leaf nodes by definition leafNodes.add(nextStatus); @@ -159,7 +159,7 @@ public boolean hasNext() { } @Override - public FileStatus next() { + public S3AFileStatus next() { return leafNodesIterator.next(); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java index 56645fead73c4..5c00cdc5df706 100644 --- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java @@ -23,6 +23,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.Tristate; /** @@ -33,7 +34,7 @@ @InterfaceStability.Evolving public class PathMetadata extends ExpirableMetadata { - private final FileStatus fileStatus; + private S3AFileStatus fileStatus; private Tristate isEmptyDirectory; private boolean isDeleted; @@ -43,24 +44,25 @@ public class PathMetadata extends ExpirableMetadata { * @return the entry. */ public static PathMetadata tombstone(Path path) { - long now = System.currentTimeMillis(); - FileStatus status = new FileStatus(0, false, 0, 0, now, path); - return new PathMetadata(status, Tristate.UNKNOWN, true); + S3AFileStatus s3aStatus = new S3AFileStatus(0, + System.currentTimeMillis(), path, 0, null, + null, null); + return new PathMetadata(s3aStatus, Tristate.UNKNOWN, true); } /** * Creates a new {@code PathMetadata} containing given {@code FileStatus}. * @param fileStatus file status containing an absolute path. 
*/ - public PathMetadata(FileStatus fileStatus) { - this(fileStatus, Tristate.UNKNOWN); + public PathMetadata(S3AFileStatus fileStatus) { + this(fileStatus, Tristate.UNKNOWN, false); } - public PathMetadata(FileStatus fileStatus, Tristate isEmptyDir) { + public PathMetadata(S3AFileStatus fileStatus, Tristate isEmptyDir) { this(fileStatus, isEmptyDir, false); } - public PathMetadata(FileStatus fileStatus, Tristate isEmptyDir, boolean + public PathMetadata(S3AFileStatus fileStatus, Tristate isEmptyDir, boolean isDeleted) { Preconditions.checkNotNull(fileStatus, "fileStatus must be non-null"); Preconditions.checkNotNull(fileStatus.getPath(), "fileStatus path must be" + @@ -75,7 +77,7 @@ public PathMetadata(FileStatus fileStatus, Tristate isEmptyDir, boolean /** * @return {@code FileStatus} contained in this {@code PathMetadata}. */ - public final FileStatus getFileStatus() { + public final S3AFileStatus getFileStatus() { return fileStatus; } @@ -91,6 +93,7 @@ public Tristate isEmptyDirectory() { void setIsEmptyDirectory(Tristate isEmptyDirectory) { this.isEmptyDirectory = isEmptyDirectory; + fileStatus.setIsEmptyDirectory(isEmptyDirectory); } public boolean isDeleted() { @@ -128,10 +131,11 @@ public String toString() { * @param sb target StringBuilder */ public void prettyPrint(StringBuilder sb) { - sb.append(String.format("%-5s %-20s %-7d %-8s %-6s", + sb.append(String.format("%-5s %-20s %-7d %-8s %-6s %-20s %-20s", fileStatus.isDirectory() ? 
"dir" : "file", fileStatus.getPath().toString(), fileStatus.getLen(), - isEmptyDirectory.name(), isDeleted)); + isEmptyDirectory.name(), isDeleted, + fileStatus.getETag(), fileStatus.getVersionId())); sb.append(fileStatus); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java index c6f70bf277f44..7c168b9370961 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java @@ -43,6 +43,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.Constants; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.Tristate; /** @@ -70,6 +71,8 @@ final class PathMetadataDynamoDBTranslation { static final String IS_DELETED = "is_deleted"; static final String IS_AUTHORITATIVE = "is_authoritative"; static final String LAST_UPDATED = "last_updated"; + static final String ETAG = "etag"; + static final String VERSION_ID = "version_id"; /** Used while testing backward compatibility. */ @VisibleForTesting @@ -135,7 +138,7 @@ static DDBPathMetadata itemToPathMetadata(Item item, String username) { boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR); boolean isAuthoritativeDir = false; - final FileStatus fileStatus; + final S3AFileStatus fileStatus; long lastUpdated = 0; if (isDir) { isAuthoritativeDir = !IGNORED_FIELDS.contains(IS_AUTHORITATIVE) @@ -146,8 +149,10 @@ static DDBPathMetadata itemToPathMetadata(Item item, String username) { long len = item.hasAttribute(FILE_LENGTH) ? item.getLong(FILE_LENGTH) : 0; long modTime = item.hasAttribute(MOD_TIME) ? item.getLong(MOD_TIME) : 0; long block = item.hasAttribute(BLOCK_SIZE) ? 
item.getLong(BLOCK_SIZE) : 0; - fileStatus = new FileStatus(len, false, 1, block, modTime, 0, null, - username, username, path); + String eTag = item.getString(ETAG); + String versionId = item.getString(VERSION_ID); + fileStatus = new S3AFileStatus( + len, modTime, path, block, username, eTag, versionId); } lastUpdated = !IGNORED_FIELDS.contains(LAST_UPDATED) @@ -172,7 +177,7 @@ static DDBPathMetadata itemToPathMetadata(Item item, String username) { */ static Item pathMetadataToItem(DDBPathMetadata meta) { Preconditions.checkNotNull(meta); - final FileStatus status = meta.getFileStatus(); + final S3AFileStatus status = meta.getFileStatus(); final Item item = new Item().withPrimaryKey(pathToKey(status.getPath())); if (status.isDirectory()) { item.withBoolean(IS_DIR, true); @@ -183,6 +188,12 @@ static Item pathMetadataToItem(DDBPathMetadata meta) { item.withLong(FILE_LENGTH, status.getLen()) .withLong(MOD_TIME, status.getModificationTime()) .withLong(BLOCK_SIZE, status.getBlockSize()); + if (status.getETag() != null) { + item.withString(ETAG, status.getETag()); + } + if (status.getVersionId() != null) { + item.withString(VERSION_ID, status.getVersionId()); + } } item.withBoolean(IS_DELETED, meta.isDeleted()); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java index 8234777c3b4a2..26c75e82133ce 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java @@ -68,7 +68,7 @@ public final class S3Guard { static final Class S3GUARD_DDB_CLIENT_FACTORY_IMPL_DEFAULT = DynamoDBClientFactory.DefaultDynamoDBClientFactory.class; - private static final FileStatus[] EMPTY_LISTING = new FileStatus[0]; + private static final S3AFileStatus[] EMPTY_LISTING = new S3AFileStatus[0]; // Utility class. All static functions. 
private S3Guard() { } @@ -164,7 +164,7 @@ public static S3AFileStatus putAndReturn(MetadataStore ms, * @param dirMeta directory listing -may be null * @return a possibly-empty array of file status entries */ - public static FileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) { + public static S3AFileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) { if (dirMeta == null) { return EMPTY_LISTING; } @@ -178,7 +178,7 @@ public static FileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) { } } - return statuses.toArray(new FileStatus[0]); + return statuses.toArray(new S3AFileStatus[0]); } /** @@ -201,7 +201,7 @@ public static FileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) { * @throws IOException if metadata store update failed */ public static FileStatus[] dirListingUnion(MetadataStore ms, Path path, - List backingStatuses, DirListingMetadata dirMeta, + List backingStatuses, DirListingMetadata dirMeta, boolean isAuthoritative, ITtlTimeProvider timeProvider) throws IOException { @@ -232,7 +232,7 @@ public static FileStatus[] dirListingUnion(MetadataStore ms, Path path, pm -> pm.getFileStatus().getPath(), PathMetadata::getFileStatus) ); - for (FileStatus s : backingStatuses) { + for (S3AFileStatus s : backingStatuses) { if (deleted.contains(s.getPath())) { continue; } @@ -323,7 +323,7 @@ public static void makeDirsOrdered(MetadataStore ms, List dirs, * [/a/b/file0, /a/b/file1, /a/b/file2, /a/b/file3], isAuthoritative = * true */ - FileStatus prevStatus = null; + S3AFileStatus prevStatus = null; // Use new batched put to reduce round trips. 
List pathMetas = new ArrayList<>(dirs.size()); @@ -334,8 +334,8 @@ public static void makeDirsOrdered(MetadataStore ms, List dirs, boolean isLeaf = (prevStatus == null); Path f = dirs.get(i); assertQualified(f); - FileStatus status = - createUploadFileStatus(f, true, 0, 0, owner); + S3AFileStatus status = + createUploadFileStatus(f, true, 0, 0, owner, null, null); // We only need to put a DirListingMetadata if we are setting // authoritative bit @@ -383,7 +383,8 @@ public static void addMoveDir(MetadataStore ms, Collection srcPaths, } assertQualified(srcPath, dstPath); - FileStatus dstStatus = createUploadFileStatus(dstPath, true, 0, 0, owner); + S3AFileStatus dstStatus = createUploadFileStatus(dstPath, true, 0, + 0, owner, null, null); addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus); } @@ -399,16 +400,18 @@ public static void addMoveDir(MetadataStore ms, Collection srcPaths, * @param size length of file moved * @param blockSize blocksize to associate with destination file * @param owner file owner to use in created records + * @param eTag the s3 object eTag of file moved + * @param versionId the s3 object versionId of file moved */ public static void addMoveFile(MetadataStore ms, Collection srcPaths, Collection dstMetas, Path srcPath, Path dstPath, - long size, long blockSize, String owner) { + long size, long blockSize, String owner, String eTag, String versionId) { if (isNullMetadataStore(ms)) { return; } assertQualified(srcPath, dstPath); - FileStatus dstStatus = createUploadFileStatus(dstPath, false, - size, blockSize, owner); + S3AFileStatus dstStatus = createUploadFileStatus(dstPath, false, + size, blockSize, owner, eTag, versionId); addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus); } @@ -465,9 +468,8 @@ public static void addAncestors(MetadataStore metadataStore, while (!parent.isRoot()) { PathMetadata directory = metadataStore.get(parent); if (directory == null || directory.isDeleted()) { - FileStatus status = new FileStatus(0, true, 1, 0, 0, 0, 
null, username, - null, parent); - PathMetadata meta = new PathMetadata(status, Tristate.FALSE, false); + S3AFileStatus s3aStatus = new S3AFileStatus(Tristate.FALSE, parent, username); + PathMetadata meta = new PathMetadata(s3aStatus, Tristate.FALSE, false); newDirs.add(meta); } else { break; @@ -480,7 +482,7 @@ public static void addAncestors(MetadataStore metadataStore, private static void addMoveStatus(Collection srcPaths, Collection dstMetas, Path srcPath, - FileStatus dstStatus) { + S3AFileStatus dstStatus) { srcPaths.add(srcPath); dstMetas.add(new PathMetadata(dstStatus)); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index 1ac167f5a6dc6..0458544832e3b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -44,12 +44,12 @@ import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.s3a.MultipartUtils; import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus; import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens; import org.apache.hadoop.fs.s3a.commit.CommitConstants; @@ -703,7 +703,7 @@ private void putParentsIfNotPresent(FileStatus f) throws IOException { if (dirCache.contains(parent)) { return; } - FileStatus dir = DynamoDBMetadataStore.makeDirStatus(parent, + S3AFileStatus dir = DynamoDBMetadataStore.makeDirStatus(parent, f.getOwner()); getStore().put(new PathMetadata(dir)); dirCache.add(parent); @@ -718,13 +718,13 
@@ private void putParentsIfNotPresent(FileStatus f) throws IOException { */ private long importDir(FileStatus status) throws IOException { Preconditions.checkArgument(status.isDirectory()); - RemoteIterator it = getFilesystem() + RemoteIterator it = getFilesystem() .listFilesAndEmptyDirectories(status.getPath(), true); long items = 0; while (it.hasNext()) { - LocatedFileStatus located = it.next(); - FileStatus child; + S3ALocatedFileStatus located = it.next(); + S3AFileStatus child; if (located.isDirectory()) { child = DynamoDBMetadataStore.makeDirStatus(located.getPath(), located.getOwner()); @@ -734,7 +734,9 @@ private long importDir(FileStatus status) throws IOException { located.getModificationTime(), located.getPath(), located.getBlockSize(), - located.getOwner()); + located.getOwner(), + located.getETag(), + located.getVersionId()); } putParentsIfNotPresent(child); getStore().put(new PathMetadata(child)); @@ -761,7 +763,8 @@ public int run(String[] args, PrintStream out) throws Exception { filePath = "/"; } Path path = new Path(filePath); - FileStatus status = getFilesystem().getFileStatus(path); + S3AFileStatus status = (S3AFileStatus) getFilesystem() + .getFileStatus(path); try { initMetadataStore(false); diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md index bb09d576dcf95..84550ff011d3c 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md @@ -1097,6 +1097,111 @@ based on usage information collected from previous days, and choosing a combination of retry counts and an interval which allow for the clients to cope with some throttling, but not to time-out other applications. 
+## Read-After-Overwrite Consistency + +S3Guard provides read-after-overwrite consistency through ETags (default) or +object versioning checked either on the server (default) or client. This works +such that a reader reading a file after an overwrite either sees the new version +of the file or an error. Without S3Guard, new readers may see the original +version. Once S3 reaches eventual consistency, new readers will see the new +version. + +Readers using S3Guard will usually see the new file version, but may +in rare cases see `RemoteFileChangedException` instead. This would occur if +an S3 object read cannot provide the version tracked in S3Guard metadata. + +S3Guard achieves this behavior by storing ETags and object version IDs in the +S3Guard metadata store (e.g. DynamoDB). On opening a file, S3AFileSystem +will look in S3 for the version of the file indicated by the ETag or object +version ID stored in the metadata store. If that version is unavailable, +`RemoteFileChangedException` is thrown. Whether ETag or version ID and +server or client mode is used is determined by the +[fs.s3a.change.detection configuration options](./index.html#Handling_Read-During-Overwrite). + +### No Versioning Metadata Available + +When the first S3AFileSystem clients are upgraded to the version of +S3AFileSystem that contains these change tracking features, any existing +S3Guard metadata will not contain ETags or object version IDs. Reads of files +tracked in such S3Guard metadata will access whatever version of the file is +available in S3 at the time of read. Only if the file is subsequently updated +will S3Guard start tracking ETag and object version ID and as such generating +`RemoteFileChangedException` if an inconsistency is detected. + +Similarly, when S3Guard metadata is pruned, S3Guard will no longer be able to +detect an inconsistent read.
S3Guard metadata should be retained for at least +as long as the perceived possible read-after-overwrite temporary inconsistency +window. That window is expected to be short, but there are no guarantees so it +is at the administrator's discretion to weigh the risk. + +### Known Limitations + +#### S3 Select + +S3 Select does not provide a capability for server-side ETag or object +version ID qualification. Whether fs.s3a.change.detection.mode is client or +server, S3Guard will cause a client-side check of the file version before +opening the file with S3 Select. If the current version does not match the +version tracked in S3Guard, `RemoteFileChangedException` is thrown. + +It is still possible that the S3 Select read will access a different version of +the file, if the visible file version changes between the version check and +the opening of the file. This can happen due to eventual consistency or +an overwrite of the file between the version check and the open of the file. + +#### Rename + +Rename is implemented via copy in S3. With fs.s3a.change.detection.mode=client, +a fully reliable mechanism for ensuring the copied content is the expected +content is not possible. This is the case since there isn't necessarily a way +to know the expected ETag or version ID to appear on the object resulting from +the copy. + +Furthermore, if fs.s3a.change.detection.mode=server and a third-party S3 +implementation is used that doesn't honor the provided ETag or version ID, +S3AFileSystem and S3Guard cannot detect it. + +In either fs.s3a.change.detection.mode=server or client, a client-side check +will be performed before the copy to ensure the current version of the file +matches S3Guard metadata. If not, `RemoteFileChangedException` is thrown. +As discussed with regard to S3 Select, this is not sufficient to +guarantee that the same version is the version copied.
+ +When fs.s3a.change.detection.mode=server, the expected version is also specified +in the underlying S3 CopyObjectRequest. As long as the server honors it, the +copied object will be correct. + +All this said, with the defaults of fs.s3a.change.detection.mode=server and +fs.s3a.change.detection.source=etag against Amazon's S3, copy should in fact +either copy the expected file version or, in the case of an eventual consistency +anomaly, generate `RemoteFileChangedException`. The same should be true with +fs.s3a.change.detection.source=versionid. + +#### Out of Sync Metadata + +The S3Guard version tracking metadata (ETag or object version ID) could become +out of sync with the true current object metadata in S3. For example, S3Guard +is still tracking v1 of some file after v2 has been written. This could occur +for reasons such as a writer writing without utilizing S3Guard and/or +S3AFileSystem or simply due to a write with S3AFileSystem and S3Guard that wrote +successfully to S3, but failed in communication with S3Guard's metadata store +(e.g. DynamoDB). + +If this happens, reads of the affected file(s) will result in +`RemoteFileChangedException` until one of: + +* the S3Guard metadata is corrected out-of-band +* the file is overwritten (causing an S3Guard metadata update) +* the S3Guard metadata is pruned + +The S3Guard metadata for a file can be corrected with the `s3guard import` +command as discussed above. The command can take a file URI instead of a +bucket URI to correct the metadata for a single file. For example: + +```bash +hadoop s3guard import [-meta URI] s3a://my-bucket/file-with-bad-metadata +``` + ## Troubleshooting ### Error: `S3Guard table lacks version marker.` @@ -1215,6 +1320,41 @@ sync. See [Fail on Error](#fail-on-error) for more detail. +### Error `RemoteFileChangedException` + +An exception like the following could occur for a couple of reasons: + +* the S3Guard metadata is out of sync with the true S3 metadata.
For +example, the S3Guard DynamoDB table is tracking a different ETag than the ETag +shown in the exception. This may suggest the object was updated in S3 without +involvement from S3Guard or there was a transient failure when S3Guard tried to +write to DynamoDB. + +* S3 is exhibiting read-after-overwrite temporary inconsistency. The S3Guard +metadata was updated with a new ETag during a recent write, but the current read +is not seeing that ETag due to S3 eventual consistency. This exception prevents +the reader from an inconsistent read where the reader sees an older version of +the file. + +``` +org.apache.hadoop.fs.s3a.RemoteFileChangedException: open 's3a://my-bucket/test/file.txt': + Change reported by S3 while reading at position 0. + ETag 4e886e26c072fef250cfaf8037675405 was unavailable + at org.apache.hadoop.fs.s3a.impl.ChangeTracker.processResponse(ChangeTracker.java:167) + at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:207) + at org.apache.hadoop.fs.s3a.S3AInputStream.lambda$lazySeek$1(S3AInputStream.java:355) + at org.apache.hadoop.fs.s3a.Invoker.lambda$retry$2(Invoker.java:195) + at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:109) + at org.apache.hadoop.fs.s3a.Invoker.lambda$retry$3(Invoker.java:265) + at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:322) + at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:261) + at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:193) + at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:215) + at org.apache.hadoop.fs.s3a.S3AInputStream.lazySeek(S3AInputStream.java:348) + at org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:381) + at java.io.FilterInputStream.read(FilterInputStream.java:83) +``` + ## Other Topics For details on how to test S3Guard, see [Testing S3Guard](./testing.html#s3guard) diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md 
b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md index 3123221bd8293..8cdac9e35263f 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md @@ -970,8 +970,8 @@ and the like. The standard strategy here is to save to HDFS and then copy to S3. ``` org.apache.hadoop.fs.s3a.RemoteFileChangedException: re-open `s3a://my-bucket/test/file.txt': - ETag change reported by S3 while reading at position 1949. - Version f9c186d787d4de9657e99f280ba26555 was unavailable + Change reported by S3 while reading at position 1949. + ETag f9c186d787d4de9657e99f280ba26555 was unavailable at org.apache.hadoop.fs.s3a.impl.ChangeTracker.processResponse(ChangeTracker.java:137) at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:200) at org.apache.hadoop.fs.s3a.S3AInputStream.lambda$lazySeek$1(S3AInputStream.java:346) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java index 03c91e62cedce..886795a9d90fc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java @@ -56,19 +56,26 @@ public abstract class AbstractS3AMockTest { @Before public void setup() throws Exception { + Configuration conf = createConfiguration(); + fs = new S3AFileSystem(); + URI uri = URI.create(FS_S3A + "://" + BUCKET); + fs.initialize(uri, conf); + s3 = fs.getAmazonS3ClientForTesting("mocking"); + } + + public Configuration createConfiguration() { Configuration conf = new Configuration(); conf.setClass(S3_CLIENT_FACTORY_IMPL, MockS3ClientFactory.class, S3ClientFactory.class); - // We explicitly disable MetadataStore even if it's configured. 
For unit + // We explicitly disable MetadataStore. For unit // test we don't issue request to AWS DynamoDB service. conf.setClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class, MetadataStore.class); // FS is always magic conf.setBoolean(CommitConstants.MAGIC_COMMITTER_ENABLED, true); - fs = new S3AFileSystem(); - URI uri = URI.create(FS_S3A + "://" + BUCKET); - fs.initialize(uri, conf); - s3 = fs.getAmazonS3ClientForTesting("mocking"); + // use minimum multipart size for faster triggering + conf.setLong(Constants.MULTIPART_SIZE, MULTIPART_MIN_SIZE); + return conf; } @After diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADelayedFNF.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADelayedFNF.java index 7abd47497646e..94c5233f19a15 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADelayedFNF.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADelayedFNF.java @@ -22,7 +22,10 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; import org.apache.hadoop.test.LambdaTestUtils; +import org.junit.Assume; import org.junit.Test; import java.io.FileNotFoundException; @@ -43,6 +46,12 @@ public class ITestS3ADelayedFNF extends AbstractS3ATestBase { @Test public void testNotFoundFirstRead() throws Exception { FileSystem fs = getFileSystem(); + ChangeDetectionPolicy changeDetectionPolicy = + ((S3AFileSystem) fs).getChangeDetectionPolicy(); + Assume.assumeFalse("FNF not expected when using a bucket with" + + " object versioning", + changeDetectionPolicy.getSource() == Source.VersionId); + Path p = path("some-file"); ContractTestUtils.createFile(fs, p, false, new byte[] {20, 21, 22}); diff --git 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java index 6ac803e3085eb..c82a8b9789486 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java @@ -24,9 +24,12 @@ import org.apache.hadoop.fs.contract.AbstractFSContract; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.contract.s3a.S3AContract; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; import org.apache.hadoop.fs.s3a.s3guard.MetadataStore; import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore; import org.apache.hadoop.test.LambdaTestUtils; +import org.junit.Assume; import org.junit.Test; import java.io.FileNotFoundException; @@ -106,6 +109,12 @@ public void testGetFileStatus() throws Exception { @Test public void testOpenDeleteRead() throws Exception { S3AFileSystem fs = getFileSystem(); + ChangeDetectionPolicy changeDetectionPolicy = + ((S3AFileSystem) fs).getChangeDetectionPolicy(); + Assume.assumeFalse("FNF not expected when using a bucket with" + + " object versioning", + changeDetectionPolicy.getSource() == Source.VersionId); + Path p = path("testOpenDeleteRead.txt"); writeTextFile(fs, p, "1337c0d3z", true); try (InputStream s = fs.open(p)) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java index 98dd2026f5f0d..11f4a383bef35 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java @@ -18,11 +18,17 @@ package 
org.apache.hadoop.fs.s3a; +import java.io.ByteArrayOutputStream; import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.charset.Charset; import java.util.Arrays; import java.util.Collection; +import org.apache.commons.io.IOUtils; import org.junit.Assume; +import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -33,63 +39,112 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; +import org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore; +import org.apache.hadoop.fs.s3a.s3guard.MetadataStore; +import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore; +import org.apache.hadoop.fs.s3a.s3guard.PathMetadata; import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBucketOverrides; +import static org.apache.hadoop.fs.s3a.select.SelectConstants.SELECT_SQL; import static org.apache.hadoop.test.LambdaTestUtils.eventually; import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture; /** * Test S3A remote file change detection. 
*/ @RunWith(Parameterized.class) public class ITestS3ARemoteFileChanged extends AbstractS3ATestBase { + private static final Logger LOG = LoggerFactory.getLogger(ITestS3ARemoteFileChanged.class); + private static final String TEST_DATA = "Some test data"; + private static final String QUOTED_TEST_DATA = + "\"" + TEST_DATA + "\""; + + private enum InteractionType { + READ, READ_AFTER_DELETE, COPY, SELECT + } + private final String changeDetectionSource; private final String changeDetectionMode; - private final boolean expectChangeException; - private final boolean expectFileNotFoundException; + private final Collection expectedExceptionInteractions; + private S3AFileSystem fs; @Parameterized.Parameters public static Collection params() { return Arrays.asList(new Object[][]{ // make sure it works with invalid config - {"bogus", "bogus", true, true}, + {"bogus", "bogus", + Arrays.asList( + InteractionType.READ, + InteractionType.READ_AFTER_DELETE, + InteractionType.COPY, + InteractionType.SELECT)}, // test with etag - {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_SERVER, true, true}, - {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_CLIENT, true, true}, - {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_WARN, false, true}, - {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_NONE, false, true}, + {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_SERVER, + Arrays.asList( + InteractionType.READ, + InteractionType.READ_AFTER_DELETE, + InteractionType.COPY, + InteractionType.SELECT)}, + {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_CLIENT, + Arrays.asList( + InteractionType.READ, + InteractionType.READ_AFTER_DELETE, + InteractionType.COPY, + InteractionType.SELECT)}, + {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_WARN, + Arrays.asList( + InteractionType.READ_AFTER_DELETE)}, + {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_NONE, + Arrays.asList( + InteractionType.READ_AFTER_DELETE)}, // test with versionId - // when using server-side versionId, the exceptions shouldn't happen - 
// since the previous version will still be available - {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_SERVER, false, - false}, + // when using server-side versionId, the read exceptions shouldn't + // happen since the previous version will still be available, but + // they will still happen on rename and select since we always do a + // client-side check against the current version + {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_SERVER, + Arrays.asList( + InteractionType.COPY, + InteractionType.SELECT)}, // with client-side versionId it will behave similar to client-side eTag - {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_CLIENT, true, - true}, + {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_CLIENT, + Arrays.asList( + InteractionType.READ, + InteractionType.READ_AFTER_DELETE, + InteractionType.COPY, + InteractionType.SELECT)}, - {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_WARN, false, true}, - {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_NONE, false, true} + {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_WARN, + Arrays.asList( + InteractionType.READ_AFTER_DELETE)}, + {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_NONE, + Arrays.asList( + InteractionType.READ_AFTER_DELETE)} }); } public ITestS3ARemoteFileChanged(String changeDetectionSource, String changeDetectionMode, - boolean expectException, - boolean expectFileNotFoundException) { + Collection expectedExceptionInteractions) { this.changeDetectionSource = changeDetectionSource; this.changeDetectionMode = changeDetectionMode; - this.expectChangeException = expectException; - this.expectFileNotFoundException = expectFileNotFoundException; + this.expectedExceptionInteractions = expectedExceptionInteractions; + } + + @Before + public void setUp() { + fs = getFileSystem(); } @Override @@ -101,29 +156,37 @@ protected Configuration createConfiguration() { CHANGE_DETECT_MODE); conf.set(CHANGE_DETECT_SOURCE, changeDetectionSource); conf.set(CHANGE_DETECT_MODE, 
changeDetectionMode); + + // reduce retry limit so FileNotFoundException cases timeout faster, + // speeding up the tests + conf.set(RETRY_LIMIT, "5"); + + if (conf.getClass(S3_METADATA_STORE_IMPL, MetadataStore.class) == + NullMetadataStore.class) { + // favor LocalMetadataStore over NullMetadataStore + conf.setClass(S3_METADATA_STORE_IMPL, + LocalMetadataStore.class, MetadataStore.class); + } S3ATestUtils.disableFilesystemCaching(conf); return conf; } + /** + * Tests reading a file that is changed while the reader's InputStream is + * open. + */ @Test - public void testReadFileChanged() throws Throwable { + public void testReadFileChangedStreamOpen() throws Throwable { final int originalLength = 8192; final byte[] originalDataset = dataset(originalLength, 'a', 32); final int newLength = originalLength + 1; final byte[] newDataset = dataset(newLength, 'A', 32); - final S3AFileSystem fs = getFileSystem(); final Path testpath = path("readFileToChange.txt"); // initial write writeDataset(fs, testpath, originalDataset, originalDataset.length, 1024, false); - if (fs.getChangeDetectionPolicy().getSource() == Source.VersionId) { - // skip versionId tests if the bucket doesn't have object versioning - // enabled - Assume.assumeTrue( - "Target filesystem does not support versioning", - fs.getObjectMetadata(fs.pathToKey(testpath)).getVersionId() != null); - } + skipIfVersionPolicyAndNoVersionId(testpath); try(FSDataInputStream instream = fs.open(testpath)) { // seek forward and read successfully @@ -152,7 +215,7 @@ public void testReadFileChanged() throws Throwable { // now check seek backward instream.seek(instream.getPos() - 100); - if (expectChangeException) { + if (expectedExceptionInteractions.contains(InteractionType.READ)) { intercept(RemoteFileChangedException.class, "", "read", () -> instream.read()); } else { @@ -164,7 +227,7 @@ public void testReadFileChanged() throws Throwable { // seek backward instream.seek(0); - if (expectChangeException) { + if 
(expectedExceptionInteractions.contains(InteractionType.READ)) { intercept(RemoteFileChangedException.class, "", "read", () -> instream.read(buf)); intercept(RemoteFileChangedException.class, "", "read", @@ -183,7 +246,8 @@ public void testReadFileChanged() throws Throwable { // seek backward instream.seek(0); - if (expectFileNotFoundException) { + if (expectedExceptionInteractions.contains( + InteractionType.READ_AFTER_DELETE)) { intercept(FileNotFoundException.class, "", "read()", () -> instream.read()); intercept(FileNotFoundException.class, "", "readfully", @@ -194,4 +258,187 @@ public void testReadFileChanged() throws Throwable { } } } + + /** + * Tests reading a file where the version visible in S3 does not match the + * version tracked in the metadata store. + */ + @Test + public void testReadFileChangedOutOfSyncMetadata() throws Throwable { + final Path testpath = writeOutOfSyncFileVersion("read.dat"); + + skipIfVersionPolicyAndNoVersionId(testpath); + + final FSDataInputStream instream = fs.open(testpath); + if (expectedExceptionInteractions.contains(InteractionType.READ)) { + intercept(RemoteFileChangedException.class, "", "read()", + () -> { + instream.read(); + }); + } else { + instream.read(); + } + } + + /** + * Ensures a file can be read when there is no version metadata + * (ETag, versionId). + */ + @Test + public void testReadWithNoVersionMetadata() throws Throwable { + final Path testpath = writeFileWithNoVersionMetadata("readnoversion.dat"); + + skipIfVersionPolicyAndNoVersionId(testpath); + + final FSDataInputStream instream = fs.open(testpath); + assertEquals(TEST_DATA, + IOUtils.toString(instream, Charset.forName("UTF-8")).trim()); + } + + /** + * Tests using S3 Select on a file where the version visible in S3 does not + * match the version tracked in the metadata store. 
+ */ + @Test + public void testSelectChangedFile() throws Throwable { + final Path testpath = writeOutOfSyncFileVersion("select.dat"); + + skipIfVersionPolicyAndNoVersionId(testpath); + + if (expectedExceptionInteractions.contains(InteractionType.SELECT)) { + interceptFuture(RemoteFileChangedException.class, "select", + fs.openFile(testpath) + .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build()); + } else { + fs.openFile(testpath) + .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build().get(); + } + } + + /** + * Ensures a file can be read via S3 Select when there is no version metadata + * (ETag, versionId). + */ + @Test + public void testSelectWithNoVersionMetadata() throws Throwable { + final Path testpath = + writeFileWithNoVersionMetadata("selectnoversion.dat"); + + skipIfVersionPolicyAndNoVersionId(testpath); + + FSDataInputStream instream = fs.openFile(testpath) + .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build().get(); + assertEquals(QUOTED_TEST_DATA, + IOUtils.toString(instream, Charset.forName("UTF-8")).trim()); + } + + /** + * Tests doing a rename() on a file where the version visible in S3 does not + * match the version tracked in the metadata store. + * @throws Throwable + */ + @Test + public void testRenameChangedFile() throws Throwable { + final Path testpath = writeOutOfSyncFileVersion("rename.dat"); + + skipIfVersionPolicyAndNoVersionId(testpath); + + final Path dest = path("dest.dat"); + if (expectedExceptionInteractions.contains(InteractionType.COPY)) { + intercept(RemoteFileChangedException.class, "", "copy()", + () -> { + fs.rename(testpath, dest); + }); + } else { + fs.rename(testpath, dest); + } + } + + /** + * Ensures a file can be renamed when there is no version metadata + * (ETag, versionId). 
+ */ + @Test + public void testRenameWithNoVersionMetadata() throws Throwable { + final Path testpath = + writeFileWithNoVersionMetadata("renamenoversion.dat"); + + skipIfVersionPolicyAndNoVersionId(testpath); + + final Path dest = path("noversiondest.dat"); + fs.rename(testpath, dest); + FSDataInputStream inputStream = fs.open(dest); + assertEquals(TEST_DATA, + IOUtils.toString(inputStream, Charset.forName("UTF-8")).trim()); + } + + /** + * Writes a file with old ETag and versionId in the metadata store such + * that the metadata is out of sync with S3. Attempts to read such a file + * should result in {@link RemoteFileChangedException}. + */ + private Path writeOutOfSyncFileVersion(String filename) throws IOException { + final Path testpath = path(filename); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PrintWriter printWriter = new PrintWriter(out); + printWriter.println(TEST_DATA); + printWriter.close(); + final byte[] dataset = out.toByteArray(); + writeDataset(fs, testpath, dataset, dataset.length, + 1024, false); + S3AFileStatus originalStatus = (S3AFileStatus) fs.getFileStatus(testpath); + + // overwrite with half the content + writeDataset(fs, testpath, dataset, dataset.length / 2, + 1024, true); + + S3AFileStatus newStatus = (S3AFileStatus) fs.getFileStatus(testpath); + + // put back the original etag, versionId + S3AFileStatus forgedStatus = + S3AFileStatus.fromFileStatus(newStatus, Tristate.FALSE, + originalStatus.getETag(), originalStatus.getVersionId()); + fs.getMetadataStore().put( + new PathMetadata(forgedStatus, Tristate.FALSE, false)); + + return testpath; + } + + /** + * Writes a file with null ETag and versionId in the metadata store. 
+ */ + private Path writeFileWithNoVersionMetadata(String filename) + throws IOException { + final Path testpath = path(filename); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PrintWriter printWriter = new PrintWriter(out); + printWriter.println(TEST_DATA); + printWriter.close(); + final byte[] dataset = out.toByteArray(); + writeDataset(fs, testpath, dataset, dataset.length, + 1024, false); + S3AFileStatus originalStatus = (S3AFileStatus) fs.getFileStatus(testpath); + + // remove ETag and versionId + S3AFileStatus newStatus = S3AFileStatus.fromFileStatus(originalStatus, + Tristate.FALSE, null, null); + fs.getMetadataStore().put(new PathMetadata(newStatus, Tristate.FALSE, + false)); + + return testpath; + } + + /** + * The test is invalid if the policy uses versionId but the bucket doesn't + * have versioning enabled. + */ + private void skipIfVersionPolicyAndNoVersionId(Path testpath) throws IOException { + if (fs.getChangeDetectionPolicy().getSource() == Source.VersionId) { + // skip versionId tests if the bucket doesn't have object versioning + // enabled + Assume.assumeTrue( + "Target filesystem does not support versioning", + fs.getObjectMetadata(fs.pathToKey(testpath)).getVersionId() != null); + } + } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java index d3b3c21c926df..92eac3c71411f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java @@ -253,7 +253,7 @@ public void testConsistentRenameAfterDelete() throws Exception { DEFAULT_DELAY_KEY_SUBSTRING))); try { - RemoteIterator old = fs.listFilesAndEmptyDirectories( + RemoteIterator old = fs.listFilesAndEmptyDirectories( path("a"), true); fail("Recently renamed dir should not be 
visible"); } catch(FileNotFoundException e) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardOutOfBandOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardOutOfBandOperations.java index adaf538c6c8c4..6dbe6f91d48e3 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardOutOfBandOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardOutOfBandOperations.java @@ -26,6 +26,7 @@ import java.util.UUID; import java.util.stream.Collectors; +import org.junit.Assume; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -37,6 +38,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata; import org.apache.hadoop.fs.s3a.s3guard.MetadataStore; @@ -243,6 +246,12 @@ public void testLongerLengthOverwrite() throws Exception { @Test public void testOutOfBandDeletes() throws Exception { + ChangeDetectionPolicy changeDetectionPolicy = + ((S3AFileSystem) getFileSystem()).getChangeDetectionPolicy(); + Assume.assumeFalse("FNF not expected when using a bucket with" + + " object versioning", + changeDetectionPolicy.getSource() == Source.VersionId); + Path testFileName = path("OutOfBandDelete-" + UUID.randomUUID()); outOfBandDeletes(testFileName, authoritative); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java index 51ff299e7be08..0e091a9e9cf0a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java +++ 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java @@ -177,7 +177,7 @@ public boolean exists(Path f) throws IOException { } @Override - void finishedWrite(String key, long length) { + void finishedWrite(String key, long length, String eTag, String versionId) { } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index 260ecdd71d25e..977c30d0f0225 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -1042,30 +1042,25 @@ public static void verifyFileStatus(FileStatus status, * Verify the status entry of a directory matches that expected. * @param status status entry to check * @param replication replication factor - * @param modTime modified time - * @param accessTime access time * @param owner owner - * @param group user group - * @param permission permission. 
*/ - public static void verifyDirStatus(FileStatus status, + public static void verifyDirStatus(S3AFileStatus status, int replication, - long modTime, - long accessTime, - String owner, - String group, - FsPermission permission) { + String owner) { String details = status.toString(); assertTrue("Is a dir: " + details, status.isDirectory()); assertEquals("zero length: " + details, 0, status.getLen()); - - assertEquals("Mod time: " + details, modTime, status.getModificationTime()); + // S3AFileStatus always assigns modTime = System.currentTimeMillis() + assertTrue("Mod time: " + details, status.getModificationTime() > 0); assertEquals("Replication value: " + details, replication, status.getReplication()); - assertEquals("Access time: " + details, accessTime, status.getAccessTime()); + assertEquals("Access time: " + details, 0, status.getAccessTime()); assertEquals("Owner: " + details, owner, status.getOwner()); - assertEquals("Group: " + details, group, status.getGroup()); - assertEquals("Permission: " + details, permission, status.getPermission()); + // S3AFileStatus always assigns group=owner + assertEquals("Group: " + details, owner, status.getGroup()); + // S3AFileStatus always assigns permission = default + assertEquals("Permission: " + details, + FsPermission.getDefault(), status.getPermission()); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java index 39a5e3bd87d25..1a533bfe64609 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java @@ -19,7 +19,6 @@ package org.apache.hadoop.fs.s3a; import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.junit.Assert; @@ -40,11 +39,11 @@ */ public class 
TestListing extends AbstractS3AMockTest { - private static class MockRemoteIterator implements - RemoteIterator { - private Iterator iterator; + private static class MockRemoteIterator implements + RemoteIterator { + private Iterator iterator; - MockRemoteIterator(Collection source) { + MockRemoteIterator(Collection source) { iterator = source.iterator(); } @@ -52,13 +51,13 @@ public boolean hasNext() { return iterator.hasNext(); } - public FileStatus next() { + public S3AFileStatus next() { return iterator.next(); } } - private FileStatus blankFileStatus(Path path) { - return new FileStatus(0, true, 0, 0, 0, path); + private S3AFileStatus blankFileStatus(Path path) { + return new S3AFileStatus(Tristate.UNKNOWN, path, null); } @Test @@ -78,11 +77,11 @@ public void testTombstoneReconcilingIterator() throws Exception { Set tombstones = new HashSet<>(); tombstones.add(deletedChild); - RemoteIterator sourceIterator = new MockRemoteIterator( + RemoteIterator sourceIterator = new MockRemoteIterator( statuses); - RemoteIterator locatedIterator = + RemoteIterator locatedIterator = listing.createLocatedFileStatusIterator(sourceIterator); - RemoteIterator reconcilingIterator = + RemoteIterator reconcilingIterator = listing.createTombstoneReconcilingIterator(locatedIterator, tombstones); Set expectedPaths = new HashSet<>(); @@ -98,8 +97,12 @@ public void testTombstoneReconcilingIterator() throws Exception { @Test public void testProvidedFileStatusIteratorEnd() throws Exception { - FileStatus[] statuses = { - new FileStatus(100, false, 1, 8192, 0, new Path("s3a://blah/blah")) + S3AFileStatus s3aStatus = new S3AFileStatus( + 100, 0, new Path("s3a://blah/blah"), + 8192, null, null, null); + + S3AFileStatus[] statuses = { + s3aStatus }; ProvidedFileStatusIterator it = new ProvidedFileStatusIterator(statuses, ACCEPT_ALL, new Listing.AcceptAllButS3nDirs()); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java index f073c4c486d2f..5e0edbb2c2b31 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java @@ -20,18 +20,22 @@ import java.util.concurrent.atomic.AtomicLong; +import com.amazonaws.AmazonServiceException; import com.amazonaws.services.s3.Headers; +import com.amazonaws.services.s3.model.CopyObjectRequest; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.S3Object; -import org.apache.hadoop.fs.PathIOException; +import com.amazonaws.services.s3.transfer.model.CopyResult; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; import org.apache.hadoop.fs.s3a.impl.ChangeTracker; import org.apache.hadoop.test.HadoopTestBase; +import org.apache.http.HttpStatus; import static org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.CHANGE_DETECTED; import static org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.createPolicy; @@ -50,6 +54,8 @@ public class TestStreamChangeTracker extends HadoopTestBase { public static final String OBJECT = "object"; + public static final String DEST_OBJECT = "new_object"; + public static final String URI = "s3a://" + BUCKET + "/" + OBJECT; @Test @@ -161,12 +167,106 @@ public void testVersionCheckingOnServer() throws Throwable { CHANGE_DETECTED); } + @Test + public void testVersionCheckingUpfrontETag() throws Throwable { + ChangeTracker tracker = newTracker( + ChangeDetectionPolicy.Mode.Server, + ChangeDetectionPolicy.Source.ETag, + false, + objectAttributes("etag1", "versionid1")); + + assertEquals("etag1", tracker.getRevisionId()); + } + + @Test + public void 
testVersionCheckingUpfrontVersionId() throws Throwable { + ChangeTracker tracker = newTracker( + ChangeDetectionPolicy.Mode.Server, + ChangeDetectionPolicy.Source.VersionId, + false, + objectAttributes("etag1", "versionid1")); + + assertEquals("versionid1", tracker.getRevisionId()); + } + + @Test + public void testVersionCheckingETagCopyServer() throws Throwable { + ChangeTracker tracker = newTracker( + ChangeDetectionPolicy.Mode.Server, + ChangeDetectionPolicy.Source.VersionId, + false, + objectAttributes("etag1", "versionid1")); + assertConstraintApplied(tracker, newCopyObjectRequest()); + } + + @Test + public void testVersionCheckingETagCopyClient() throws Throwable { + ChangeTracker tracker = newTracker( + ChangeDetectionPolicy.Mode.Client, + ChangeDetectionPolicy.Source.VersionId, + false, + objectAttributes("etag1", "versionid1")); + assertFalse("Tracker should not have applied contraints " + tracker, + tracker.maybeApplyConstraint(newCopyObjectRequest())); + } + + @Test + public void testCopyVersionIdRequired() throws Throwable { + ChangeTracker tracker = newTracker( + ChangeDetectionPolicy.Mode.Client, + ChangeDetectionPolicy.Source.VersionId, + true, + objectAttributes("etag1", "versionId")); + + expectNoVersionAttributeException(tracker, newCopyResult("etag1", + null), + "policy requires VersionId"); + } + + @Test + public void testCopyETagRequired() throws Throwable { + ChangeTracker tracker = newTracker( + ChangeDetectionPolicy.Mode.Client, + ChangeDetectionPolicy.Source.ETag, + true, + objectAttributes("etag1", "versionId")); + + expectNoVersionAttributeException(tracker, newCopyResult(null, + "versionId"), + "policy requires ETag"); + } + + @Test + public void testCopyVersionMismatch() throws Throwable { + ChangeTracker tracker = newTracker( + ChangeDetectionPolicy.Mode.Server, + ChangeDetectionPolicy.Source.ETag, + true, + objectAttributes("etag", "versionId")); + + // 412 is translated to RemoteFileChangedException + AmazonServiceException 
awsException = + new AmazonServiceException("aws exception"); + awsException.setStatusCode(HttpStatus.SC_PRECONDITION_FAILED); + expectChangeException(tracker, awsException, "copy", + RemoteFileChangedException.PRECONDITIONS_FAILED); + + // processing another type of exception does nothing + tracker.processException(new RuntimeException(), "copy"); + } + protected void assertConstraintApplied(final ChangeTracker tracker, final GetObjectRequest request) { assertTrue("Tracker should have applied contraints " + tracker, tracker.maybeApplyConstraint(request)); } + protected void assertConstraintApplied(final ChangeTracker tracker, + final CopyObjectRequest request) throws PathIOException { + assertTrue("Tracker should have applied contraints " + tracker, + tracker.maybeApplyConstraint(request)); + } + protected RemoteFileChangedException expectChangeException( final ChangeTracker tracker, final S3Object response, @@ -175,6 +275,15 @@ protected RemoteFileChangedException expectChangeException( RemoteFileChangedException.class); } + protected RemoteFileChangedException expectChangeException( + final ChangeTracker tracker, + final Exception exception, + final String operation, + final String message) throws Exception { + return expectException(tracker, exception, operation, message, + RemoteFileChangedException.class); + } + protected PathIOException expectNoVersionAttributeException( final ChangeTracker tracker, final S3Object response, @@ -183,6 +292,14 @@ protected PathIOException expectNoVersionAttributeException( NoVersionAttributeException.class); } + protected PathIOException expectNoVersionAttributeException( + final ChangeTracker tracker, + final CopyResult response, + final String message) throws Exception { + return expectException(tracker, response, message, + NoVersionAttributeException.class); + } + protected T expectException( final ChangeTracker tracker, final S3Object response, @@ -197,6 +314,35 @@ protected T expectException( }); } + protected T 
expectException( + final ChangeTracker tracker, + final CopyResult response, + final String message, + final Class clazz) throws Exception { + return intercept( + clazz, + message, + () -> { + tracker.processResponse(response); + return tracker; + }); + } + + protected T expectException( + final ChangeTracker tracker, + final Exception exception, + final String operation, + final String message, + final Class clazz) throws Exception { + return intercept( + clazz, + message, + () -> { + tracker.processException(exception, operation); + return tracker; + }); + } + protected void assertRevisionId(final ChangeTracker tracker, final String revId) { assertEquals("Wrong revision ID in " + tracker, @@ -218,14 +364,29 @@ protected void assertTrackerMismatchCount( */ protected ChangeTracker newTracker(final ChangeDetectionPolicy.Mode mode, final ChangeDetectionPolicy.Source source, boolean requireVersion) { + return newTracker(mode, source, requireVersion, + objectAttributes(null, null)); + } + + /** + * Create tracker. + * Contains standard assertions(s). + * @return the tracker. 
+ */ + protected ChangeTracker newTracker(final ChangeDetectionPolicy.Mode mode, + final ChangeDetectionPolicy.Source source, boolean requireVersion, + S3ObjectAttributes objectAttributes) { ChangeDetectionPolicy policy = createPolicy( mode, source, requireVersion); ChangeTracker tracker = new ChangeTracker(URI, policy, - new AtomicLong(0)); - assertFalse("Tracker should not have applied constraints " + tracker, - tracker.maybeApplyConstraint(newGetObjectRequest())); + new AtomicLong(0), objectAttributes); + if (objectAttributes.getVersionId() == null + && objectAttributes.getETag() == null) { + assertFalse("Tracker should not have applied constraints " + tracker, + tracker.maybeApplyConstraint(newGetObjectRequest())); + } return tracker; } @@ -233,6 +394,21 @@ private GetObjectRequest newGetObjectRequest() { return new GetObjectRequest(BUCKET, OBJECT); } + private CopyObjectRequest newCopyObjectRequest() { + return new CopyObjectRequest(BUCKET, OBJECT, BUCKET, DEST_OBJECT); + } + + private CopyResult newCopyResult(String eTag, String versionId) { + CopyResult copyResult = new CopyResult(); + copyResult.setSourceBucketName(BUCKET); + copyResult.setSourceKey(OBJECT); + copyResult.setDestinationBucketName(BUCKET); + copyResult.setDestinationKey(DEST_OBJECT); + copyResult.setETag(eTag); + copyResult.setVersionId(versionId); + return copyResult; + } + private S3Object newResponse(String etag, String versionId) { ObjectMetadata md = new ObjectMetadata(); if (etag != null) { @@ -252,4 +428,14 @@ private S3Object emptyResponse() { response.setKey(OBJECT); return response; } + + private S3ObjectAttributes objectAttributes( + String etag, String versionId) { + return new S3ObjectAttributes(BUCKET, + OBJECT, + null, + null, + etag, + versionId); + } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java 
index ad4691a6d962b..589628c5c9424 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java @@ -221,7 +221,7 @@ protected void createFile(Path path, boolean onS3, boolean onMetadataStore) ContractTestUtils.touch(fs, path); } else if (onMetadataStore) { S3AFileStatus status = new S3AFileStatus(100L, System.currentTimeMillis(), - fs.qualify(path), 512L, "hdfs"); + fs.qualify(path), 512L, "hdfs", null, null); putFile(ms, status); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java index 158f13d3fc763..972cbe5f5ef7a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java @@ -246,12 +246,13 @@ public DynamoDBMSContract createContract(Configuration conf) { } @Override - FileStatus basicFileStatus(Path path, int size, boolean isDir) + S3AFileStatus basicFileStatus(Path path, int size, boolean isDir) throws IOException { String owner = UserGroupInformation.getCurrentUser().getShortUserName(); return isDir ? 
new S3AFileStatus(true, path, owner) - : new S3AFileStatus(size, getModTime(), path, BLOCK_SIZE, owner); + : new S3AFileStatus(size, getModTime(), path, BLOCK_SIZE, owner, + null, null); } private DynamoDBMetadataStore getDynamoMetadataStore() throws IOException { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java index 408c72eb8f521..301ba16aec57a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java @@ -41,7 +41,6 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.StorageStatistics; import org.apache.hadoop.fs.contract.ContractTestUtils; @@ -231,7 +230,7 @@ public void test_030_BatchedWrite() throws Exception { long pruneItems = 0; for (long i = 0; i < iterations; i++) { Path longPath = pathOfDepth(BATCH_SIZE, String.valueOf(i)); - FileStatus status = basicFileStatus(longPath, 0, false, 12345, + S3AFileStatus status = basicFileStatus(longPath, 0, false, 12345); PathMetadata pm = new PathMetadata(status); synchronized (toCleanup) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java index 6a4d45e9ea170..f81f0e2bc13b7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java @@ -37,7 +37,9 @@ import org.apache.hadoop.fs.FSDataOutputStream; import 
org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.Tristate; import static org.apache.hadoop.fs.s3a.MultipartTestUtils.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getLandsatCSVFile; @@ -95,6 +97,41 @@ public void testImportCommand() throws Exception { // assertTrue(children.isAuthoritative()); } + @Test + public void testImportCommandRepairsETagAndVersionId() throws Exception { + S3AFileSystem fs = getFileSystem(); + MetadataStore ms = getMetadataStore(); + Path path = path("test-version-metadata"); + try (FSDataOutputStream out = fs.create(path)) { + out.write(1); + } + S3AFileStatus originalStatus = (S3AFileStatus) fs.getFileStatus(path); + + // put in bogus ETag and versionId + S3AFileStatus bogusStatus = S3AFileStatus.fromFileStatus(originalStatus, + Tristate.FALSE, "bogusETag", "bogusVersionId"); + ms.put(new PathMetadata(bogusStatus)); + + // sanity check that bogus status is actually persisted + S3AFileStatus retrievedBogusStatus = (S3AFileStatus) fs.getFileStatus(path); + assertEquals("bogus ETag was not persisted", + "bogusETag", retrievedBogusStatus.getETag()); + assertEquals("bogus versionId was not persisted", + "bogusVersionId", retrievedBogusStatus.getVersionId()); + + // execute the import + S3GuardTool.Import cmd = new S3GuardTool.Import(fs.getConf()); + cmd.setStore(ms); + exec(cmd, "import", path.toString()); + + // make sure ETag and versionId were corrected + S3AFileStatus updatedStatus = (S3AFileStatus) fs.getFileStatus(path); + assertEquals("ETag was not corrected", + originalStatus.getETag(), updatedStatus.getETag()); + assertEquals("VersionId was not corrected", + originalStatus.getVersionId(), updatedStatus.getVersionId()); + } + @Test public void testDestroyBucketExistsButNoTable() throws Throwable { run(Destroy.NAME, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java index 799c5a046bc22..ced2e76cb53fd 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java @@ -38,6 +38,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.apache.hadoop.fs.s3a.Tristate; import org.apache.hadoop.io.IOUtils; @@ -61,11 +62,13 @@ public abstract class MetadataStoreTestBase extends HadoopTestBase { /** Some dummy values for sanity-checking FileStatus contents. */ static final long BLOCK_SIZE = 32 * 1024 * 1024; static final int REPLICATION = 1; - static final FsPermission PERMISSION = new FsPermission((short)0755); static final String OWNER = "bob"; - static final String GROUP = "uncles"; - private final long accessTime = System.currentTimeMillis(); - private final long modTime = accessTime - 5000; + private final long modTime = System.currentTimeMillis() - 5000; + + // attributes not supported by S3AFileStatus + static final FsPermission PERMISSION = null; + static final String GROUP = null; + private final long accessTime = 0; /** * Each test should override this. Will use a new Configuration instance. @@ -148,14 +151,14 @@ private void doTestDescendantsIterator( String[] checkNodes) throws Exception { // we set up the example file system tree in metadata store for (String pathStr : createNodes) { - final FileStatus status = pathStr.contains("file") + final S3AFileStatus status = pathStr.contains("file") ? 
basicFileStatus(strToPath(pathStr), 100, false) : basicFileStatus(strToPath(pathStr), 0, true); ms.put(new PathMetadata(status)); } final PathMetadata rootMeta = new PathMetadata(makeDirStatus("/")); - RemoteIterator iterator; + RemoteIterator iterator; if (implementation == DescendantsIterator.class) { iterator = new DescendantsIterator(ms, rootMeta); } else if (implementation == MetadataStoreListFilesIterator.class) { @@ -647,8 +650,7 @@ public void testPruneFiles() throws Exception { createNewDirs("/pruneFiles"); long oldTime = getTime(); - ms.put(new PathMetadata(makeFileStatus("/pruneFiles/old", 1, oldTime, - oldTime))); + ms.put(new PathMetadata(makeFileStatus("/pruneFiles/old", 1, oldTime))); DirListingMetadata ls2 = ms.listChildren(strToPath("/pruneFiles")); if (!allowMissing()) { assertListingsEqual(ls2.getListing(), "/pruneFiles/old"); @@ -659,8 +661,7 @@ public void testPruneFiles() throws Exception { Thread.sleep(1); long cutoff = System.currentTimeMillis(); long newTime = getTime(); - ms.put(new PathMetadata(makeFileStatus("/pruneFiles/new", 1, newTime, - newTime))); + ms.put(new PathMetadata(makeFileStatus("/pruneFiles/new", 1, newTime))); DirListingMetadata ls; ls = ms.listChildren(strToPath("/pruneFiles")); @@ -691,7 +692,7 @@ public void testPruneDirs() throws Exception { long oldTime = getTime(); ms.put(new PathMetadata(makeFileStatus("/pruneDirs/dir/file", - 1, oldTime, oldTime))); + 1, oldTime))); // It's possible for the Local implementation to get from the old // modification time to here in under 1ms, causing it to not get pruned @@ -715,10 +716,10 @@ public void testPruneUnsetsAuthoritative() throws Exception { createNewDirs(rootDir, grandparentDir, parentDir); long time = System.currentTimeMillis(); ms.put(new PathMetadata( - new FileStatus(0, false, 0, 0, time - 1, strToPath(staleFile)), + basicFileStatus(0, false, 0, time - 1, strToPath(staleFile)), Tristate.FALSE, false)); ms.put(new PathMetadata( - new FileStatus(0, false, 0, 0, time + 
1, strToPath(freshFile)), + basicFileStatus(0, false, 0, time + 1, strToPath(freshFile)), Tristate.FALSE, false)); // set parent dir as authoritative @@ -752,10 +753,10 @@ public void testPrunePreservesAuthoritative() throws Exception { createNewDirs(rootDir, grandparentDir, parentDir); long time = System.currentTimeMillis(); ms.put(new PathMetadata( - new FileStatus(0, false, 0, 0, time + 1, strToPath(staleFile)), + basicFileStatus(0, false, 0, time + 1, strToPath(staleFile)), Tristate.FALSE, false)); ms.put(new PathMetadata( - new FileStatus(0, false, 0, 0, time + 1, strToPath(freshFile)), + basicFileStatus(0, false, 0, time + 1, strToPath(freshFile)), Tristate.FALSE, false)); if (!allowMissing()) { @@ -811,7 +812,7 @@ public void testPutDirListingMetadataPutsFileMetadata() @Test public void testPutRetainsIsDeletedInParentListing() throws Exception { final Path path = strToPath("/a/b"); - final FileStatus fileStatus = basicFileStatus(path, 0, false); + final S3AFileStatus fileStatus = basicFileStatus(path, 0, false); PathMetadata pm = new PathMetadata(fileStatus); pm.setIsDeleted(true); ms.put(pm); @@ -948,40 +949,56 @@ private void assertEmptyDirs(String ...dirs) throws IOException { } } - FileStatus basicFileStatus(Path path, int size, boolean isDir) throws + S3AFileStatus basicFileStatus(Path path, int size, boolean isDir) throws IOException { - return basicFileStatus(path, size, isDir, modTime, accessTime); + return basicFileStatus(path, size, isDir, modTime); } - public static FileStatus basicFileStatus(Path path, int size, boolean isDir, - long newModTime, long newAccessTime) throws IOException { - return new FileStatus(size, isDir, REPLICATION, BLOCK_SIZE, newModTime, - newAccessTime, PERMISSION, OWNER, GROUP, path); + S3AFileStatus basicFileStatus(int size, boolean isDir, + long blockSize, long modificationTime, Path path) { + if (isDir) { + return new S3AFileStatus(Tristate.UNKNOWN, path, null); + } + else { + return new S3AFileStatus(size, 
modificationTime, path, blockSize, null, + null, null); + } + } + + public static S3AFileStatus basicFileStatus(Path path, int size, + boolean isDir, long newModTime) throws IOException { + if (isDir) { + return new S3AFileStatus(Tristate.UNKNOWN, path, OWNER); + } + else { + return new S3AFileStatus(size, newModTime, path, BLOCK_SIZE, OWNER, + null, null); + } } - private FileStatus makeFileStatus(String pathStr, int size) throws + private S3AFileStatus makeFileStatus(String pathStr, int size) throws IOException { - return makeFileStatus(pathStr, size, modTime, accessTime); + return makeFileStatus(pathStr, size, modTime); } - private FileStatus makeFileStatus(String pathStr, int size, long newModTime, - long newAccessTime) throws IOException { + private S3AFileStatus makeFileStatus(String pathStr, int size, + long newModTime) throws IOException { return basicFileStatus(strToPath(pathStr), size, false, - newModTime, newAccessTime); + newModTime); } void verifyFileStatus(FileStatus status, long size) { S3ATestUtils.verifyFileStatus(status, size, BLOCK_SIZE, modTime); } - private FileStatus makeDirStatus(String pathStr) throws IOException { - return basicFileStatus(strToPath(pathStr), 0, true, modTime, accessTime); + private S3AFileStatus makeDirStatus(String pathStr) throws IOException { + return basicFileStatus(strToPath(pathStr), 0, true, modTime); } /** * Verify the directory file status. Subclass may verify additional fields. 
*/ - void verifyDirStatus(FileStatus status) { + void verifyDirStatus(S3AFileStatus status) { assertTrue("Is a dir", status.isDirectory()); assertEquals("zero length", 0, status.getLen()); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDirListingMetadata.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDirListingMetadata.java index 8458252af763c..cb183a2954820 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDirListingMetadata.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDirListingMetadata.java @@ -38,6 +38,8 @@ public class TestDirListingMetadata { private static final String TEST_OWNER = "hadoop"; + public static final String TEST_ETAG = "abc"; + public static final String TEST_VERSION_ID = "def"; @Rule public ExpectedException exception = ExpectedException.none(); @@ -79,7 +81,8 @@ public void testListing() { PathMetadata pathMeta2 = new PathMetadata( new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER)); PathMetadata pathMeta3 = new PathMetadata( - new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER)); + new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER, + TEST_ETAG, TEST_VERSION_ID)); List listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3); DirListingMetadata meta = new DirListingMetadata(path, listing, false); assertEquals(path, meta.getPath()); @@ -130,7 +133,8 @@ public void testGet() { PathMetadata pathMeta2 = new PathMetadata( new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER)); PathMetadata pathMeta3 = new PathMetadata( - new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER)); + new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER, + TEST_ETAG, TEST_VERSION_ID)); List listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3); DirListingMetadata meta = new DirListingMetadata(path, listing, false); 
assertEquals(path, meta.getPath()); @@ -181,7 +185,8 @@ public void testPut() { PathMetadata pathMeta2 = new PathMetadata( new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER)); PathMetadata pathMeta3 = new PathMetadata( - new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER)); + new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER, + TEST_ETAG, TEST_VERSION_ID)); List listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3); DirListingMetadata meta = new DirListingMetadata(path, listing, false); assertEquals(path, meta.getPath()); @@ -243,7 +248,8 @@ public void testRemove() { PathMetadata pathMeta2 = new PathMetadata( new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER)); PathMetadata pathMeta3 = new PathMetadata( - new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER)); + new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER, + TEST_ETAG, TEST_VERSION_ID)); List listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3); DirListingMetadata meta = new DirListingMetadata(path, listing, false); assertEquals(path, meta.getPath()); @@ -296,7 +302,7 @@ private static DirListingMetadata makeTwoDirsOneFile(Path parent) { new S3AFileStatus(true, new Path(parent, "dir2"), TEST_OWNER)); PathMetadata pathMeta3 = new PathMetadata( new S3AFileStatus(123, 456, new Path(parent, "file1"), 8192, - TEST_OWNER)); + TEST_OWNER, TEST_ETAG, TEST_VERSION_ID)); List listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3); return new DirListingMetadata(parent, listing, false); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java index 2ea20b26b023d..d0156f13e82ea 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java +++ 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java @@ -30,7 +30,9 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3ATestUtils; +import org.apache.hadoop.fs.s3a.Tristate; /** * MetadataStore unit test for {@link LocalMetadataStore}. @@ -168,8 +170,8 @@ private static void populateMap(Cache cache, private static void populateEntry(Cache cache, Path path) { - FileStatus fileStatus = new FileStatus(0, true, 0, 0, 0, path); - cache.put(path, new LocalMetadataEntry(new PathMetadata(fileStatus))); + S3AFileStatus s3aStatus = new S3AFileStatus(Tristate.UNKNOWN, path, null); + cache.put(path, new LocalMetadataEntry(new PathMetadata(s3aStatus))); } private static long sizeOfMap(Cache cache) { @@ -196,9 +198,8 @@ protected void verifyFileStatus(FileStatus status, long size) { } @Override - protected void verifyDirStatus(FileStatus status) { - S3ATestUtils.verifyDirStatus(status, REPLICATION, getModTime(), - getAccessTime(), OWNER, GROUP, PERMISSION); + protected void verifyDirStatus(S3AFileStatus status) { + S3ATestUtils.verifyDirStatus(status, REPLICATION, OWNER); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestObjectChangeDetectionAttributes.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestObjectChangeDetectionAttributes.java new file mode 100644 index 0000000000000..d8a76065375bd --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestObjectChangeDetectionAttributes.java @@ -0,0 +1,384 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.s3guard; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; + +import com.amazonaws.services.s3.Headers; +import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; +import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; +import com.amazonaws.services.s3.model.GetObjectMetadataRequest; +import com.amazonaws.services.s3.model.GetObjectRequest; +import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; +import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; +import com.amazonaws.services.s3.model.ListObjectsV2Request; +import com.amazonaws.services.s3.model.ListObjectsV2Result; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.PutObjectRequest; +import com.amazonaws.services.s3.model.PutObjectResult; +import com.amazonaws.services.s3.model.S3Object; +import com.amazonaws.services.s3.model.UploadPartRequest; +import com.amazonaws.services.s3.model.UploadPartResult; +import org.hamcrest.BaseMatcher; +import org.hamcrest.Description; +import org.hamcrest.Matcher; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import 
org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.AbstractS3AMockTest; +import org.apache.hadoop.fs.s3a.Constants; +import org.apache.hadoop.fs.s3a.S3AFileStatus; + +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE; +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE_SERVER; +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_SOURCE; +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_SOURCE_ETAG; +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_SOURCE_VERSION_ID; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.when; +import static org.mockito.hamcrest.MockitoHamcrest.argThat; + +/** + * Unit tests to ensure object eTag and versionId are captured on S3 PUT and + * used on GET. Further (integration) testing is performed in + * {@link org.apache.hadoop.fs.s3a.ITestS3ARemoteFileChanged}. 
+ */ +@RunWith(Parameterized.class) +public class TestObjectChangeDetectionAttributes extends AbstractS3AMockTest { + private final String changeDetectionSource; + + public TestObjectChangeDetectionAttributes(String changeDetectionSource) { + this.changeDetectionSource = changeDetectionSource; + } + + @Parameterized.Parameters + public static Collection params() { + return Arrays.asList(new Object[][]{ + {CHANGE_DETECT_SOURCE_ETAG}, + {CHANGE_DETECT_SOURCE_VERSION_ID} + }); + } + + @Override + public void setup() throws Exception { + super.setup(); + } + + @Override + public Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + conf.setClass(Constants.S3_METADATA_STORE_IMPL, + LocalMetadataStore.class, MetadataStore.class); + conf.set(CHANGE_DETECT_SOURCE, changeDetectionSource); + conf.set(CHANGE_DETECT_MODE, CHANGE_DETECT_MODE_SERVER); + return conf; + } + + /** + * Tests a file uploaded with a single PUT to ensure eTag is captured and used + * on file read. + */ + @Test + public void testCreateAndReadFileSinglePart() throws Exception { + String bucket = "s3a://mock-bucket/"; + String file = "single-part-file"; + Path path = new Path(bucket, file); + byte[] content = "content".getBytes(); + String eTag = "abc"; + String versionId = "def"; + + putObject(file, path, content, eTag, versionId); + + // make sure the eTag and versionId were put into the metadataStore + assertVersionAttributes(path, eTag, versionId); + + // Ensure underlying S3 getObject call uses the stored eTag or versionId + // when reading data back. If it doesn't, the read won't work and the + // assert will fail. 
+ assertContent(file, path, content, eTag, versionId); + + // test overwrite + byte[] newConent = "newcontent".getBytes(); + String newETag = "newETag"; + String newVersionId = "newVersionId"; + + putObject(file, path, newConent, newETag, newVersionId); + assertVersionAttributes(path, newETag, newVersionId); + assertContent(file, path, newConent, newETag, newVersionId); + } + + /** + * Tests a file uploaded with multi-part upload to ensure eTag is captured + * and used on file read. + */ + @Test + public void testCreateAndReadFileMultiPart() throws Exception { + String bucket = "s3a://mock-bucket/"; + String file = "multi-part-file"; + Path path = new Path(bucket, file); + byte[] content = new byte[Constants.MULTIPART_MIN_SIZE + 1]; + String eTag = "abc"; + String versionId = "def"; + + multipartUpload(file, path, content, eTag, versionId); + + // make sure the eTag and versionId were put into the metadataStore + assertVersionAttributes(path, eTag, versionId); + + // Ensure underlying S3 getObject call uses the stored eTag or versionId + // when reading data back. If it doesn't, the read won't work and the + // assert will fail. 
+ assertContent(file, path, content, eTag, versionId); + + // test overwrite + byte[] newConent = new byte[Constants.MULTIPART_MIN_SIZE + 1]; + Arrays.fill(newConent, (byte) 1); + String newETag = "newETag"; + String newVersionId = "newVersionId"; + + multipartUpload(file, path, newConent, newETag, newVersionId); + assertVersionAttributes(path, newETag, newVersionId); + assertContent(file, path, newConent, newETag, newVersionId); + } + + private void putObject(String file, Path path, byte[] content, + String eTag, String versionId) throws IOException { + PutObjectResult putObjectResult = new PutObjectResult(); + ObjectMetadata objectMetadata = new ObjectMetadata(); + objectMetadata.setContentLength(content.length); + putObjectResult.setMetadata(objectMetadata); + putObjectResult.setETag(eTag); + putObjectResult.setVersionId(versionId); + + when(s3.getObjectMetadata(any(GetObjectMetadataRequest.class))) + .thenThrow(NOT_FOUND); + when(s3.putObject(argThat(correctPutObjectRequest(file)))) + .thenReturn(putObjectResult); + ListObjectsV2Result emptyListing = new ListObjectsV2Result(); + when(s3.listObjectsV2(argThat(correctListObjectsRequest(file + "/")))) + .thenReturn(emptyListing); + + FSDataOutputStream outputStream = fs.create(path); + outputStream.write(content); + outputStream.close(); + } + + private void multipartUpload(String file, Path path, byte[] content, + String eTag, String versionId) throws IOException { + CompleteMultipartUploadResult uploadResult = + new CompleteMultipartUploadResult(); + uploadResult.setVersionId(versionId); + + when(s3.getObjectMetadata(any(GetObjectMetadataRequest.class))) + .thenThrow(NOT_FOUND); + + InitiateMultipartUploadResult initiateMultipartUploadResult = + new InitiateMultipartUploadResult(); + initiateMultipartUploadResult.setUploadId("uploadId"); + when(s3.initiateMultipartUpload( + argThat(correctInitiateMultipartUploadRequest(file)))) + .thenReturn(initiateMultipartUploadResult); + + UploadPartResult uploadPartResult = 
new UploadPartResult(); + uploadPartResult.setETag("partETag"); + when(s3.uploadPart(argThat(correctUploadPartRequest(file)))) + .thenReturn(uploadPartResult); + + CompleteMultipartUploadResult multipartUploadResult = + new CompleteMultipartUploadResult(); + multipartUploadResult.setETag(eTag); + multipartUploadResult.setVersionId(versionId); + when(s3.completeMultipartUpload( + argThat(correctMultipartUploadRequest(file)))) + .thenReturn(multipartUploadResult); + + ListObjectsV2Result emptyListing = new ListObjectsV2Result(); + when(s3.listObjectsV2(argThat(correctListObjectsRequest(file + "/")))) + .thenReturn(emptyListing); + + FSDataOutputStream outputStream = fs.create(path); + outputStream.write(content); + outputStream.close(); + } + + private void assertContent(String file, Path path, byte[] content, + String eTag, String versionId) throws IOException { + S3Object s3Object = new S3Object(); + ObjectMetadata metadata = new ObjectMetadata(); + metadata.setHeader(Headers.S3_VERSION_ID, versionId); + metadata.setHeader(Headers.ETAG, eTag); + s3Object.setObjectMetadata(metadata); + s3Object.setObjectContent(new ByteArrayInputStream(content)); + when(s3.getObject(argThat(correctGetObjectRequest(file, eTag, versionId)))) + .thenReturn(s3Object); + FSDataInputStream inputStream = fs.open(path); + byte[] readContent = IOUtils.toByteArray(inputStream); + assertArrayEquals(content, readContent); + } + + private void assertVersionAttributes(Path path, String eTag, String versionId) + throws IOException { + MetadataStore metadataStore = fs.getMetadataStore(); + PathMetadata pathMetadata = metadataStore.get(path); + assertNotNull(pathMetadata); + S3AFileStatus fileStatus = pathMetadata.getFileStatus(); + assertEquals(eTag, fileStatus.getETag()); + assertEquals(versionId, fileStatus.getVersionId()); + } + + private Matcher correctGetObjectRequest(final String key, + final String eTag, final String versionId) { + return new BaseMatcher() { + @Override + public boolean 
matches(Object item) { + if (item instanceof GetObjectRequest) { + GetObjectRequest getObjectRequest = (GetObjectRequest) item; + if (getObjectRequest.getKey().equals(key)) { + if (changeDetectionSource.equals( + CHANGE_DETECT_SOURCE_ETAG)) { + return getObjectRequest.getMatchingETagConstraints() + .contains(eTag); + } else if (changeDetectionSource.equals( + CHANGE_DETECT_SOURCE_VERSION_ID)) { + return getObjectRequest.getVersionId().equals(versionId); + } + } + } + return false; + } + + @Override + public void describeTo(Description description) { + description.appendText("key and " + + changeDetectionSource + + " matches"); + } + }; + } + + private Matcher correctUploadPartRequest( + final String key) { + return new BaseMatcher() { + @Override + public boolean matches(Object item) { + if (item instanceof UploadPartRequest) { + UploadPartRequest request = (UploadPartRequest) item; + return request.getKey().equals(key); + } + return false; + } + + @Override + public void describeTo(Description description) { + description.appendText("key matches"); + } + }; + } + + private Matcher + correctInitiateMultipartUploadRequest(final String key) { + return new BaseMatcher() { + @Override + public void describeTo(Description description) { + description.appendText("key matches"); + } + + @Override + public boolean matches(Object item) { + if (item instanceof InitiateMultipartUploadRequest) { + InitiateMultipartUploadRequest request = + (InitiateMultipartUploadRequest) item; + return request.getKey().equals(key); + } + return false; + } + }; + } + + private Matcher + correctMultipartUploadRequest(final String key) { + return new BaseMatcher() { + @Override + public boolean matches(Object item) { + if (item instanceof CompleteMultipartUploadRequest) { + CompleteMultipartUploadRequest request = + (CompleteMultipartUploadRequest) item; + return request.getKey().equals(key); + } + return false; + } + + @Override + public void describeTo(Description description) { + 
description.appendText("key matches"); + } + }; + } + + private Matcher correctListObjectsRequest( + final String key) { + return new BaseMatcher() { + @Override + public boolean matches(Object item) { + if (item instanceof ListObjectsV2Request) { + ListObjectsV2Request listObjectsRequest = + (ListObjectsV2Request) item; + return listObjectsRequest.getPrefix().equals(key); + } + return false; + } + + @Override + public void describeTo(Description description) { + description.appendText("key matches"); + } + }; + } + + private Matcher correctPutObjectRequest( + final String key) { + return new BaseMatcher() { + @Override + public boolean matches(Object item) { + if (item instanceof PutObjectRequest) { + PutObjectRequest putObjectRequest = (PutObjectRequest) item; + return putObjectRequest.getKey().equals(key); + } + return false; + } + + @Override + public void describeTo(Description description) { + description.appendText("key matches"); + } + }; + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java index 704f51e3c083b..70bf901514bab 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.net.URI; +import java.util.Arrays; import java.util.Collection; import java.util.concurrent.Callable; @@ -31,16 +32,16 @@ import com.google.common.base.Preconditions; import org.junit.After; import org.junit.Assert; -import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.rules.Timeout; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.S3AFileStatus; import 
org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.test.LambdaTestUtils; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import org.mockito.Mockito; import static com.amazonaws.services.dynamodbv2.model.KeyType.HASH; @@ -58,41 +59,75 @@ * Test the PathMetadataDynamoDBTranslation is able to translate between domain * model objects and DynamoDB items. */ +@RunWith(Parameterized.class) public class TestPathMetadataDynamoDBTranslation extends Assert { private static final Path TEST_DIR_PATH = new Path("s3a://test-bucket/myDir"); - private static final Item TEST_DIR_ITEM = new Item(); - private static DDBPathMetadata testDirPathMetadata; - private static final long TEST_FILE_LENGTH = 100; private static final long TEST_MOD_TIME = 9999; private static final long TEST_BLOCK_SIZE = 128; + private static final String TEST_ETAG = "abc"; + private static final String TEST_VERSION_ID = "def"; private static final Path TEST_FILE_PATH = new Path(TEST_DIR_PATH, "myFile"); - private static final Item TEST_FILE_ITEM = new Item(); - private static DDBPathMetadata testFilePathMetadata; - @BeforeClass - public static void setUpBeforeClass() throws IOException { + private final Item testFileItem; + private final DDBPathMetadata testFilePathMetadata; + private final Item testDirItem; + private final DDBPathMetadata testDirPathMetadata; + + @Parameterized.Parameters + public static Collection params() throws IOException { + String username = UserGroupInformation.getCurrentUser().getShortUserName(); + + return Arrays.asList(new Object[][]{ + // with etag and versionId + { + new Item() + .withPrimaryKey(PARENT, + pathToParentKey(TEST_FILE_PATH.getParent()), + CHILD, TEST_FILE_PATH.getName()) + .withBoolean(IS_DIR, false) + .withLong(FILE_LENGTH, TEST_FILE_LENGTH) + .withLong(MOD_TIME, TEST_MOD_TIME) + .withLong(BLOCK_SIZE, TEST_BLOCK_SIZE) + .withString(ETAG, TEST_ETAG) + .withString(VERSION_ID, TEST_VERSION_ID), + new DDBPathMetadata( + 
new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME, + TEST_FILE_PATH, TEST_BLOCK_SIZE, username, TEST_ETAG, + TEST_VERSION_ID)) + }, + // without etag or versionId + { + new Item() + .withPrimaryKey(PARENT, + pathToParentKey(TEST_FILE_PATH.getParent()), + CHILD, TEST_FILE_PATH.getName()) + .withBoolean(IS_DIR, false) + .withLong(FILE_LENGTH, TEST_FILE_LENGTH) + .withLong(MOD_TIME, TEST_MOD_TIME) + .withLong(BLOCK_SIZE, TEST_BLOCK_SIZE), + new DDBPathMetadata( + new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME, + TEST_FILE_PATH, TEST_BLOCK_SIZE, username, null, null)) + } + }); + } + + public TestPathMetadataDynamoDBTranslation(Item item, + DDBPathMetadata metadata) throws IOException { + testFileItem = item; + testFilePathMetadata = metadata; + String username = UserGroupInformation.getCurrentUser().getShortUserName(); testDirPathMetadata = new DDBPathMetadata(new S3AFileStatus(false, TEST_DIR_PATH, username)); - TEST_DIR_ITEM + testDirItem = new Item(); + testDirItem .withPrimaryKey(PARENT, "/test-bucket", CHILD, TEST_DIR_PATH.getName()) .withBoolean(IS_DIR, true); - - testFilePathMetadata = new DDBPathMetadata( - new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME, TEST_FILE_PATH, - TEST_BLOCK_SIZE, username)); - - TEST_FILE_ITEM - .withPrimaryKey(PARENT, pathToParentKey(TEST_FILE_PATH.getParent()), - CHILD, TEST_FILE_PATH.getName()) - .withBoolean(IS_DIR, false) - .withLong(FILE_LENGTH, TEST_FILE_LENGTH) - .withLong(MOD_TIME, TEST_MOD_TIME) - .withLong(BLOCK_SIZE, TEST_BLOCK_SIZE); } /** @@ -138,8 +173,8 @@ public void testItemToPathMetadata() throws IOException { UserGroupInformation.getCurrentUser().getShortUserName(); assertNull(itemToPathMetadata(null, user)); - verify(TEST_DIR_ITEM, itemToPathMetadata(TEST_DIR_ITEM, user)); - verify(TEST_FILE_ITEM, itemToPathMetadata(TEST_FILE_ITEM, user)); + verify(testDirItem, itemToPathMetadata(testDirItem, user)); + verify(testFileItem, itemToPathMetadata(testFileItem, user)); } /** @@ -147,7 +182,7 @@ public void 
testItemToPathMetadata() throws IOException { */ private static void verify(Item item, PathMetadata meta) { assertNotNull(meta); - final FileStatus status = meta.getFileStatus(); + final S3AFileStatus status = meta.getFileStatus(); final Path path = status.getPath(); assertEquals(item.get(PARENT), pathToParentKey(path.getParent())); assertEquals(item.get(CHILD), path.getName()); @@ -157,6 +192,10 @@ private static void verify(Item item, PathMetadata meta) { assertEquals(len, status.getLen()); long bSize = item.hasAttribute(BLOCK_SIZE) ? item.getLong(BLOCK_SIZE) : 0; assertEquals(bSize, status.getBlockSize()); + String eTag = item.getString(ETAG); + assertEquals(eTag, status.getETag()); + String versionId = item.getString(VERSION_ID); + assertEquals(versionId, status.getVersionId()); /* * S3AFileStatue#getModificationTime() reports the current time, so the @@ -252,7 +291,7 @@ public void testVersionMarkerNotStatusIllegalPath() throws Throwable { @Test public void testIsAuthoritativeCompatibilityItemToPathMetadata() throws Exception { - Item item = Mockito.spy(TEST_DIR_ITEM); + Item item = Mockito.spy(testDirItem); item.withBoolean(IS_AUTHORITATIVE, true); PathMetadataDynamoDBTranslation.IGNORED_FIELDS.add(IS_AUTHORITATIVE); @@ -288,7 +327,7 @@ public void testIsAuthoritativeCompatibilityPathMetadataToItem() { @Test public void testIsLastUpdatedCompatibilityItemToPathMetadata() throws Exception { - Item item = Mockito.spy(TEST_DIR_ITEM); + Item item = Mockito.spy(testDirItem); item.withLong(LAST_UPDATED, 100); PathMetadataDynamoDBTranslation.IGNORED_FIELDS.add(LAST_UPDATED); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java index 1ddfed414d825..b246da2d50030 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java +++ 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java @@ -26,6 +26,8 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AFileStatus; +import org.apache.hadoop.fs.s3a.Tristate; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_METADATASTORE_AUTHORITATIVE_DIR_TTL; @@ -51,7 +53,7 @@ public void testDirListingUnion() throws Exception { Arrays.asList(m1, m2), false); // Two other files in s3 - List s3Listing = Arrays.asList( + List s3Listing = Arrays.asList( makeFileStatus("s3a://bucket/dir/s3-file3", false), makeFileStatus("s3a://bucket/dir/s3-file4", false) ); @@ -86,12 +88,15 @@ private PathMetadata makePathMeta(String pathStr, boolean isDir) { return new PathMetadata(makeFileStatus(pathStr, isDir)); } - private FileStatus makeFileStatus(String pathStr, boolean isDir) { + private S3AFileStatus makeFileStatus(String pathStr, boolean isDir) { Path p = new Path(pathStr); + S3AFileStatus fileStatus; if (isDir) { - return new FileStatus(0, true, 1, 1, System.currentTimeMillis(), p); + fileStatus = new S3AFileStatus(Tristate.UNKNOWN, p, null); } else { - return new FileStatus(100, false, 1, 1, System.currentTimeMillis(), p); + fileStatus = new S3AFileStatus( + 100, System.currentTimeMillis(), p, 1, null, null, null); } + return fileStatus; } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java index 0e6a1d8d09245..b843392ebfa07 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java @@ -170,7 +170,8 @@ protected S3AFileStatus copyStatus(S3AFileStatus status) { status.getOwner()); } else { return new 
S3AFileStatus(status.getLen(), status.getModificationTime(), - status.getPath(), status.getBlockSize(), status.getOwner()); + status.getPath(), status.getBlockSize(), status.getOwner(), + status.getETag(), status.getVersionId()); } } @@ -207,7 +208,8 @@ private static void printTiming(Logger log, String op, NanoTimer timer, } protected static S3AFileStatus makeFileStatus(Path path) throws IOException { - return new S3AFileStatus(SIZE, ACCESS_TIME, path, BLOCK_SIZE, OWNER); + return new S3AFileStatus(SIZE, ACCESS_TIME, path, BLOCK_SIZE, OWNER, + null, null); } protected static S3AFileStatus makeDirStatus(Path p) throws IOException { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectCLI.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectCLI.java index fccf708fef4e8..e31b48e5b5862 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectCLI.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectCLI.java @@ -25,6 +25,7 @@ import java.nio.charset.Charset; import java.util.List; +import org.junit.Assume; import org.junit.Test; import org.apache.commons.io.IOUtils; @@ -34,6 +35,8 @@ import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.apache.hadoop.fs.s3a.Statistic; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool; import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.OperationDuration; @@ -80,6 +83,11 @@ public void setup() throws Exception { selectConf = new Configuration(getConfiguration()); localFile = getTempFilename(); landsatSrc = getLandsatGZ().toString(); + ChangeDetectionPolicy changeDetectionPolicy = + getLandsatFS().getChangeDetectionPolicy(); + Assume.assumeFalse("the standard landsat bucket doesn't have 
versioning", + changeDetectionPolicy.getSource() == Source.VersionId + && changeDetectionPolicy.isRequireVersion()); } @Override diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java index 78f3a6d1fe558..1d45852c98946 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java @@ -21,6 +21,9 @@ import java.io.IOException; import java.util.List; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; +import org.junit.Assume; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -190,6 +193,11 @@ public void setup() throws Exception { // disable the gzip codec, so that the record readers do not // get confused enablePassthroughCodec(selectConf, ".gz"); + ChangeDetectionPolicy changeDetectionPolicy = + getLandsatFS().getChangeDetectionPolicy(); + Assume.assumeFalse("the standard landsat bucket doesn't have versioning", + changeDetectionPolicy.getSource() == Source.VersionId + && changeDetectionPolicy.isRequireVersion()); } protected int getMaxLines() { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectMRJob.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectMRJob.java index ee7de8c7ac2f2..181d797767397 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectMRJob.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectMRJob.java @@ -21,6 +21,9 @@ import java.io.IOException; import java.util.concurrent.atomic.AtomicLong; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; +import 
org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; +import org.junit.Assume; import org.junit.Test; import org.apache.hadoop.conf.Configuration; @@ -90,6 +93,13 @@ public class ITestS3SelectMRJob extends AbstractS3SelectTest { public void setup() throws Exception { super.setup(); fs = S3ATestUtils.createTestFileSystem(conf); + + ChangeDetectionPolicy changeDetectionPolicy = + getLandsatFS().getChangeDetectionPolicy(); + Assume.assumeFalse("the standard landsat bucket doesn't have versioning", + changeDetectionPolicy.getSource() == Source.VersionId + && changeDetectionPolicy.isRequireVersion()); + rootPath = path("ITestS3SelectMRJob"); Path workingDir = path("working"); fs.setWorkingDirectory(workingDir); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/mapreduce/filecache/TestS3AResourceScope.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/mapreduce/filecache/TestS3AResourceScope.java index c9b1ddc97ee18..172f79e09aea7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/mapreduce/filecache/TestS3AResourceScope.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/mapreduce/filecache/TestS3AResourceScope.java @@ -48,7 +48,7 @@ public void testS3AFilesArePrivate() throws Throwable { @Test public void testS3AFilesArePrivateOtherContstructor() throws Throwable { - S3AFileStatus status = new S3AFileStatus(0, 0, PATH, 1, "self"); + S3AFileStatus status = new S3AFileStatus(0, 0, PATH, 1, "self", null, null); assertTrue("Not encrypted: " + status, status.isEncrypted()); assertNotExecutable(status); } From 9a880edb54831b5f1a54f32fe02e161bce91f6ea Mon Sep 17 00:00:00 2001 From: Ben Roling Date: Mon, 6 May 2019 16:39:14 -0500 Subject: [PATCH 02/13] Retries for eventually consistent read-after-overwrite Includes retries for regular reads, select(), and rename() --- .../org/apache/hadoop/fs/s3a/Invoker.java | 84 +++ .../apache/hadoop/fs/s3a/S3AFileSystem.java | 87 +-- 
.../apache/hadoop/fs/s3a/S3AInputStream.java | 7 +- .../fs/s3a/S3GuardExistsRetryPolicy.java | 1 + .../fs/s3a/ITestS3ARemoteFileChanged.java | 497 +++++++++++++++++- 5 files changed, 632 insertions(+), 44 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java index 68a69f39321be..a59ffa9c6e088 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java @@ -197,6 +197,33 @@ public void retry(String action, }); } + /** + * Execute a void operation with retry processing when doRetry=true, else + * just once. + * @param doRetry true if retries should be performed + * @param action action to execute (used in error messages) + * @param path path of work (used in error messages) + * @param idempotent does the operation have semantics + * which mean that it can be retried even if was already executed? + * @param retrying callback on retries + * @param operation operation to execute + * @throws IOException any IOE raised, or translated exception + */ + @Retries.RetryTranslated + public void maybeRetry(boolean doRetry, + String action, + String path, + boolean idempotent, + Retried retrying, + VoidOperation operation) + throws IOException { + maybeRetry(doRetry, action, path, idempotent, retrying, + () -> { + operation.execute(); + return null; + }); + } + /** * Execute a void operation with the default retry callback invoked. * @param action action to execute (used in error messages) @@ -215,6 +242,28 @@ public void retry(String action, retry(action, path, idempotent, retryCallback, operation); } + /** + * Execute a void operation with the default retry callback invoked when + * doRetry=true, else just once. 
+ * @param doRetry true if retries should be performed + * @param action action to execute (used in error messages) + * @param path path of work (used in error messages) + * @param idempotent does the operation have semantics + * which mean that it can be retried even if was already executed? + * @param operation operation to execute + * @throws IOException any IOE raised, or translated exception + */ + @Retries.RetryTranslated + public void maybeRetry( + boolean doRetry, + String action, + String path, + boolean idempotent, + VoidOperation operation) + throws IOException { + maybeRetry(doRetry, action, path, idempotent, retryCallback, operation); + } + /** * Execute a function with the default retry callback invoked. * @param action action to execute (used in error messages) @@ -265,6 +314,41 @@ public T retry( () -> once(action, path, operation)); } + /** + * Execute a function with retry processing when doRetry=true, else just once. + * Uses {@link #once(String, String, Operation)} as the inner + * invocation mechanism before retry logic is performed. + * @param type of return value + * @param doRetry true if retries should be performed + * @param action action to execute (used in error messages) + * @param path path of work (used in error messages) + * @param idempotent does the operation have semantics + * which mean that it can be retried even if was already executed? 
+ * @param retrying callback on retries + * @param operation operation to execute + * @return the result of the call + * @throws IOException any IOE raised, or translated exception + */ + @Retries.RetryTranslated + public T maybeRetry( + boolean doRetry, + String action, + @Nullable String path, + boolean idempotent, + Retried retrying, + Operation operation) + throws IOException { + if (doRetry) { + return retryUntranslated( + toDescription(action, path), + idempotent, + retrying, + () -> once(action, path, operation)); + } else { + return once(action, path, operation); + } + } + /** * Execute a function with retry processing and no translation. * and the default retry callback. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index fead648d9b0d7..c1b14644a399e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -326,26 +326,7 @@ public void initialize(URI name, Configuration originalConf) readAhead = longBytesOption(conf, READAHEAD_RANGE, DEFAULT_READAHEAD_RANGE, 0); - int maxThreads = conf.getInt(MAX_THREADS, DEFAULT_MAX_THREADS); - if (maxThreads < 2) { - LOG.warn(MAX_THREADS + " must be at least 2: forcing to 2."); - maxThreads = 2; - } - int totalTasks = intOption(conf, - MAX_TOTAL_TASKS, DEFAULT_MAX_TOTAL_TASKS, 1); - long keepAliveTime = longOption(conf, KEEPALIVE_TIME, - DEFAULT_KEEPALIVE_TIME, 0); - boundedThreadPool = BlockingThreadPoolExecutorService.newInstance( - maxThreads, - maxThreads + totalTasks, - keepAliveTime, TimeUnit.SECONDS, - "s3a-transfer-shared"); - unboundedThreadPool = new ThreadPoolExecutor( - maxThreads, Integer.MAX_VALUE, - keepAliveTime, TimeUnit.SECONDS, - new LinkedBlockingQueue(), - BlockingThreadPoolExecutorService.newDaemonThreadFactory( - "s3a-transfer-unbounded")); 
+ initThreadPools(conf); int listVersion = conf.getInt(LIST_VERSION, DEFAULT_LIST_VERSION); if (listVersion < 1 || listVersion > 2) { @@ -414,6 +395,29 @@ public void initialize(URI name, Configuration originalConf) } + private void initThreadPools(Configuration conf) { + int maxThreads = conf.getInt(MAX_THREADS, DEFAULT_MAX_THREADS); + if (maxThreads < 2) { + LOG.warn(MAX_THREADS + " must be at least 2: forcing to 2."); + maxThreads = 2; + } + int totalTasks = intOption(conf, + MAX_TOTAL_TASKS, DEFAULT_MAX_TOTAL_TASKS, 1); + long keepAliveTime = longOption(conf, KEEPALIVE_TIME, + DEFAULT_KEEPALIVE_TIME, 0); + boundedThreadPool = BlockingThreadPoolExecutorService.newInstance( + maxThreads, + maxThreads + totalTasks, + keepAliveTime, TimeUnit.SECONDS, + "s3a-transfer-shared"); + unboundedThreadPool = new ThreadPoolExecutor( + maxThreads, Integer.MAX_VALUE, + keepAliveTime, TimeUnit.SECONDS, + new LinkedBlockingQueue(), + BlockingThreadPoolExecutorService.newDaemonThreadFactory( + "s3a-transfer-unbounded")); + } + /** * Create the storage statistics or bind to an existing one. * @return a storage statistics instance. @@ -654,6 +658,13 @@ protected void setAmazonS3Client(AmazonS3 client) { Preconditions.checkNotNull(client, "client"); LOG.debug("Setting S3 client to {}", client); s3 = client; + + // Need to use a new TransferManager that uses the new client. + // Also, using a new TransferManager requires a new threadpool as the old + // TransferManager will shut the thread pool down when it is garbage + // collected. 
+ initThreadPools(getConf()); + initTransferManager(); } /** @@ -2957,7 +2968,10 @@ private CopyResult copyFile(String srcKey, String dstKey, long size, } }; - return once("copyFile(" + srcKey + ", " + dstKey + ")", srcKey, + Invoker readInvoker = readContext.getReadInvoker(); + return readInvoker.retry( + "\"copyFile(\" + srcKey + \", \" + dstKey + \")\", srcKey", null, + false, () -> { ObjectMetadata srcom = getObjectMetadata(srcKey); ObjectMetadata dstom = cloneObjectMetadata(srcom); @@ -3874,19 +3888,24 @@ private FSDataInputStream select(final Path source, S3AReadOpContext readContext = createReadContext(fileStatus, inputPolicy, changeDetectionPolicy, ra); - if (!fileStatus.isDirectory()) { - // check that the object metadata lines up with what is expected - // based on the object attributes (which may contain an eTag or versionId) - // from S3Guard - ChangeTracker changeTracker = - new ChangeTracker(uri.toString(), - changeDetectionPolicy, - readContext.instrumentation.newInputStreamStatistics() - .getVersionMismatchCounter(), - objectAttributes); - ObjectMetadata objectMetadata = getObjectMetadata(path); - changeTracker.processMetadata(objectMetadata, "select", 0); - } + Invoker readInvoker = readContext.getReadInvoker(); + readInvoker.retry("select", + path.toString(), true, + () -> { + if (!fileStatus.isDirectory()) { + // check that the object metadata lines up with what is expected + // based on the object attributes (which may contain an eTag or + // versionId) from S3Guard + ChangeTracker changeTracker = + new ChangeTracker(uri.toString(), + changeDetectionPolicy, + readContext.instrumentation.newInputStreamStatistics() + .getVersionMismatchCounter(), + objectAttributes); + ObjectMetadata objectMetadata = getObjectMetadata(path); + changeTracker.processMetadata(objectMetadata, "select", 0); + } + }); // build and execute the request return selectBinding.select( diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java index 6075b52314340..6e7a2511f6749 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java @@ -345,9 +345,12 @@ public boolean seekToNewSource(long targetPos) throws IOException { private void lazySeek(long targetPos, long len) throws IOException { // With S3Guard, the metadatastore gave us metadata for the file in - // open(), so we use a slightly different retry policy. + // open(), so we use a slightly different retry policy, but only on initial + // open. After that, an exception generally means the file has changed + // and there is no point retrying anymore. Invoker invoker = context.getReadInvoker(); - invoker.retry("lazySeek", pathStr, true, + invoker.maybeRetry(streamStatistics.openOperations == 0, + "lazySeek", pathStr, true, () -> { //For lazy seek seekInStream(targetPos, len); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3GuardExistsRetryPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3GuardExistsRetryPolicy.java index 023d0c3cf2c37..1a0135bb9b5d5 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3GuardExistsRetryPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3GuardExistsRetryPolicy.java @@ -42,6 +42,7 @@ public S3GuardExistsRetryPolicy(Configuration conf) { protected Map, RetryPolicy> createExceptionMap() { Map, RetryPolicy> b = super.createExceptionMap(); b.put(FileNotFoundException.class, retryIdempotentCalls); + b.put(RemoteFileChangedException.class, retryIdempotentCalls); return b; } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java index 11f4a383bef35..1685daaa74e50 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java @@ -26,15 +26,27 @@ import java.util.Arrays; import java.util.Collection; -import org.apache.commons.io.IOUtils; +import com.amazonaws.AmazonServiceException; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.model.CopyObjectRequest; +import com.amazonaws.services.s3.model.CopyObjectResult; +import com.amazonaws.services.s3.model.GetObjectMetadataRequest; +import com.amazonaws.services.s3.model.GetObjectRequest; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.S3Object; import org.junit.Assume; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.mockito.ArgumentMatchers; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; @@ -46,13 +58,25 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; -import static org.apache.hadoop.fs.s3a.Constants.*; +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE; +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE_CLIENT; +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE_NONE; +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE_SERVER; +import static 
org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE_WARN; +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_SOURCE; +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_SOURCE_ETAG; +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_SOURCE_VERSION_ID; +import static org.apache.hadoop.fs.s3a.Constants.RETRY_LIMIT; +import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBucketOverrides; import static org.apache.hadoop.fs.s3a.select.SelectConstants.SELECT_SQL; import static org.apache.hadoop.test.LambdaTestUtils.eventually; import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture; +import static org.apache.http.HttpStatus.SC_PRECONDITION_FAILED; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.when; /** * Test S3A remote file change detection. 
@@ -64,11 +88,17 @@ public class ITestS3ARemoteFileChanged extends AbstractS3ATestBase { LoggerFactory.getLogger(ITestS3ARemoteFileChanged.class); private static final String TEST_DATA = "Some test data"; + private static final int TEST_MAX_RETRIES = 5; private static final String QUOTED_TEST_DATA = "\"" + TEST_DATA + "\""; private enum InteractionType { - READ, READ_AFTER_DELETE, COPY, SELECT + READ, + READ_AFTER_DELETE, + EVENTUALLY_CONSISTENT_READ, + COPY, + EVENTUALLY_CONSISTENT_COPY, + SELECT } private final String changeDetectionSource; @@ -84,7 +114,9 @@ public static Collection params() { Arrays.asList( InteractionType.READ, InteractionType.READ_AFTER_DELETE, + InteractionType.EVENTUALLY_CONSISTENT_READ, InteractionType.COPY, + InteractionType.EVENTUALLY_CONSISTENT_COPY, InteractionType.SELECT)}, // test with etag @@ -92,11 +124,14 @@ public static Collection params() { Arrays.asList( InteractionType.READ, InteractionType.READ_AFTER_DELETE, + InteractionType.EVENTUALLY_CONSISTENT_READ, InteractionType.COPY, + InteractionType.EVENTUALLY_CONSISTENT_COPY, InteractionType.SELECT)}, {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_CLIENT, Arrays.asList( InteractionType.READ, + InteractionType.EVENTUALLY_CONSISTENT_READ, InteractionType.READ_AFTER_DELETE, InteractionType.COPY, InteractionType.SELECT)}, @@ -108,13 +143,15 @@ public static Collection params() { InteractionType.READ_AFTER_DELETE)}, // test with versionId - // when using server-side versionId, the read exceptions shouldn't - // happen since the previous version will still be available, but - // they will still happen on rename and select since we always do a + // when using server-side versionId, the normal read exceptions + // shouldn't happen since the previous version will still be available, + // but they will still happen on rename and select since we always do a // client-side check against the current version {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_SERVER, Arrays.asList( + 
InteractionType.EVENTUALLY_CONSISTENT_READ, InteractionType.COPY, + InteractionType.EVENTUALLY_CONSISTENT_COPY, InteractionType.SELECT)}, // with client-side versionId it will behave similar to client-side eTag @@ -122,6 +159,7 @@ public static Collection params() { Arrays.asList( InteractionType.READ, InteractionType.READ_AFTER_DELETE, + InteractionType.EVENTUALLY_CONSISTENT_READ, InteractionType.COPY, InteractionType.SELECT)}, @@ -159,7 +197,7 @@ protected Configuration createConfiguration() { // reduce retry limit so FileNotFoundException cases timeout faster, // speeding up the tests - conf.set(RETRY_LIMIT, "5"); + conf.setInt(RETRY_LIMIT, TEST_MAX_RETRIES); if (conf.getClass(S3_METADATA_STORE_IMPL, MetadataStore.class) == NullMetadataStore.class) { @@ -315,6 +353,45 @@ public void testSelectChangedFile() throws Throwable { } } + /** + * Tests using S3 Select on a file where the version visible in S3 does not + * initially match the version tracked in the metadata store, but eventually + * (after retries) does. 
+ */ + @Test + public void testSelectEventuallyConsistentFile() throws Throwable { + Assume.assumeTrue("S3Guard must be enabled", fs.hasMetadataStore()); + AmazonS3 s3ClientSpy = Mockito.spy(fs.getAmazonS3Client()); + fs.setAmazonS3Client(s3ClientSpy); + + final Path testpath1 = writeEventuallyConsistentFileVersion( + "select1.dat", s3ClientSpy, 0, TEST_MAX_RETRIES, 0); + + skipIfVersionPolicyAndNoVersionId(testpath1); + + // should succeed since the inconsistency doesn't last longer than the + // configured retry limit + fs.openFile(testpath1) + .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build().get(); + + // TEST_MAX_RETRIES + 2 is required to before failure since there is + // one getObjectMetadata() call due to getFileStatus() before the + // change detection and retries in select() + final Path testpath2 = writeEventuallyConsistentFileVersion( + "select2.dat", s3ClientSpy, 0, TEST_MAX_RETRIES + 2, 0); + + if (expectedExceptionInteractions.contains(InteractionType.SELECT)) { + // should fail since the inconsistency lasts longer than the configured + // retry limit + interceptFuture(RemoteFileChangedException.class, "select", + fs.openFile(testpath2) + .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build()); + } else { + fs.openFile(testpath2) + .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build().get(); + } + } + /** * Ensures a file can be read via S3 Select when there is no version metadata * (ETag, versionId). @@ -354,6 +431,59 @@ public void testRenameChangedFile() throws Throwable { } } + /** + * Tests doing a rename() on a file where the version visible in S3 does not + * match the version in the metadata store until a certain number of retries + * has been met. 
+ */ + @Test + public void testRenameEventuallyConsistentFile() throws Throwable { + Assume.assumeTrue("S3Guard must be enabled", fs.hasMetadataStore()); + AmazonS3 s3ClientSpy = Mockito.spy(fs.getAmazonS3Client()); + fs.setAmazonS3Client(s3ClientSpy); + + // Total inconsistent response count across getObjectMetadata() and + // copyObject() must be TEST_MAX_RETRIES + 2 to generate a failure + // since there is one getObjectMetadata() call from innerRename() before + // copyObject(). + // The split of inconsistent responses between getObjectMetadata() and + // copyObject() is arbitrary. + int maxInconsistenciesBeforeFailure = TEST_MAX_RETRIES + 1; + int metadataInconsistencyCount = 3; + final Path testpath1 = + writeEventuallyConsistentFileVersion("rename-eventually1.dat", + s3ClientSpy, + 0, + metadataInconsistencyCount, + maxInconsistenciesBeforeFailure - metadataInconsistencyCount); + + skipIfVersionPolicyAndNoVersionId(testpath1); + + final Path dest1 = path("dest1.dat"); + // shouldn't fail since the inconsistency doesn't last through the + // configured retry limit + fs.rename(testpath1, dest1); + + final Path testpath2 = + writeEventuallyConsistentFileVersion("rename-eventually2.dat", + s3ClientSpy, + 0, + metadataInconsistencyCount, + maxInconsistenciesBeforeFailure - metadataInconsistencyCount + 1); + final Path dest2 = path("dest2.dat"); + if (expectedExceptionInteractions.contains( + InteractionType.EVENTUALLY_CONSISTENT_COPY)) { + // should fail since the inconsistency is set up to persist longer than + // the configured retry limit + intercept(RemoteFileChangedException.class, "", "copy()", + () -> { + fs.rename(testpath2, dest2); + }); + } else { + fs.rename(testpath2, dest2); + } + } + /** * Ensures a file can be renamed when there is no version metadata * (ETag, versionId). 
@@ -372,6 +502,74 @@ public void testRenameWithNoVersionMetadata() throws Throwable { IOUtils.toString(inputStream, Charset.forName("UTF-8")).trim()); } + /** + * Ensures S3Guard and retries allow an eventually consistent read. + */ + @Test + public void testReadAfterEventuallyConsistentWrite() throws Throwable { + Assume.assumeTrue("S3Guard must be enabled", fs.hasMetadataStore()); + AmazonS3 s3ClientSpy = Mockito.spy(fs.getAmazonS3Client()); + fs.setAmazonS3Client(s3ClientSpy); + final Path testpath1 = + writeEventuallyConsistentFileVersion("eventually1.dat", + s3ClientSpy, TEST_MAX_RETRIES, 0 , 0); + try (FSDataInputStream instream1 = fs.open(testpath1)) { + // succeeds on the last retry + instream1.read(); + } + + final Path testpath2 = + writeEventuallyConsistentFileVersion("eventually2.dat", + s3ClientSpy, TEST_MAX_RETRIES + 1, 0, 0); + try (FSDataInputStream instream2 = fs.open(testpath2)) { + if (expectedExceptionInteractions.contains( + InteractionType.EVENTUALLY_CONSISTENT_READ)) { + // keeps retrying and eventually gives up with RemoteFileChangedException + intercept(RemoteFileChangedException.class, "", "read()", + () -> { + instream2.read(); + }); + } else { + instream2.read(); + } + } + } + + /** + * Ensures read on re-open (after seek backwards) when S3 does not return the + * version of the file tracked in the metadata store fails immediately. No + * retries should happen since a retry is not expected to recover. 
+ */ + @Test + public void testEventuallyConsistentReadOnReopen() throws Throwable { + Assume.assumeTrue("S3Guard must be enabled", fs.hasMetadataStore()); + AmazonS3 s3ClientSpy = Mockito.spy(fs.getAmazonS3Client()); + fs.setAmazonS3Client(s3ClientSpy); + String filename = "eventually-reopen.dat"; + final Path testpath = + writeEventuallyConsistentFileVersion(filename, + s3ClientSpy, 0, 0, 0); + try (FSDataInputStream instream = fs.open(testpath)) { + instream.read(); + // overwrite the file, returning inconsistent version for + // (effectively) infinite retries + writeEventuallyConsistentFileVersion(filename, s3ClientSpy, + Integer.MAX_VALUE, 0, 0); + instream.seek(0); + if (expectedExceptionInteractions.contains(InteractionType.READ)) { + // if it retries at all, it will retry forever, which should fail + // the test. The expected behavior is immediate + // RemoteFileChangedException. + intercept(RemoteFileChangedException.class, "", "read()", + () -> { + instream.read(); + }); + } else { + instream.read(); + } + } + } + /** * Writes a file with old ETag and versionId in the metadata store such * that the metadata is out of sync with S3. Attempts to read such a file @@ -404,6 +602,221 @@ private Path writeOutOfSyncFileVersion(String filename) throws IOException { return testpath; } + /** + * Writes a file where the file will be inconsistent in S3 for some duration. + * The duration of the inconsistency is controlled by the + * getObjectInconsistencyCount, getMetdataInconsistencyCount, and + * copyInconistentCallCount parameters. The inconsistency manifests in + * AmazonS3#getObject, AmazonS3#getObjectMetadata, and AmazonS3#copyObject. + * This method sets up the provided s3ClientSpy to return a response to each + * of these methods indicating an inconsistency where the requested object + * version (eTag or versionId) is not available until a certain retry + * threshold is met. 
Providing inconsistent call count values above or + * below the overall retry limit allows a test to simulate a condition that + * either should or should not result in an overall failure from retry + * exhaustion. + */ + private Path writeEventuallyConsistentFileVersion(String filename, + AmazonS3 s3ClientSpy, + int getObjectInconsistencyCount, + int getMetdataInconsistencyCount, + int copyInconsistencyCount) + throws IOException { + final Path testpath = path(filename); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PrintWriter printWriter = new PrintWriter(out); + printWriter.println(TEST_DATA); + printWriter.close(); + final byte[] dataset = out.toByteArray(); + writeDataset(fs, testpath, dataset, dataset.length, + 1024, true); + S3AFileStatus originalStatus = (S3AFileStatus) fs.getFileStatus(testpath); + + // overwrite with half the content + writeDataset(fs, testpath, dataset, dataset.length / 2, + 1024, true); + + S3AFileStatus newStatus = (S3AFileStatus) fs.getFileStatus(testpath); + + stubTemporaryUnavailable(s3ClientSpy, getObjectInconsistencyCount, + testpath, newStatus); + + stubTemporaryWrongVersion(s3ClientSpy, getObjectInconsistencyCount, + testpath, originalStatus); + + stubTemporaryCopyInconsistency(s3ClientSpy, testpath, newStatus, + copyInconsistencyCount); + + stubTemporaryMetadataInconsistency(s3ClientSpy, testpath, originalStatus, + newStatus, getMetdataInconsistencyCount); + + return testpath; + } + + /** + * Stubs {@link AmazonS3#getObject(GetObjectRequest)} + * within s3ClientSpy to return null until inconsistentCallCount calls have + * been made. The null response simulates what occurs when an object + * matching the specified ETag or versionId is not available. 
+ * @param s3ClientSpy the spy to stub + * @param inconsistentCallCount the number of calls that should return the + * null response + * @param testpath the path of the object the stub should apply to + */ + private void stubTemporaryUnavailable(AmazonS3 s3ClientSpy, + int inconsistentCallCount, Path testpath, + S3AFileStatus newStatus) { + Answer temporarilyUnavailableAnswer = new Answer() { + private int callCount = 0; + + @Override + public S3Object answer(InvocationOnMock invocation) throws Throwable { + // simulates ETag or versionId constraint not met until + // inconsistentCallCount surpassed + callCount++; + if (callCount <= inconsistentCallCount) { + return null; + } + return (S3Object) invocation.callRealMethod(); + } + }; + + // match the requests that would be made in either server-side change + // detection mode + doAnswer(temporarilyUnavailableAnswer).when(s3ClientSpy) + .getObject( + matchingGetObjectRequest( + testpath, newStatus.getETag(), null)); + doAnswer(temporarilyUnavailableAnswer).when(s3ClientSpy) + .getObject( + matchingGetObjectRequest( + testpath, null, newStatus.getVersionId())); + } + + /** + * Stubs {@link AmazonS3#getObject(GetObjectRequest)} + * within s3ClientSpy to return an object modified to contain metadata + * from originalStatus until inconsistentCallCount calls have been made. 
+ * @param s3ClientSpy the spy to stub + * @param testpath the path of the object the stub should apply to + * @param inconsistentCallCount the number of calls that should return the + * null response + * @param originalStatus the status metadata to inject into the + * inconsistentCallCount responses + */ + private void stubTemporaryWrongVersion(AmazonS3 s3ClientSpy, + int inconsistentCallCount, Path testpath, + S3AFileStatus originalStatus) { + Answer temporarilyWrongVersionAnswer = new Answer() { + private int callCount = 0; + + @Override + public S3Object answer(InvocationOnMock invocation) throws Throwable { + // simulates old ETag or versionId until inconsistentCallCount surpassed + callCount++; + S3Object s3Object = (S3Object) invocation.callRealMethod(); + if (callCount <= inconsistentCallCount) { + S3Object objectSpy = Mockito.spy(s3Object); + ObjectMetadata metadataSpy = + Mockito.spy(s3Object.getObjectMetadata()); + when(objectSpy.getObjectMetadata()).thenReturn(metadataSpy); + when(metadataSpy.getETag()).thenReturn(originalStatus.getETag()); + when(metadataSpy.getVersionId()) + .thenReturn(originalStatus.getVersionId()); + return objectSpy; + } + return s3Object; + } + }; + + // match requests that would be made in client-side change detection + doAnswer(temporarilyWrongVersionAnswer).when(s3ClientSpy).getObject( + matchingGetObjectRequest(testpath, null, null)); + } + + /** + * Stubs {@link AmazonS3#copyObject(CopyObjectRequest)} + * within s3ClientSpy to throw precondition failed error until + * copyInconistentCallCount calls have been made. 
+ * @param s3ClientSpy the spy to stub + * @param testpath the path of the object the stub should apply to + * @param newStatus the status metadata containing the ETag and versionId + * that should be matched in order for the stub to apply + * @param copyInconistentCallCount how many times to return the + * precondition failed error + */ + private void stubTemporaryCopyInconsistency(AmazonS3 s3ClientSpy, + Path testpath, S3AFileStatus newStatus, + int copyInconistentCallCount) { + Answer temporarilyPreconditionsNotMetAnswer = + new Answer() { + private int callCount = 0; + + @Override + public CopyObjectResult answer(InvocationOnMock invocation) + throws Throwable { + callCount++; + if (callCount <= copyInconistentCallCount) { + AmazonServiceException exception = + new AmazonServiceException("preconditions not met"); + exception.setStatusCode(SC_PRECONDITION_FAILED); + throw exception; + } + return (CopyObjectResult) invocation.callRealMethod(); + } + }; + + // match requests made during copy + doAnswer(temporarilyPreconditionsNotMetAnswer).when(s3ClientSpy).copyObject( + matchingCopyObjectRequest(testpath, newStatus.getETag(), null)); + doAnswer(temporarilyPreconditionsNotMetAnswer).when(s3ClientSpy).copyObject( + matchingCopyObjectRequest(testpath, null, newStatus.getVersionId())); + } + + /** + * Stubs {@link AmazonS3#getObjectMetadata(GetObjectMetadataRequest)} + * within s3ClientSpy to return metadata from originalStatus until + * metadataInconsistentCallCount calls have been made. 
+ * @param s3ClientSpy the spy to stub + * @param testpath the path of the object the stub should apply to + * @param originalStatus the inconsistent status metadata to return + * @param newStatus the status metadata to return after + * metadataInconsistentCallCount is met + * @param metadataInconsistentCallCount how many times to return the + * inconsistent metadata + */ + private void stubTemporaryMetadataInconsistency(AmazonS3 s3ClientSpy, + Path testpath, S3AFileStatus originalStatus, + S3AFileStatus newStatus, int metadataInconsistentCallCount) { + Answer temporarilyOldMetadataAnswer = + new Answer() { + private int callCount = 0; + + @Override + public ObjectMetadata answer(InvocationOnMock invocation) + throws Throwable { + ObjectMetadata objectMetadata = + (ObjectMetadata) invocation.callRealMethod(); + callCount++; + if (callCount <= metadataInconsistentCallCount) { + ObjectMetadata metadataSpy = + Mockito.spy(objectMetadata); + when(metadataSpy.getETag()).thenReturn(originalStatus.getETag()); + when(metadataSpy.getVersionId()) + .thenReturn(originalStatus.getVersionId()); + return metadataSpy; + } + return objectMetadata; + } + }; + + // match requests made during select + doAnswer(temporarilyOldMetadataAnswer).when(s3ClientSpy).getObjectMetadata( + matchingMetadataRequest(testpath, null)); + doAnswer(temporarilyOldMetadataAnswer).when(s3ClientSpy).getObjectMetadata( + matchingMetadataRequest(testpath, newStatus.getVersionId())); + } + /** * Writes a file with null ETag and versionId in the metadata store. */ @@ -432,7 +845,8 @@ private Path writeFileWithNoVersionMetadata(String filename) * The test is invalid if the policy uses versionId but the bucket doesn't * have versioning enabled. 
*/ - private void skipIfVersionPolicyAndNoVersionId(Path testpath) throws IOException { + private void skipIfVersionPolicyAndNoVersionId(Path testpath) + throws IOException { if (fs.getChangeDetectionPolicy().getSource() == Source.VersionId) { // skip versionId tests if the bucket doesn't have object versioning // enabled @@ -441,4 +855,71 @@ private void skipIfVersionPolicyAndNoVersionId(Path testpath) throws IOException fs.getObjectMetadata(fs.pathToKey(testpath)).getVersionId() != null); } } + + private GetObjectRequest matchingGetObjectRequest(Path path, String eTag, + String versionId) { + return ArgumentMatchers.argThat(request -> { + if (request.getBucketName().equals(fs.getBucket()) + && request.getKey().equals(fs.pathToKey(path))) { + if (eTag == null && !request.getMatchingETagConstraints().isEmpty()) { + return false; + } + if (eTag != null && + !request.getMatchingETagConstraints().contains(eTag)) { + return false; + } + if (versionId == null && request.getVersionId() != null) { + return false; + } + if (versionId != null && !versionId.equals(request.getVersionId())) { + return false; + } + return true; + } + return false; + }); + } + + private CopyObjectRequest matchingCopyObjectRequest(Path path, String eTag, + String versionId) { + return ArgumentMatchers.argThat(request -> { + if (request.getSourceBucketName().equals(fs.getBucket()) + && request.getSourceKey().equals(fs.pathToKey(path))) { + if (eTag == null && !request.getMatchingETagConstraints().isEmpty()) { + return false; + } + if (eTag != null && + !request.getMatchingETagConstraints().contains(eTag)) { + return false; + } + if (versionId == null && request.getSourceVersionId() != null) { + return false; + } + if (versionId != null && + !versionId.equals(request.getSourceVersionId())) { + return false; + } + return true; + } + return false; + }); + } + + private GetObjectMetadataRequest matchingMetadataRequest(Path path, + String versionId) { + return ArgumentMatchers.argThat(request -> { + if 
(request.getBucketName().equals(fs.getBucket()) + && request.getKey().equals(fs.pathToKey(path))) { + if (versionId == null && request.getVersionId() != null) { + return false; + } + if (versionId != null && + !versionId.equals(request.getVersionId())) { + return false; + } + return true; + } + return false; + }); + } } From f45bf3e806f37cbb95306ea8186d472435fc25af Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 8 May 2019 18:53:24 +0100 Subject: [PATCH 03/13] HADOOP-16085 S3Guard to support version tracking +add stevel review (primarily of tests) Change-Id: I75a3b70917eefc0a0ec3190ca1de527e2081551e --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 14 +-- .../fs/s3a/impl/ChangeDetectionPolicy.java | 2 +- .../site/markdown/tools/hadoop-aws/s3guard.md | 46 ++++---- .../hadoop/fs/s3a/ITestS3ADelayedFNF.java | 1 + .../hadoop/fs/s3a/ITestS3AInconsistency.java | 1 + .../fs/s3a/ITestS3ARemoteFileChanged.java | 108 +++++++++++------- .../TestObjectChangeDetectionAttributes.java | 18 ++- .../fs/s3a/select/ITestS3SelectLandsat.java | 4 +- 8 files changed, 111 insertions(+), 83 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index c1b14644a399e..75bef792400ec 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -2968,6 +2968,13 @@ private CopyResult copyFile(String srcKey, String dstKey, long size, } }; + ChangeTracker changeTracker = new ChangeTracker( + keyToQualifiedPath(srcKey).toString(), + changeDetectionPolicy, + readContext.instrumentation.newInputStreamStatistics() + .getVersionMismatchCounter(), + srcAttributes); + Invoker readInvoker = readContext.getReadInvoker(); return readInvoker.retry( "\"copyFile(\" + srcKey + \", \" + dstKey + \")\", srcKey", null, @@ -2977,13 +2984,6 @@ 
private CopyResult copyFile(String srcKey, String dstKey, long size, ObjectMetadata dstom = cloneObjectMetadata(srcom); setOptionalObjectMetadata(dstom); - ChangeTracker changeTracker = new ChangeTracker( - keyToQualifiedPath(srcKey).toString(), - changeDetectionPolicy, - readContext.instrumentation.newInputStreamStatistics() - .getVersionMismatchCounter(), - srcAttributes); - changeTracker.processMetadata(srcom, "copy", 0); CopyObjectRequest copyObjectRequest = diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java index 7972183796aa3..d77513d9a822f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java @@ -25,7 +25,6 @@ import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.transfer.model.CopyResult; import com.google.common.annotations.VisibleForTesting; -import org.apache.hadoop.fs.s3a.S3ObjectAttributes; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,6 +32,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.S3ObjectAttributes; import org.apache.hadoop.fs.s3a.RemoteFileChangedException; import static org.apache.hadoop.fs.s3a.Constants.*; diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md index 84550ff011d3c..3d7979cdd3deb 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md @@ -1120,16 +1120,16 @@ server or client mode is used is determed by the ### No 
Versioning Metadata Available -When the first S3AFileSystem clients are upgraded to the version of -S3AFileSystem that contains these change tracking features, any existing -S3Guard metadata will not contain ETags or object version IDs. Reads of files +When the first S3AFileSystem clients are upgraded to a version of +`S3AFileSystem` that contains these change tracking features, any existing +S3Guard metadata will not contain ETags or object version IDs. Reads of files tracked in such S3Guard metadata will access whatever version of the file is -available in S3 at the time of read. Only if the file is subsequently updated +available in S3 at the time of read. Only if the file is subsequently updated will S3Guard start tracking ETag and object version ID and as such generating `RemoteFileChangedException` if an inconsistency is detected. Similarly, when S3Guard metadata is pruned, S3Guard will no longer be able to -detect an inconsistent read. S3Guard metadata should be retained for at least +detect an inconsistent read. S3Guard metadata should be retained for at least as long as the perceived possible read-after-overwrite temporary inconsistency window. That window is expected to be short, but there are no guarantees so it is at the administrator's discretion to weigh the risk. @@ -1139,43 +1139,43 @@ is at the administrator's discretion to weigh the risk. #### S3 Select S3 Select does not provide a capability for server-side ETag or object -version ID qualification. Whether fs.s3a.change.detection.mode is client or -server, S3Guard will cause a client-side check of the file version before -opening the file with S3 Select. If the current version does not match the +version ID qualification. Whether `fs.s3a.change.detection.mode` is "client" or +"server", S3Guard will cause a client-side check of the file version before +opening the file with S3 Select. If the current version does not match the version tracked in S3Guard, `RemoteFileChangedException` is thrown. 
It is still possible that the S3 Select read will access a different version of the file, if the visible file version changes between the version check and -the opening of the file. This can happen due to eventual consistency or +the opening of the file. This can happen due to eventual consistency or an overwrite of the file between the version check and the open of the file. #### Rename -Rename is implemented via copy in S3. With fs.s3a.change.detection.mode=client, -a fully reliable mechansim for ensuring the copied content is the expected +Rename is implemented via copy in S3. With `fs.s3a.change.detection.mode` set +to "client", a fully reliable mechansim for ensuring the copied content is the expected content is not possible. This is the case since there isn't necessarily a way to know the expected ETag or version ID to appear on the object resulting from the copy. -Furthermore, if fs.s3a.change.detection.mode=server and a third-party S3 -implemntation is used that doesn't honor the provided ETag or version ID, +Furthermore, if `fs.s3a.change.detection.mode` is "server" and a third-party S3 +implementation is used that doesn't honor the provided ETag or version ID, S3AFileSystem and S3Guard cannot detect it. -In either fs.s3.change.detection.mode=server or client, a client-side check +When `fs.s3.change.detection.mode` is "client", a client-side check will be performed before the copy to ensure the current version of the file -matches S3Guard metadata. If not, `RemoteFileChangedException` is thrown. +matches S3Guard metadata. If not, `RemoteFileChangedException` is thrown. Similar to as discussed with regard to S3 Select, this is not sufficient to guarantee that same version is the version copied. -When fs.s3.change.detection.mode=server, the expected version is also specified -in the underlying S3 CopyObjectRequest. 
As long as the server honors it, the +When `fs.s3.change.detection.mode` server, the expected version is also specified +in the underlying S3 `CopyObjectRequest`. As long as the server honors it, the copied object will be correct. -All this said, with the defaults of fs.s3.change.detection.mode=server and -fs.s3.change.detection.source=etag against Amazon's S3, copy should in fact +All this said, with the defaults of `fs.s3.change.detection.mode` of "server" and +`fs.s3.change.detection.source` of "etag", when working with Amazon's S3, copy should in fact either copy the expected file version or, in the case of an eventual consistency -anomaly, generate `RemoteFileChangedException`. The same should be true with -fs.s3.change.detection.source=versionid. +anomaly, generate `RemoteFileChangedException`. The same should be true when +`fs.s3.change.detection.source` = "versionid". #### Out of Sync Metadata @@ -1195,8 +1195,8 @@ If this happens, reads of the affected file(s) will result in * the S3Guard metadata is pruned The S3Guard metadata for a file can be corrected with the `s3guard import` -command as discussed above. The command can take a file URI instead of a -bucket URI to correct the metadata for a single file. For example: +command as discussed above. The command can take a file URI instead of a +bucket URI to correct the metadata for a single file. 
For example: ```bash hadoop s3guard import [-meta URI] s3a://my-bucket/file-with-bad-metadata diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADelayedFNF.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADelayedFNF.java index 94c5233f19a15..870172ec3e12e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADelayedFNF.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADelayedFNF.java @@ -25,6 +25,7 @@ import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; import org.apache.hadoop.test.LambdaTestUtils; + import org.junit.Assume; import org.junit.Test; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java index c82a8b9789486..da671030c2f54 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java @@ -29,6 +29,7 @@ import org.apache.hadoop.fs.s3a.s3guard.MetadataStore; import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore; import org.apache.hadoop.test.LambdaTestUtils; + import org.junit.Assume; import org.junit.Test; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java index 1685daaa74e50..ffa9e805d5099 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java @@ -50,6 +50,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; 
+import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; import org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore; import org.apache.hadoop.fs.s3a.s3guard.MetadataStore; @@ -58,18 +59,10 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; -import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE; -import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE_CLIENT; -import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE_NONE; -import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE_SERVER; -import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE_WARN; -import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_SOURCE; -import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_SOURCE_ETAG; -import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_SOURCE_VERSION_ID; -import static org.apache.hadoop.fs.s3a.Constants.RETRY_LIMIT; -import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL; +import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBucketOverrides; +import static org.apache.hadoop.fs.s3a.select.SelectConstants.S3_SELECT_CAPABILITY; import static org.apache.hadoop.fs.s3a.select.SelectConstants.SELECT_SQL; import static org.apache.hadoop.test.LambdaTestUtils.eventually; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -89,6 +82,7 @@ public class ITestS3ARemoteFileChanged extends AbstractS3ATestBase { private static final String TEST_DATA = "Some test data"; private static final int TEST_MAX_RETRIES = 5; + private static final String TEST_RETRY_INTERVAL = "100ms"; private static final String QUOTED_TEST_DATA = "\"" + TEST_DATA + "\""; @@ -106,7 +100,7 @@ private enum 
InteractionType { private final Collection expectedExceptionInteractions; private S3AFileSystem fs; - @Parameterized.Parameters + @Parameterized.Parameters(name = "{index}: source={0}; mode={1}") public static Collection params() { return Arrays.asList(new Object[][]{ // make sure it works with invalid config @@ -191,13 +185,16 @@ protected Configuration createConfiguration() { String bucketName = getTestBucketName(conf); removeBucketOverrides(bucketName, conf, CHANGE_DETECT_SOURCE, - CHANGE_DETECT_MODE); + CHANGE_DETECT_MODE, + RETRY_LIMIT, + RETRY_INTERVAL); conf.set(CHANGE_DETECT_SOURCE, changeDetectionSource); conf.set(CHANGE_DETECT_MODE, changeDetectionMode); // reduce retry limit so FileNotFoundException cases timeout faster, // speeding up the tests conf.setInt(RETRY_LIMIT, TEST_MAX_RETRIES); + conf.set(RETRY_INTERVAL, TEST_RETRY_INTERVAL); if (conf.getClass(S3_METADATA_STORE_IMPL, MetadataStore.class) == NullMetadataStore.class) { @@ -215,6 +212,8 @@ protected Configuration createConfiguration() { */ @Test public void testReadFileChangedStreamOpen() throws Throwable { + describe("Tests reading a file that is changed while the reader's" + + "InputStream is open."); final int originalLength = 8192; final byte[] originalDataset = dataset(originalLength, 'a', 32); final int newLength = originalLength + 1; @@ -303,18 +302,19 @@ public void testReadFileChangedStreamOpen() throws Throwable { */ @Test public void testReadFileChangedOutOfSyncMetadata() throws Throwable { - final Path testpath = writeOutOfSyncFileVersion("read.dat"); + final Path testpath = writeOutOfSyncFileVersion("fileChangedOutOfSync.dat"); skipIfVersionPolicyAndNoVersionId(testpath); - final FSDataInputStream instream = fs.open(testpath); - if (expectedExceptionInteractions.contains(InteractionType.READ)) { - intercept(RemoteFileChangedException.class, "", "read()", - () -> { - instream.read(); - }); - } else { - instream.read(); + try (final FSDataInputStream instream = fs.open(testpath)) { + 
if (expectedExceptionInteractions.contains(InteractionType.READ)) { + intercept(RemoteFileChangedException.class, "", "read()", + () -> { + instream.read(); + }); + } else { + instream.read(); + } } } @@ -328,9 +328,9 @@ public void testReadWithNoVersionMetadata() throws Throwable { skipIfVersionPolicyAndNoVersionId(testpath); - final FSDataInputStream instream = fs.open(testpath); - assertEquals(TEST_DATA, - IOUtils.toString(instream, Charset.forName("UTF-8")).trim()); + assertEquals("Contents of " + testpath, + TEST_DATA, + ContractTestUtils.readUTF8(fs, testpath, TEST_DATA.length())); } /** @@ -339,6 +339,7 @@ public void testReadWithNoVersionMetadata() throws Throwable { */ @Test public void testSelectChangedFile() throws Throwable { + requireS3Select(); final Path testpath = writeOutOfSyncFileVersion("select.dat"); skipIfVersionPolicyAndNoVersionId(testpath); @@ -349,7 +350,10 @@ public void testSelectChangedFile() throws Throwable { .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build()); } else { fs.openFile(testpath) - .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build().get(); + .must(SELECT_SQL, "SELECT * FROM S3OBJECT") + .build() + .get() + .close(); } } @@ -360,7 +364,9 @@ public void testSelectChangedFile() throws Throwable { */ @Test public void testSelectEventuallyConsistentFile() throws Throwable { - Assume.assumeTrue("S3Guard must be enabled", fs.hasMetadataStore()); + describe("Eventually Consistent S3 Select"); + requireS3Guard(); + requireS3Select(); AmazonS3 s3ClientSpy = Mockito.spy(fs.getAmazonS3Client()); fs.setAmazonS3Client(s3ClientSpy); @@ -372,7 +378,10 @@ public void testSelectEventuallyConsistentFile() throws Throwable { // should succeed since the inconsistency doesn't last longer than the // configured retry limit fs.openFile(testpath1) - .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build().get(); + .must(SELECT_SQL, "SELECT * FROM S3OBJECT") + .build() + .get() + .close(); // TEST_MAX_RETRIES + 2 is required to before failure since 
there is // one getObjectMetadata() call due to getFileStatus() before the @@ -388,7 +397,10 @@ public void testSelectEventuallyConsistentFile() throws Throwable { .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build()); } else { fs.openFile(testpath2) - .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build().get(); + .must(SELECT_SQL, "SELECT * FROM S3OBJECT") + .build() + .get() + .close(); } } @@ -398,15 +410,17 @@ public void testSelectEventuallyConsistentFile() throws Throwable { */ @Test public void testSelectWithNoVersionMetadata() throws Throwable { + requireS3Select(); final Path testpath = writeFileWithNoVersionMetadata("selectnoversion.dat"); skipIfVersionPolicyAndNoVersionId(testpath); - FSDataInputStream instream = fs.openFile(testpath) - .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build().get(); - assertEquals(QUOTED_TEST_DATA, - IOUtils.toString(instream, Charset.forName("UTF-8")).trim()); + try (FSDataInputStream instream = fs.openFile(testpath) + .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build().get()) { + assertEquals(QUOTED_TEST_DATA, + IOUtils.toString(instream, Charset.forName("UTF-8")).trim()); + } } /** @@ -438,7 +452,7 @@ public void testRenameChangedFile() throws Throwable { */ @Test public void testRenameEventuallyConsistentFile() throws Throwable { - Assume.assumeTrue("S3Guard must be enabled", fs.hasMetadataStore()); + requireS3Guard(); AmazonS3 s3ClientSpy = Mockito.spy(fs.getAmazonS3Client()); fs.setAmazonS3Client(s3ClientSpy); @@ -507,7 +521,7 @@ public void testRenameWithNoVersionMetadata() throws Throwable { */ @Test public void testReadAfterEventuallyConsistentWrite() throws Throwable { - Assume.assumeTrue("S3Guard must be enabled", fs.hasMetadataStore()); + requireS3Guard(); AmazonS3 s3ClientSpy = Mockito.spy(fs.getAmazonS3Client()); fs.setAmazonS3Client(s3ClientSpy); final Path testpath1 = @@ -542,7 +556,7 @@ public void testReadAfterEventuallyConsistentWrite() throws Throwable { */ @Test public void 
testEventuallyConsistentReadOnReopen() throws Throwable { - Assume.assumeTrue("S3Guard must be enabled", fs.hasMetadataStore()); + requireS3Guard(); AmazonS3 s3ClientSpy = Mockito.spy(fs.getAmazonS3Client()); fs.setAmazonS3Client(s3ClientSpy); String filename = "eventually-reopen.dat"; @@ -737,17 +751,17 @@ public S3Object answer(InvocationOnMock invocation) throws Throwable { /** * Stubs {@link AmazonS3#copyObject(CopyObjectRequest)} * within s3ClientSpy to throw precondition failed error until - * copyInconistentCallCount calls have been made. + * copyInconsistentCallCount calls have been made. * @param s3ClientSpy the spy to stub * @param testpath the path of the object the stub should apply to * @param newStatus the status metadata containing the ETag and versionId * that should be matched in order for the stub to apply - * @param copyInconistentCallCount how many times to return the + * @param copyInconsistentCallCount how many times to return the * precondition failed error */ private void stubTemporaryCopyInconsistency(AmazonS3 s3ClientSpy, Path testpath, S3AFileStatus newStatus, - int copyInconistentCallCount) { + int copyInconsistentCallCount) { Answer temporarilyPreconditionsNotMetAnswer = new Answer() { private int callCount = 0; @@ -756,7 +770,7 @@ private void stubTemporaryCopyInconsistency(AmazonS3 s3ClientSpy, public CopyObjectResult answer(InvocationOnMock invocation) throws Throwable { callCount++; - if (callCount <= copyInconistentCallCount) { + if (callCount <= copyInconsistentCallCount) { AmazonServiceException exception = new AmazonServiceException("preconditions not met"); exception.setStatusCode(SC_PRECONDITION_FAILED); @@ -922,4 +936,20 @@ private GetObjectMetadataRequest matchingMetadataRequest(Path path, return false; }); } + + /** + * Skip a test case if it needs S3Guard and the filesystem does + * not have it. 
+ */ + private void requireS3Guard() { + Assume.assumeTrue("S3Guard must be enabled", fs.hasMetadataStore()); + } + + /** + * Skip a test case if S3 Select is not supported on this store. + */ + private void requireS3Select() { + Assume.assumeTrue("S3 Select is not enabled", + getFileSystem().hasCapability(S3_SELECT_CAPABILITY)); + } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestObjectChangeDetectionAttributes.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestObjectChangeDetectionAttributes.java index d8a76065375bd..f24f63daaa197 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestObjectChangeDetectionAttributes.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestObjectChangeDetectionAttributes.java @@ -68,7 +68,8 @@ /** * Unit tests to ensure object eTag and versionId are captured on S3 PUT and - * used on GET. Further (integration) testing is performed in + * used on GET. + * Further (integration) testing is performed in * {@link org.apache.hadoop.fs.s3a.ITestS3ARemoteFileChanged}. 
*/ @RunWith(Parameterized.class) @@ -79,7 +80,7 @@ public TestObjectChangeDetectionAttributes(String changeDetectionSource) { this.changeDetectionSource = changeDetectionSource; } - @Parameterized.Parameters + @Parameterized.Parameters(name = "change={0}") public static Collection params() { return Arrays.asList(new Object[][]{ {CHANGE_DETECT_SOURCE_ETAG}, @@ -87,11 +88,6 @@ public static Collection params() { }); } - @Override - public void setup() throws Exception { - super.setup(); - } - @Override public Configuration createConfiguration() { Configuration conf = super.createConfiguration(); @@ -159,14 +155,14 @@ public void testCreateAndReadFileMultiPart() throws Exception { assertContent(file, path, content, eTag, versionId); // test overwrite - byte[] newConent = new byte[Constants.MULTIPART_MIN_SIZE + 1]; - Arrays.fill(newConent, (byte) 1); + byte[] newContent = new byte[Constants.MULTIPART_MIN_SIZE + 1]; + Arrays.fill(newContent, (byte) 1); String newETag = "newETag"; String newVersionId = "newVersionId"; - multipartUpload(file, path, newConent, newETag, newVersionId); + multipartUpload(file, path, newContent, newETag, newVersionId); assertVersionAttributes(path, newETag, newVersionId); - assertContent(file, path, newConent, newETag, newVersionId); + assertContent(file, path, newContent, newETag, newVersionId); } private void putObject(String file, Path path, byte[] content, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java index 1d45852c98946..2099edd248b63 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java @@ -21,8 +21,6 @@ import java.io.IOException; import java.util.List; -import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; -import 
org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; import org.junit.Assume; import org.junit.Test; import org.slf4j.Logger; @@ -35,6 +33,8 @@ import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.S3AInstrumentation; import org.apache.hadoop.fs.s3a.S3ATestUtils; From 3824980c034a4d24df467095627e865c4c8dbc14 Mon Sep 17 00:00:00 2001 From: Ben Roling Date: Wed, 8 May 2019 16:27:31 -0500 Subject: [PATCH 04/13] Fix test failures when bucket versioning disabled --- .../fs/s3a/ITestS3ARemoteFileChanged.java | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java index ffa9e805d5099..0f67f6e38b47e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java @@ -51,6 +51,7 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Mode; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; import org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore; import org.apache.hadoop.fs.s3a.s3guard.MetadataStore; @@ -128,6 +129,9 @@ public static Collection params() { InteractionType.EVENTUALLY_CONSISTENT_READ, InteractionType.READ_AFTER_DELETE, InteractionType.COPY, + // not InteractionType.EVENTUALLY_CONSISTENT_COPY as copy change + // detection can't really occur client-side. 
The eTag of + // the new object can't be expected to match. InteractionType.SELECT)}, {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_WARN, Arrays.asList( @@ -155,6 +159,9 @@ public static Collection params() { InteractionType.READ_AFTER_DELETE, InteractionType.EVENTUALLY_CONSISTENT_READ, InteractionType.COPY, + // not InteractionType.EVENTUALLY_CONSISTENT_COPY as copy change + // detection can't really occur client-side. The versionId of + // the new object can't be expected to match. InteractionType.SELECT)}, {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_WARN, @@ -527,6 +534,9 @@ public void testReadAfterEventuallyConsistentWrite() throws Throwable { final Path testpath1 = writeEventuallyConsistentFileVersion("eventually1.dat", s3ClientSpy, TEST_MAX_RETRIES, 0 , 0); + + skipIfVersionPolicyAndNoVersionId(testpath1); + try (FSDataInputStream instream1 = fs.open(testpath1)) { // succeeds on the last retry instream1.read(); @@ -563,6 +573,9 @@ public void testEventuallyConsistentReadOnReopen() throws Throwable { final Path testpath = writeEventuallyConsistentFileVersion(filename, s3ClientSpy, 0, 0, 0); + + skipIfVersionPolicyAndNoVersionId(testpath); + try (FSDataInputStream instream = fs.open(testpath)) { instream.read(); // overwrite the file, returning inconsistent version for @@ -658,8 +671,12 @@ private Path writeEventuallyConsistentFileVersion(String filename, stubTemporaryWrongVersion(s3ClientSpy, getObjectInconsistencyCount, testpath, originalStatus); - stubTemporaryCopyInconsistency(s3ClientSpy, testpath, newStatus, - copyInconsistencyCount); + if (fs.getChangeDetectionPolicy().getMode() == Mode.Server) { + // only stub inconsistency when mode is server since no constraints that + // should trigger inconsistency are passed in any other mode + stubTemporaryCopyInconsistency(s3ClientSpy, testpath, newStatus, + copyInconsistencyCount); + } stubTemporaryMetadataInconsistency(s3ClientSpy, testpath, originalStatus, newStatus, getMetdataInconsistencyCount); 
From 50f52ea73499cdfedbfa8a836dc939beef2b5364 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 9 May 2019 15:13:30 +0100 Subject: [PATCH 05/13] HADOOP-16085-steve-patch-002 -move CopyOutcome -add new (failing) test for directory renaming where one of the files underneath is eventually consistent. -pull out commonanity from test methods (spyOnFilesystem(), expectReadFailure()) -intercept() calls provide details on failure, primarily just by returning the result of the operation expected to fail. -ChangeDetectionPolicy-raised exceptions on copy failures don't include position any more -cache/restore metastore so even on test failures mockito is involved on teardown Change-Id: I1fbdc38e9083881e4d72f13f29845e34539ac4ed --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 92 ++--- .../fs/s3a/impl/ChangeDetectionPolicy.java | 9 +- .../hadoop/fs/s3a/impl/ChangeTracker.java | 19 +- .../hadoop/fs/s3a/impl/CopyOutcome.java | 80 +++++ .../fs/s3a/ITestS3ARemoteFileChanged.java | 338 ++++++++++++++---- 5 files changed, 385 insertions(+), 153 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyOutcome.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 75bef792400ec..f1552cb6a5328 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -49,6 +49,7 @@ import com.amazonaws.AmazonClientException; import com.amazonaws.AmazonServiceException; +import com.amazonaws.SdkBaseException; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; import com.amazonaws.services.s3.model.CannedAccessControlList; @@ -90,6 +91,7 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; 
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; +import org.apache.hadoop.fs.s3a.impl.CopyOutcome; import org.apache.hadoop.fs.s3a.select.InternalSelectConstants; import org.apache.hadoop.util.LambdaUtils; import org.apache.hadoop.fs.FileAlreadyExistsException; @@ -709,8 +711,8 @@ public S3AInputPolicy getInputPolicy() { } /** - * Get the change detection policy for this FS instance. Only public to allow - * access in tests in other packages. + * Get the change detection policy for this FS instance. + * Only public to allow access in tests in other packages. * @return the change detection policy */ @VisibleForTesting @@ -2948,11 +2950,10 @@ public List listAWSPolicyRules( * @param srcAttributes S3 attributes of the source object * @param readContext the read context * @return the result of the copy - * @throws AmazonClientException on failures inside the AWS SDK * @throws InterruptedIOException the operation was interrupted * @throws IOException Other IO problems */ - @Retries.RetryMixed + @Retries.RetryTranslated private CopyResult copyFile(String srcKey, String dstKey, long size, S3ObjectAttributes srcAttributes, S3AReadOpContext readContext) throws IOException, InterruptedIOException { @@ -2978,13 +2979,13 @@ private CopyResult copyFile(String srcKey, String dstKey, long size, Invoker readInvoker = readContext.getReadInvoker(); return readInvoker.retry( "\"copyFile(\" + srcKey + \", \" + dstKey + \")\", srcKey", null, - false, + true, () -> { ObjectMetadata srcom = getObjectMetadata(srcKey); ObjectMetadata dstom = cloneObjectMetadata(srcom); setOptionalObjectMetadata(dstom); - changeTracker.processMetadata(srcom, "copy", 0); + changeTracker.processMetadata(srcom, "copy"); CopyObjectRequest copyObjectRequest = new CopyObjectRequest(bucket, srcKey, bucket, dstKey); @@ -2995,68 +2996,29 @@ private CopyResult copyFile(String srcKey, String dstKey, long size, copyObjectRequest.setNewObjectMetadata(dstom); Copy copy = transfers.copy(copyObjectRequest); 
copy.addProgressListener(progressListener); - try { - CopyOutcome copyOutcome = copyOutcome(copy); - InterruptedException interruptedException = - copyOutcome.getInterruptedException(); - if (interruptedException != null) { - throw interruptedException; - } - RuntimeException runtimeException = - copyOutcome.getRuntimeException(); - if (runtimeException != null) { - changeTracker.processException(runtimeException, "copy"); - throw runtimeException; - } - CopyResult result = copyOutcome.getCopyResult(); - changeTracker.processResponse(result); - incrementWriteOperations(); - instrumentation.filesCopied(1, size); - return result; - } catch (InterruptedException e) { - throw new InterruptedIOException("Interrupted copying " + srcKey - + " to " + dstKey + ", cancelling"); + CopyOutcome copyOutcome = CopyOutcome.waitForCopy(copy); + InterruptedException interruptedException = + copyOutcome.getInterruptedException(); + if (interruptedException != null) { + // copy interrupted: convert to an IOException. 
+ throw (IOException)new InterruptedIOException( + "Interrupted copying " + srcKey + + " to " + dstKey + ", cancelling") + .initCause(interruptedException); } + SdkBaseException awsException = copyOutcome.getAwsException(); + if (awsException != null) { + changeTracker.processException(awsException, "copy"); + throw awsException; + } + CopyResult result = copyOutcome.getCopyResult(); + changeTracker.processResponse(result); + incrementWriteOperations(); + instrumentation.filesCopied(1, size); + return result; }); } - private static CopyOutcome copyOutcome(Copy copy) { - try { - CopyResult result = copy.waitForCopyResult(); - return new CopyOutcome(result, null, null); - } catch (RuntimeException e) { - return new CopyOutcome(null, null, e); - } catch (InterruptedException e) { - return new CopyOutcome(null, e, null); - } - } - - private static final class CopyOutcome { - private final CopyResult copyResult; - private final InterruptedException interruptedException; - private final RuntimeException runtimeException; - - private CopyOutcome(CopyResult copyResult, - InterruptedException interruptedException, - RuntimeException runtimeException) { - this.copyResult = copyResult; - this.interruptedException = interruptedException; - this.runtimeException = runtimeException; - } - - public CopyResult getCopyResult() { - return copyResult; - } - - public InterruptedException getInterruptedException() { - return interruptedException; - } - - public RuntimeException getRuntimeException() { - return runtimeException; - } - } - /** * Set the optional parameters when initiating the request (encryption, * headers, storage, etc). 
@@ -3903,7 +3865,7 @@ private FSDataInputStream select(final Path source, .getVersionMismatchCounter(), objectAttributes); ObjectMetadata objectMetadata = getObjectMetadata(path); - changeTracker.processMetadata(objectMetadata, "select", 0); + changeTracker.processMetadata(objectMetadata, "select"); } }); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java index d77513d9a822f..c513d7c35531e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java @@ -269,6 +269,7 @@ public ImmutablePair onChangeDetected( long position, String operation, long timesAlreadyDetected) { + String positionText = position >= 0 ? (" at " + position) : ""; switch (mode) { case None: // something changed; we don't care. @@ -278,8 +279,8 @@ public ImmutablePair onChangeDetected( // only warn on the first detection to avoid a noisy log LOG.warn( String.format( - "%s change detected on %s %s at %d. Expected %s got %s", - getSource(), operation, uri, position, revisionId, + "%s change detected on %s %s%s. Expected %s got %s", + getSource(), operation, uri, positionText, revisionId, newRevisionId)); return new ImmutablePair<>(true, null); } @@ -293,9 +294,9 @@ public ImmutablePair onChangeDetected( operation, String.format("%s " + CHANGE_DETECTED - + " while reading at position %s." + + " on %s%s." 
+ " Expected %s got %s", - getSource(), position, revisionId, newRevisionId))); + getSource(), operation, positionText, revisionId, newRevisionId))); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java index 7278307e5f1f8..47788762ee7dd 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java @@ -166,9 +166,10 @@ public void processResponse(final S3Object object, versionMismatches.incrementAndGet(); throw new RemoteFileChangedException(uri, operation, String.format(CHANGE_REPORTED_BY_S3 - + " while reading" + + " during %s" + " at position %s." + " %s %s was unavailable", + operation, pos, getSource(), getRevisionId())); @@ -177,7 +178,7 @@ public void processResponse(final S3Object object, } } - processMetadata(object.getObjectMetadata(), operation, pos); + processMetadata(object.getObjectMetadata(), operation); } /** @@ -235,17 +236,23 @@ public void processException(Exception e, String operation) throws * policy. * @param metadata metadata returned from server * @param operation operation in progress - * @param pos offset of read * @throws PathIOException raised on failure * @throws RemoteFileChangedException if the remote file has changed. */ public void processMetadata(final ObjectMetadata metadata, - final String operation, - final long pos) throws PathIOException { + final String operation) throws PathIOException { final String newRevisionId = policy.getRevisionId(metadata, uri); - processNewRevision(newRevisionId, operation, pos); + processNewRevision(newRevisionId, operation, -1); } + /** + * Validate a revision from the server against our expectations. + * @param newRevisionId new revision. 
+ * @param operation operation in progress + * @param pos offset in the file; -1 for "none" + * @throws PathIOException raised on failure + * @throws RemoteFileChangedException if the remote file has changed. + */ private void processNewRevision(final String newRevisionId, final String operation, final long pos) throws PathIOException { if (newRevisionId == null && policy.isRequireVersion()) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyOutcome.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyOutcome.java new file mode 100644 index 0000000000000..16459ac45b850 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyOutcome.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import com.amazonaws.SdkBaseException; +import com.amazonaws.services.s3.transfer.Copy; +import com.amazonaws.services.s3.transfer.model.CopyResult; + +/** + * Extracts the outcome of a TransferManager-executed copy operation. + */ +public final class CopyOutcome { + + /** + * Result of a successful copy. 
+ */ + private final CopyResult copyResult; + + /** the copy was interrupted. */ + private final InterruptedException interruptedException; + + /** + * The copy raised an AWS Exception of some form. + */ + private final SdkBaseException awsException; + + public CopyOutcome(CopyResult copyResult, + InterruptedException interruptedException, + SdkBaseException awsException) { + this.copyResult = copyResult; + this.interruptedException = interruptedException; + this.awsException = awsException; + } + + public CopyResult getCopyResult() { + return copyResult; + } + + public InterruptedException getInterruptedException() { + return interruptedException; + } + + public SdkBaseException getAwsException() { + return awsException; + } + + /** + * Calls {@code Copy.waitForCopyResult()} to await the result, converts + * it to a copy outcome. + * Exceptions caught and + * @param copy the copy operation. + * @return the outcome. + */ + public static CopyOutcome waitForCopy(Copy copy) { + try { + CopyResult result = copy.waitForCopyResult(); + return new CopyOutcome(result, null, null); + } catch (SdkBaseException e) { + return new CopyOutcome(null, null, e); + } catch (InterruptedException e) { + return new CopyOutcome(null, e, null); + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java index 0f67f6e38b47e..33f1d4da7e844 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java @@ -18,13 +18,12 @@ package org.apache.hadoop.fs.s3a; -import java.io.ByteArrayOutputStream; import java.io.FileNotFoundException; import java.io.IOException; -import java.io.PrintWriter; import java.nio.charset.Charset; import java.util.Arrays; import java.util.Collection; +import 
java.util.Optional; import com.amazonaws.AmazonServiceException; import com.amazonaws.services.s3.AmazonS3; @@ -34,8 +33,8 @@ import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.S3Object; +import com.google.common.base.Charsets; import org.junit.Assume; -import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -47,10 +46,10 @@ import org.slf4j.LoggerFactory; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Mode; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; import org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore; @@ -59,6 +58,7 @@ import org.apache.hadoop.fs.s3a.s3guard.PathMetadata; import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.fs.contract.ContractTestUtils.readUTF8; import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; @@ -82,11 +82,16 @@ public class ITestS3ARemoteFileChanged extends AbstractS3ATestBase { LoggerFactory.getLogger(ITestS3ARemoteFileChanged.class); private static final String TEST_DATA = "Some test data"; + + private static final byte[] TEST_DATA_BYTES = TEST_DATA.getBytes( + Charsets.UTF_8); private static final int TEST_MAX_RETRIES = 5; - private static final String TEST_RETRY_INTERVAL = "100ms"; + private static final String TEST_RETRY_INTERVAL = "10ms"; private static final String QUOTED_TEST_DATA = "\"" + TEST_DATA + "\""; + private Optional originalS3Client = Optional.empty(); + private 
enum InteractionType { READ, READ_AFTER_DELETE, @@ -101,7 +106,7 @@ private enum InteractionType { private final Collection expectedExceptionInteractions; private S3AFileSystem fs; - @Parameterized.Parameters(name = "{index}: source={0}; mode={1}") + @Parameterized.Parameters(name = "{0}-{1}") public static Collection params() { return Arrays.asList(new Object[][]{ // make sure it works with invalid config @@ -181,9 +186,20 @@ public ITestS3ARemoteFileChanged(String changeDetectionSource, this.expectedExceptionInteractions = expectedExceptionInteractions; } - @Before - public void setUp() { + @Override + public void setup() throws Exception { + super.setup(); fs = getFileSystem(); + // cache the original S3 client for teardown. + originalS3Client = Optional.of( + fs.getAmazonS3ClientForTesting("caching")); + } + + @Override + public void teardown() throws Exception { + // restore the s3 client so there's no mocking interfering with the teardown + originalS3Client.ifPresent(fs::setAmazonS3Client); + super.teardown(); } @Override @@ -213,13 +229,22 @@ protected Configuration createConfiguration() { return conf; } + /** + * Get the path of this method, including parameterized values. + * @return a path unique to this method and parameters + * @throws IOException failure. + */ + protected Path path() throws IOException { + return super.path(getMethodName()); + } + /** * Tests reading a file that is changed while the reader's InputStream is * open. 
*/ @Test public void testReadFileChangedStreamOpen() throws Throwable { - describe("Tests reading a file that is changed while the reader's" + describe("Tests reading a file that is changed while the reader's " + "InputStream is open."); final int originalLength = 8192; final byte[] originalDataset = dataset(originalLength, 'a', 32); @@ -260,8 +285,7 @@ public void testReadFileChangedStreamOpen() throws Throwable { instream.seek(instream.getPos() - 100); if (expectedExceptionInteractions.contains(InteractionType.READ)) { - intercept(RemoteFileChangedException.class, "", "read", - () -> instream.read()); + expectReadFailure(instream); } else { instream.read(); } @@ -272,8 +296,7 @@ public void testReadFileChangedStreamOpen() throws Throwable { instream.seek(0); if (expectedExceptionInteractions.contains(InteractionType.READ)) { - intercept(RemoteFileChangedException.class, "", "read", - () -> instream.read(buf)); + expectReadFailure(instream); intercept(RemoteFileChangedException.class, "", "read", () -> instream.read(0, buf, 0, buf.length)); intercept(RemoteFileChangedException.class, "", "readfully", @@ -315,10 +338,7 @@ public void testReadFileChangedOutOfSyncMetadata() throws Throwable { try (final FSDataInputStream instream = fs.open(testpath)) { if (expectedExceptionInteractions.contains(InteractionType.READ)) { - intercept(RemoteFileChangedException.class, "", "read()", - () -> { - instream.read(); - }); + expectReadFailure(instream); } else { instream.read(); } @@ -337,7 +357,7 @@ public void testReadWithNoVersionMetadata() throws Throwable { assertEquals("Contents of " + testpath, TEST_DATA, - ContractTestUtils.readUTF8(fs, testpath, TEST_DATA.length())); + readUTF8(fs, testpath, -1)); } /** @@ -374,8 +394,7 @@ public void testSelectEventuallyConsistentFile() throws Throwable { describe("Eventually Consistent S3 Select"); requireS3Guard(); requireS3Select(); - AmazonS3 s3ClientSpy = Mockito.spy(fs.getAmazonS3Client()); - fs.setAmazonS3Client(s3ClientSpy); + 
AmazonS3 s3ClientSpy = spyOnFilesystem(); final Path testpath1 = writeEventuallyConsistentFileVersion( "select1.dat", s3ClientSpy, 0, TEST_MAX_RETRIES, 0); @@ -433,7 +452,7 @@ public void testSelectWithNoVersionMetadata() throws Throwable { /** * Tests doing a rename() on a file where the version visible in S3 does not * match the version tracked in the metadata store. - * @throws Throwable + * @throws Throwable failure */ @Test public void testRenameChangedFile() throws Throwable { @@ -443,15 +462,30 @@ public void testRenameChangedFile() throws Throwable { final Path dest = path("dest.dat"); if (expectedExceptionInteractions.contains(InteractionType.COPY)) { - intercept(RemoteFileChangedException.class, "", "copy()", - () -> { - fs.rename(testpath, dest); - }); + intercept(RemoteFileChangedException.class, "", + "expected copy() failure", + () -> fs.rename(testpath, dest)); } else { fs.rename(testpath, dest); } } + /** + * Total inconsistent response count across getObjectMetadata() and + * copyObject() for a rename. + * The split of inconsistent responses between getObjectMetadata() and + * copyObject() is somewhat arbitrary. + * @return the inconsistencies for (metadata, copy) + */ + private Pair renameInconsistencyCounts() { + int maxInconsistenciesBeforeFailure = TEST_MAX_RETRIES + 2; + int copyInconsistencyCount = versionCheckingIsOnServer() ? 
2 : 0; + + int metadataInconsistencyCount = + maxInconsistenciesBeforeFailure - copyInconsistencyCount; + return Pair.of(metadataInconsistencyCount, copyInconsistencyCount); + } + /** * Tests doing a rename() on a file where the version visible in S3 does not * match the version in the metadata store until a certain number of retries @@ -460,23 +494,21 @@ public void testRenameChangedFile() throws Throwable { @Test public void testRenameEventuallyConsistentFile() throws Throwable { requireS3Guard(); - AmazonS3 s3ClientSpy = Mockito.spy(fs.getAmazonS3Client()); - fs.setAmazonS3Client(s3ClientSpy); + AmazonS3 s3ClientSpy = spyOnFilesystem(); // Total inconsistent response count across getObjectMetadata() and - // copyObject() must be TEST_MAX_RETRIES + 2 to generate a failure - // since there is one getObjectMetadata() call from innerRename() before // copyObject(). // The split of inconsistent responses between getObjectMetadata() and // copyObject() is arbitrary. - int maxInconsistenciesBeforeFailure = TEST_MAX_RETRIES + 1; - int metadataInconsistencyCount = 3; + Pair counts = renameInconsistencyCounts(); + int metadataInconsistencyCount = counts.getLeft(); + int copyInconsistencyCount = counts.getRight(); final Path testpath1 = writeEventuallyConsistentFileVersion("rename-eventually1.dat", s3ClientSpy, 0, metadataInconsistencyCount, - maxInconsistenciesBeforeFailure - metadataInconsistencyCount); + copyInconsistencyCount); skipIfVersionPolicyAndNoVersionId(testpath1); @@ -484,27 +516,82 @@ public void testRenameEventuallyConsistentFile() throws Throwable { // shouldn't fail since the inconsistency doesn't last through the // configured retry limit fs.rename(testpath1, dest1); + } + + /** + * Tests doing a rename() on a file where the version visible in S3 does not + * match the version in the metadata store until a certain number of retries + * has been met. 
+ */ + @Test + public void testRenameEventuallyConsistentFile2() throws Throwable { + requireS3Guard(); + AmazonS3 s3ClientSpy = Mockito.spy(fs.getAmazonS3ClientForTesting("mocking")); + fs.setAmazonS3Client(s3ClientSpy); + Pair counts = renameInconsistencyCounts(); + int metadataInconsistencyCount = counts.getLeft(); + int copyInconsistencyCount = counts.getRight(); final Path testpath2 = writeEventuallyConsistentFileVersion("rename-eventually2.dat", s3ClientSpy, 0, metadataInconsistencyCount, - maxInconsistenciesBeforeFailure - metadataInconsistencyCount + 1); + copyInconsistencyCount + 1); + skipIfVersionPolicyAndNoVersionId(testpath2); final Path dest2 = path("dest2.dat"); if (expectedExceptionInteractions.contains( InteractionType.EVENTUALLY_CONSISTENT_COPY)) { // should fail since the inconsistency is set up to persist longer than // the configured retry limit - intercept(RemoteFileChangedException.class, "", "copy()", - () -> { - fs.rename(testpath2, dest2); - }); + intercept(RemoteFileChangedException.class, "", + "expected copy() failure", + () -> fs.rename(testpath2, dest2)); } else { fs.rename(testpath2, dest2); } } + /** + * Tests doing a rename() on a directory containing + * an file which is eventually consistent. 
+ */ + @Test + public void testRenameEventuallyConsistentDirectory() throws Throwable { + requireS3Guard(); + AmazonS3 s3ClientSpy = spyOnFilesystem(); + Path basedir = path(); + Path sourcedir = new Path(basedir, "sourcedir"); + fs.mkdirs(sourcedir); + Path destdir = new Path(basedir, "destdir"); + String inconsistent = "inconsistent"; + String consistent = "consistent"; + Path inconsistentFile = new Path(sourcedir, inconsistent); + Path consistentFile = new Path(sourcedir, consistent); + + // write the consistent data + writeDataset(fs, consistentFile, TEST_DATA_BYTES, TEST_DATA_BYTES.length, + 1024, true, true); + + Pair counts = renameInconsistencyCounts(); + int metadataInconsistencyCount = counts.getLeft(); + int copyInconsistencyCount = counts.getRight(); + + writeEventuallyConsistentData( + s3ClientSpy, + inconsistentFile, + TEST_DATA_BYTES, + 0, + metadataInconsistencyCount, + copyInconsistencyCount); + + skipIfVersionPolicyAndNoVersionId(inconsistentFile); + + // must not fail since the inconsistency doesn't last through the + // configured retry limit + fs.rename(sourcedir, destdir); + } + /** * Ensures a file can be renamed when there is no version metadata * (ETag, versionId). 
@@ -518,9 +605,9 @@ public void testRenameWithNoVersionMetadata() throws Throwable { final Path dest = path("noversiondest.dat"); fs.rename(testpath, dest); - FSDataInputStream inputStream = fs.open(dest); - assertEquals(TEST_DATA, - IOUtils.toString(inputStream, Charset.forName("UTF-8")).trim()); + assertEquals("Contents of " + dest, + TEST_DATA, + readUTF8(fs, dest, -1)); } /** @@ -529,8 +616,7 @@ public void testRenameWithNoVersionMetadata() throws Throwable { @Test public void testReadAfterEventuallyConsistentWrite() throws Throwable { requireS3Guard(); - AmazonS3 s3ClientSpy = Mockito.spy(fs.getAmazonS3Client()); - fs.setAmazonS3Client(s3ClientSpy); + AmazonS3 s3ClientSpy = spyOnFilesystem(); final Path testpath1 = writeEventuallyConsistentFileVersion("eventually1.dat", s3ClientSpy, TEST_MAX_RETRIES, 0 , 0); @@ -541,7 +627,15 @@ public void testReadAfterEventuallyConsistentWrite() throws Throwable { // succeeds on the last retry instream1.read(); } + } + /** + * Ensures S3Guard and retries allow an eventually consistent read. 
+ */ + @Test + public void testReadAfterEventuallyConsistentWrite2() throws Throwable { + requireS3Guard(); + AmazonS3 s3ClientSpy = spyOnFilesystem(); final Path testpath2 = writeEventuallyConsistentFileVersion("eventually2.dat", s3ClientSpy, TEST_MAX_RETRIES + 1, 0, 0); @@ -549,10 +643,7 @@ public void testReadAfterEventuallyConsistentWrite() throws Throwable { if (expectedExceptionInteractions.contains( InteractionType.EVENTUALLY_CONSISTENT_READ)) { // keeps retrying and eventually gives up with RemoteFileChangedException - intercept(RemoteFileChangedException.class, "", "read()", - () -> { - instream2.read(); - }); + expectReadFailure(instream2); } else { instream2.read(); } @@ -567,8 +658,7 @@ public void testReadAfterEventuallyConsistentWrite() throws Throwable { @Test public void testEventuallyConsistentReadOnReopen() throws Throwable { requireS3Guard(); - AmazonS3 s3ClientSpy = Mockito.spy(fs.getAmazonS3Client()); - fs.setAmazonS3Client(s3ClientSpy); + AmazonS3 s3ClientSpy = spyOnFilesystem(); String filename = "eventually-reopen.dat"; final Path testpath = writeEventuallyConsistentFileVersion(filename, @@ -587,10 +677,7 @@ public void testEventuallyConsistentReadOnReopen() throws Throwable { // if it retries at all, it will retry forever, which should fail // the test. The expected behavior is immediate // RemoteFileChangedException. 
- intercept(RemoteFileChangedException.class, "", "read()", - () -> { - instream.read(); - }); + expectReadFailure(instream); } else { instream.read(); } @@ -604,11 +691,7 @@ public void testEventuallyConsistentReadOnReopen() throws Throwable { */ private Path writeOutOfSyncFileVersion(String filename) throws IOException { final Path testpath = path(filename); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - PrintWriter printWriter = new PrintWriter(out); - printWriter.println(TEST_DATA); - printWriter.close(); - final byte[] dataset = out.toByteArray(); + final byte[] dataset = TEST_DATA_BYTES; writeDataset(fs, testpath, dataset, dataset.length, 1024, false); S3AFileStatus originalStatus = (S3AFileStatus) fs.getFileStatus(testpath); @@ -630,31 +713,62 @@ private Path writeOutOfSyncFileVersion(String filename) throws IOException { } /** - * Writes a file where the file will be inconsistent in S3 for some duration. + * Writes {@link #TEST_DATA} to a file where the file will be inconsistent + * in S3 for a set of operations. * The duration of the inconsistency is controlled by the - * getObjectInconsistencyCount, getMetdataInconsistencyCount, and - * copyInconistentCallCount parameters. The inconsistency manifests in - * AmazonS3#getObject, AmazonS3#getObjectMetadata, and AmazonS3#copyObject. + * getObjectInconsistencyCount, getMetadataInconsistencyCount, and + * copyInconsistentCallCount parameters. + * The inconsistency manifests in AmazonS3#getObject, + * AmazonS3#getObjectMetadata, and AmazonS3#copyObject. * This method sets up the provided s3ClientSpy to return a response to each * of these methods indicating an inconsistency where the requested object * version (eTag or versionId) is not available until a certain retry - * threshold is met. Providing inconsistent call count values above or + * threshold is met. 
+ * Providing inconsistent call count values above or * below the overall retry limit allows a test to simulate a condition that * either should or should not result in an overall failure from retry * exhaustion. + * @param filename name of file (will be under test path) + * @param s3ClientSpy s3 client to patch + * @param getObjectInconsistencyCount number of GET inconsistencies + * @param getMetadataInconsistencyCount number of HEAD inconsistencies + * @param copyInconsistencyCount number of COPY inconsistencies. + * @return the path written + * @throws IOException failure to write the test data. */ private Path writeEventuallyConsistentFileVersion(String filename, AmazonS3 s3ClientSpy, int getObjectInconsistencyCount, - int getMetdataInconsistencyCount, + int getMetadataInconsistencyCount, int copyInconsistencyCount) throws IOException { - final Path testpath = path(filename); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - PrintWriter printWriter = new PrintWriter(out); - printWriter.println(TEST_DATA); - printWriter.close(); - final byte[] dataset = out.toByteArray(); + return writeEventuallyConsistentData(s3ClientSpy, + path(filename), + TEST_DATA_BYTES, + getObjectInconsistencyCount, + getMetadataInconsistencyCount, + copyInconsistencyCount); + } + + /** + * Writes data to a path and configures the S3 client for inconsistent + * HEAD, GET or COPY operations. + * @param testpath absolute path of file + * @param s3ClientSpy s3 client to patch + * @param dataset bytes to write. + * @param getObjectInconsistencyCount number of GET inconsistencies + * @param getMetadataInconsistencyCount number of HEAD inconsistencies + * @param copyInconsistencyCount number of COPY inconsistencies. + * @return the path written + * @throws IOException failure to write the test data. 
+ */ + private Path writeEventuallyConsistentData(final AmazonS3 s3ClientSpy, + final Path testpath, + final byte[] dataset, + final int getObjectInconsistencyCount, + final int getMetadataInconsistencyCount, + final int copyInconsistencyCount) + throws IOException { writeDataset(fs, testpath, dataset, dataset.length, 1024, true); S3AFileStatus originalStatus = (S3AFileStatus) fs.getFileStatus(testpath); @@ -663,7 +777,12 @@ private Path writeEventuallyConsistentFileVersion(String filename, writeDataset(fs, testpath, dataset, dataset.length / 2, 1024, true); + LOG.debug("Original file info: {}: version={}, etag={}", testpath, + originalStatus.getVersionId(), originalStatus.getETag()); + S3AFileStatus newStatus = (S3AFileStatus) fs.getFileStatus(testpath); + LOG.debug("Updated file info: {}: version={}, etag={}", testpath, + newStatus.getVersionId(), newStatus.getETag()); stubTemporaryUnavailable(s3ClientSpy, getObjectInconsistencyCount, testpath, newStatus); @@ -671,7 +790,7 @@ private Path writeEventuallyConsistentFileVersion(String filename, stubTemporaryWrongVersion(s3ClientSpy, getObjectInconsistencyCount, testpath, originalStatus); - if (fs.getChangeDetectionPolicy().getMode() == Mode.Server) { + if (versionCheckingIsOnServer()) { // only stub inconsistency when mode is server since no constraints that // should trigger inconsistency are passed in any other mode stubTemporaryCopyInconsistency(s3ClientSpy, testpath, newStatus, @@ -679,11 +798,21 @@ private Path writeEventuallyConsistentFileVersion(String filename, } stubTemporaryMetadataInconsistency(s3ClientSpy, testpath, originalStatus, - newStatus, getMetdataInconsistencyCount); + newStatus, getMetadataInconsistencyCount); return testpath; } + /** + * Log the call hierarchy at debug level, helps track down + * where calls to operations are coming from. 
+ */ + private void logLocationAtDebug() { + if (LOG.isDebugEnabled()) { + LOG.debug("Call hierarchy", new Exception("here")); + } + } + /** * Stubs {@link AmazonS3#getObject(GetObjectRequest)} * within s3ClientSpy to return null until inconsistentCallCount calls have @@ -706,6 +835,9 @@ public S3Object answer(InvocationOnMock invocation) throws Throwable { // inconsistentCallCount surpassed callCount++; if (callCount <= inconsistentCallCount) { + LOG.info("Temporarily unavailable {} count {} of {}", + testpath, callCount, inconsistentCallCount); + logLocationAtDebug(); return null; } return (S3Object) invocation.callRealMethod(); @@ -747,6 +879,9 @@ public S3Object answer(InvocationOnMock invocation) throws Throwable { callCount++; S3Object s3Object = (S3Object) invocation.callRealMethod(); if (callCount <= inconsistentCallCount) { + LOG.info("Temporary Wrong Version {} count {} of {}", + testpath, callCount, inconsistentCallCount); + logLocationAtDebug(); S3Object objectSpy = Mockito.spy(s3Object); ObjectMetadata metadataSpy = Mockito.spy(s3Object.getObjectMetadata()); @@ -788,8 +923,12 @@ public CopyObjectResult answer(InvocationOnMock invocation) throws Throwable { callCount++; if (callCount <= copyInconsistentCallCount) { + String message = "preconditions not met on call " + callCount + + " of " + copyInconsistentCallCount; + LOG.info("Copying {}: {}", testpath, message); + logLocationAtDebug(); AmazonServiceException exception = - new AmazonServiceException("preconditions not met"); + new AmazonServiceException(message); exception.setStatusCode(SC_PRECONDITION_FAILED); throw exception; } @@ -830,6 +969,9 @@ public ObjectMetadata answer(InvocationOnMock invocation) (ObjectMetadata) invocation.callRealMethod(); callCount++; if (callCount <= metadataInconsistentCallCount) { + LOG.info("Inconsistent metadata {} count {} of {}", + testpath, callCount, metadataInconsistentCallCount); + logLocationAtDebug(); ObjectMetadata metadataSpy = Mockito.spy(objectMetadata); 
when(metadataSpy.getETag()).thenReturn(originalStatus.getETag()); @@ -854,12 +996,7 @@ public ObjectMetadata answer(InvocationOnMock invocation) private Path writeFileWithNoVersionMetadata(String filename) throws IOException { final Path testpath = path(filename); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - PrintWriter printWriter = new PrintWriter(out); - printWriter.println(TEST_DATA); - printWriter.close(); - final byte[] dataset = out.toByteArray(); - writeDataset(fs, testpath, dataset, dataset.length, + writeDataset(fs, testpath, TEST_DATA_BYTES, TEST_DATA_BYTES.length, 1024, false); S3AFileStatus originalStatus = (S3AFileStatus) fs.getFileStatus(testpath); @@ -969,4 +1106,49 @@ private void requireS3Select() { Assume.assumeTrue("S3 Select is not enabled", getFileSystem().hasCapability(S3_SELECT_CAPABILITY)); } + + /** + * Spy on the filesystem at the S3 client level. + * @return a mocked S3 client to which the test FS is bonded. + */ + private AmazonS3 spyOnFilesystem() { + AmazonS3 s3ClientSpy = Mockito.spy( + fs.getAmazonS3ClientForTesting("mocking")); + fs.setAmazonS3Client(s3ClientSpy); + return s3ClientSpy; + } + + /** + * Expect reading this stream to fail. + * @param instream input stream. + * @return the caught exception. + * @throws Exception any other exception + */ + + private RemoteFileChangedException expectReadFailure( + final FSDataInputStream instream) + throws Exception { + return intercept(RemoteFileChangedException.class, "", + "read() returned", + () -> readToText(instream.read())); + } + + /** + * Convert the result of a read to a text string for errors. + * @param r result of the read() call. + * @return a string for exception text. + */ + private String readToText(int r) { + return r < 32 + ? (String.format("%02d", r)) + : (String.format("%c", (char) r)); + } + + /** + * Is the version checking on the server? + * @return true if the server returns 412 errors. 
+ */ + private boolean versionCheckingIsOnServer() { + return fs.getChangeDetectionPolicy().getMode() == Mode.Server; + } } From c924d740b56467a3f8047d3f768219d5f24971ce Mon Sep 17 00:00:00 2001 From: Ben Roling Date: Fri, 10 May 2019 13:27:47 -0500 Subject: [PATCH 06/13] Fix testRenameEventuallyConsistentDirectory failures eTag and versionid weren't coming through on directory listings. --- .../org/apache/hadoop/fs/s3a/Listing.java | 16 +++++++- .../fs/s3a/ITestS3ARemoteFileChanged.java | 36 ++++++++++++++--- .../fs/s3a/ITestS3GuardListConsistency.java | 40 +++++++++++++++++++ 3 files changed, 84 insertions(+), 8 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java index e002e3ed5b30b..c0ef226c506f9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java @@ -38,7 +38,9 @@ import java.util.List; import java.util.ListIterator; import java.util.NoSuchElementException; +import java.util.Optional; import java.util.Set; +import java.util.stream.StreamSupport; import static org.apache.hadoop.fs.s3a.Constants.S3N_FOLDER_SUFFIX; import static org.apache.hadoop.fs.s3a.S3AUtils.createFileStatus; @@ -390,8 +392,18 @@ public S3AFileStatus next() throws IOException { status = statusBatchIterator.next(); // We remove from provided list the file status listed by S3 so that // this does not return duplicate items. - if (providedStatus.remove(status)) { - LOG.debug("Removed the status from provided file status {}", status); + + // The provided status is returned as it is assumed to have the better + // metadata (i.e. 
the eTag and versionId from S3Guard) + Optional provided = + StreamSupport.stream(providedStatus.spliterator(), false) + .filter(element -> status.equals(element)).findFirst(); + if (provided.isPresent()) { + providedStatus.remove(status); + LOG.debug( + "Removed and returned the status from provided file status {}", + status); + return provided.get(); } } else { if (providedStatusIterator.hasNext()) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java index 33f1d4da7e844..eb5b788ce9914 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java @@ -475,10 +475,14 @@ public void testRenameChangedFile() throws Throwable { * copyObject() for a rename. * The split of inconsistent responses between getObjectMetadata() and * copyObject() is somewhat arbitrary. + * @param metadataCallsExpectedBeforeRetryLoop number of getObjectMetadata + * calls expected before the retry loop * @return the inconsistencies for (metadata, copy) */ - private Pair renameInconsistencyCounts() { - int maxInconsistenciesBeforeFailure = TEST_MAX_RETRIES + 2; + private Pair renameInconsistencyCounts( + int metadataCallsExpectedBeforeRetryLoop) { + int maxInconsistenciesBeforeFailure = TEST_MAX_RETRIES + + metadataCallsExpectedBeforeRetryLoop; int copyInconsistencyCount = versionCheckingIsOnServer() ? 2 : 0; int metadataInconsistencyCount = @@ -500,7 +504,7 @@ public void testRenameEventuallyConsistentFile() throws Throwable { // copyObject(). // The split of inconsistent responses between getObjectMetadata() and // copyObject() is arbitrary. 
- Pair counts = renameInconsistencyCounts(); + Pair counts = renameInconsistencyCounts(1); int metadataInconsistencyCount = counts.getLeft(); int copyInconsistencyCount = counts.getRight(); final Path testpath1 = @@ -529,7 +533,9 @@ public void testRenameEventuallyConsistentFile2() throws Throwable { AmazonS3 s3ClientSpy = Mockito.spy(fs.getAmazonS3ClientForTesting("mocking")); fs.setAmazonS3Client(s3ClientSpy); - Pair counts = renameInconsistencyCounts(); + skipIfVersionPolicyAndNoVersionId(); + + Pair counts = renameInconsistencyCounts(1); int metadataInconsistencyCount = counts.getLeft(); int copyInconsistencyCount = counts.getRight(); final Path testpath2 = @@ -538,7 +544,6 @@ public void testRenameEventuallyConsistentFile2() throws Throwable { 0, metadataInconsistencyCount, copyInconsistencyCount + 1); - skipIfVersionPolicyAndNoVersionId(testpath2); final Path dest2 = path("dest2.dat"); if (expectedExceptionInteractions.contains( InteractionType.EVENTUALLY_CONSISTENT_COPY)) { @@ -573,7 +578,7 @@ public void testRenameEventuallyConsistentDirectory() throws Throwable { writeDataset(fs, consistentFile, TEST_DATA_BYTES, TEST_DATA_BYTES.length, 1024, true, true); - Pair counts = renameInconsistencyCounts(); + Pair counts = renameInconsistencyCounts(0); int metadataInconsistencyCount = counts.getLeft(); int copyInconsistencyCount = counts.getRight(); @@ -639,6 +644,9 @@ public void testReadAfterEventuallyConsistentWrite2() throws Throwable { final Path testpath2 = writeEventuallyConsistentFileVersion("eventually2.dat", s3ClientSpy, TEST_MAX_RETRIES + 1, 0, 0); + + skipIfVersionPolicyAndNoVersionId(testpath2); + try (FSDataInputStream instream2 = fs.open(testpath2)) { if (expectedExceptionInteractions.contains( InteractionType.EVENTUALLY_CONSISTENT_READ)) { @@ -1012,6 +1020,9 @@ private Path writeFileWithNoVersionMetadata(String filename) /** * The test is invalid if the policy uses versionId but the bucket doesn't * have versioning enabled. 
+ * + * Tests the given file for a versionId to detect whether bucket versioning + * is enabled. */ private void skipIfVersionPolicyAndNoVersionId(Path testpath) throws IOException { @@ -1024,6 +1035,19 @@ private void skipIfVersionPolicyAndNoVersionId(Path testpath) } } + /** + * Like {@link #skipIfVersionPolicyAndNoVersionId(Path)} but generates a new + * file to test versionId against. + */ + private void skipIfVersionPolicyAndNoVersionId() throws IOException { + if (fs.getChangeDetectionPolicy().getSource() == Source.VersionId) { + Path versionIdFeatureTestFile = path("versionIdTest"); + writeDataset(fs, versionIdFeatureTestFile, TEST_DATA_BYTES, + TEST_DATA_BYTES.length, 1024, true, true); + skipIfVersionPolicyAndNoVersionId(versionIdFeatureTestFile); + } + } + private GetObjectRequest matchingGetObjectRequest(Path path, String eTag, String versionId) { return ArgumentMatchers.argThat(request -> { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java index 92eac3c71411f..dd5b50816c901 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java @@ -30,6 +30,8 @@ import org.apache.hadoop.fs.contract.AbstractFSContract; import org.apache.hadoop.fs.contract.s3a.S3AContract; +import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; import org.junit.Assume; import org.junit.Test; @@ -557,6 +559,44 @@ public void testInconsistentS3ClientDeletes() throws Throwable { ); } + /** + * Tests that the file's eTag and versionId are preserved in recursive + * listings. 
+ */ + @Test + public void testListingReturnsVersionMetadata() throws Throwable { + S3AFileSystem fs = getFileSystem(); + Assume.assumeTrue(fs.hasMetadataStore()); + + // write simple file + Path file = path("file1"); + try (FSDataOutputStream outputStream = fs.create(file)) { + outputStream.writeChars("hello"); + } + + // get individual file status + FileStatus[] fileStatuses = fs.listStatus(file); + assertEquals(1, fileStatuses.length); + S3AFileStatus status = (S3AFileStatus) fileStatuses[0]; + String eTag = status.getETag(); + String versionId = status.getVersionId(); + + // get status through recursive directory listing + RemoteIterator filesIterator = fs.listFiles( + file.getParent(), true); + List files = Lists.newArrayList(); + while (filesIterator.hasNext()) { + files.add(filesIterator.next()); + } + assertEquals(1, files.size()); + + // ensure eTag and versionId are preserved in directory listing + S3ALocatedFileStatus locatedFileStatus = + (S3ALocatedFileStatus) files.get(0); + assertEquals(eTag, locatedFileStatus.getETag()); + assertEquals(versionId, locatedFileStatus.getVersionId()); + } + /** * Assert that the two list sizes match; failure message includes the lists. * @param message text for the assertion From 348aa61a9b7bcafe067c3bdf374b07d08e06867d Mon Sep 17 00:00:00 2001 From: Ben Roling Date: Fri, 10 May 2019 14:26:19 -0500 Subject: [PATCH 07/13] Remove layered retries in copyFile() and select() Use ChangeTracker in getObjectMetadata() and avoid calling getObjectMetadata() in a retry loop since it retries internally. 
--- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 98 +++++++++++++------ .../fs/s3a/ITestS3ARemoteFileChanged.java | 13 +-- 2 files changed, 75 insertions(+), 36 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index f1552cb6a5328..3c8cfb0d7ff0b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -1360,10 +1360,28 @@ childDst, length, getDefaultBlockSize(childDst), username, @VisibleForTesting @Retries.RetryTranslated public ObjectMetadata getObjectMetadata(Path path) throws IOException { + return getObjectMetadata(path, null, invoker, null); + } + + /** + * Low-level call to get at the object metadata. + * @param path path to the object + * @param changeTracker the change tracker to detect version inconsistencies + * @param invoker the invoker providing the retry policy + * @param operation the operation being performed (e.g. "read" or "copy") + * @return metadata + * @throws IOException IO and object access problems. + */ + @VisibleForTesting + @Retries.RetryTranslated + public ObjectMetadata getObjectMetadata(Path path, + ChangeTracker changeTracker, Invoker invoker, String operation) + throws IOException { return once("getObjectMetadata", path.toString(), () -> - // this always does a full HEAD to the object - getObjectMetadata(pathToKey(path))); + // this always does a full HEAD to the object + getObjectMetadata( + pathToKey(path), changeTracker, invoker, operation)); } /** @@ -1534,6 +1552,26 @@ public S3AStorageStatistics getStorageStatistics() { */ @Retries.RetryRaw protected ObjectMetadata getObjectMetadata(String key) throws IOException { + return getObjectMetadata(key, null, invoker,null); + } + + /** + * Request object metadata; increments counters in the process. 
+ * Retry policy: retry untranslated. + * Uses changeTracker to detect an unexpected file version (eTag or versionId) + * @param key key + * @param changeTracker the change tracker to detect unexpected object version + * @param invoker the invoker providing the retry policy + * @param operation the operation (e.g. "read" or "copy") triggering this call + * @return the metadata + * @throws IOException if the retry invocation raises one (it shouldn't). + * @throws RemoteFileChangedException if an unexpected version is detected + */ + @Retries.RetryRaw + protected ObjectMetadata getObjectMetadata(String key, + ChangeTracker changeTracker, + Invoker invoker, + String operation) throws IOException { GetObjectMetadataRequest request = new GetObjectMetadataRequest(bucket, key); //SSE-C requires to be filled in if enabled for object metadata @@ -1541,7 +1579,11 @@ protected ObjectMetadata getObjectMetadata(String key) throws IOException { ObjectMetadata meta = invoker.retryUntranslated("GET " + key, true, () -> { incrementStatistic(OBJECT_METADATA_REQUESTS); - return s3.getObjectMetadata(request); + ObjectMetadata objectMetadata = s3.getObjectMetadata(request); + if (changeTracker != null) { + changeTracker.processMetadata(objectMetadata, operation); + } + return objectMetadata; }); incrementReadOperations(); return meta; @@ -2976,17 +3018,20 @@ private CopyResult copyFile(String srcKey, String dstKey, long size, .getVersionMismatchCounter(), srcAttributes); + String action = "\"copyFile(\" + srcKey + \", \" + dstKey + \")\", srcKey"; Invoker readInvoker = readContext.getReadInvoker(); + + ObjectMetadata srcom = + once(action, null, + () -> + getObjectMetadata(srcKey, changeTracker, readInvoker, "copy")); + ObjectMetadata dstom = cloneObjectMetadata(srcom); + setOptionalObjectMetadata(dstom); + return readInvoker.retry( - "\"copyFile(\" + srcKey + \", \" + dstKey + \")\", srcKey", null, + action, null, true, () -> { - ObjectMetadata srcom = getObjectMetadata(srcKey); - 
ObjectMetadata dstom = cloneObjectMetadata(srcom); - setOptionalObjectMetadata(dstom); - - changeTracker.processMetadata(srcom, "copy"); - CopyObjectRequest copyObjectRequest = new CopyObjectRequest(bucket, srcKey, bucket, dstKey); changeTracker.maybeApplyConstraint(copyObjectRequest); @@ -3850,24 +3895,21 @@ private FSDataInputStream select(final Path source, S3AReadOpContext readContext = createReadContext(fileStatus, inputPolicy, changeDetectionPolicy, ra); - Invoker readInvoker = readContext.getReadInvoker(); - readInvoker.retry("select", - path.toString(), true, - () -> { - if (!fileStatus.isDirectory()) { - // check that the object metadata lines up with what is expected - // based on the object attributes (which may contain an eTag or - // versionId) from S3Guard - ChangeTracker changeTracker = - new ChangeTracker(uri.toString(), - changeDetectionPolicy, - readContext.instrumentation.newInputStreamStatistics() - .getVersionMismatchCounter(), - objectAttributes); - ObjectMetadata objectMetadata = getObjectMetadata(path); - changeTracker.processMetadata(objectMetadata, "select"); - } - }); + if (!fileStatus.isDirectory()) { + // check that the object metadata lines up with what is expected + // based on the object attributes (which may contain an eTag or + // versionId) from S3Guard + ChangeTracker changeTracker = + new ChangeTracker(uri.toString(), + changeDetectionPolicy, + readContext.instrumentation.newInputStreamStatistics() + .getVersionMismatchCounter(), + objectAttributes); + + // will retry internally if wrong version detected + Invoker readInvoker = readContext.getReadInvoker(); + getObjectMetadata(path, changeTracker, readInvoker, "select"); + } // build and execute the request return selectBinding.select( diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java index eb5b788ce9914..0fcc50dd89cc6 100644 
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java @@ -471,22 +471,19 @@ public void testRenameChangedFile() throws Throwable { } /** - * Total inconsistent response count across getObjectMetadata() and + * Inconsistent response counts for getObjectMetadata() and * copyObject() for a rename. - * The split of inconsistent responses between getObjectMetadata() and - * copyObject() is somewhat arbitrary. * @param metadataCallsExpectedBeforeRetryLoop number of getObjectMetadata - * calls expected before the retry loop + * calls expected before the consistency checking retry loop * @return the inconsistencies for (metadata, copy) */ private Pair renameInconsistencyCounts( int metadataCallsExpectedBeforeRetryLoop) { - int maxInconsistenciesBeforeFailure = TEST_MAX_RETRIES + int metadataInconsistencyCount = TEST_MAX_RETRIES + metadataCallsExpectedBeforeRetryLoop; - int copyInconsistencyCount = versionCheckingIsOnServer() ? 2 : 0; + int copyInconsistencyCount = + versionCheckingIsOnServer() ? 
TEST_MAX_RETRIES : 0; - int metadataInconsistencyCount = - maxInconsistenciesBeforeFailure - copyInconsistencyCount; return Pair.of(metadataInconsistencyCount, copyInconsistencyCount); } From 16a428926fc6b62bfdaae81bb654e582d8ff4169 Mon Sep 17 00:00:00 2001 From: Ben Roling Date: Fri, 10 May 2019 15:54:59 -0500 Subject: [PATCH 08/13] qualify versionId on GetObjectMetadataRequest --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 3 ++ .../fs/s3a/impl/ChangeDetectionPolicy.java | 29 +++++++++++++++ .../hadoop/fs/s3a/impl/ChangeTracker.java | 11 ++++++ .../fs/s3a/ITestS3ARemoteFileChanged.java | 36 ++++++++++--------- 4 files changed, 63 insertions(+), 16 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 3c8cfb0d7ff0b..c284926686065 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -1579,6 +1579,9 @@ protected ObjectMetadata getObjectMetadata(String key, ObjectMetadata meta = invoker.retryUntranslated("GET " + key, true, () -> { incrementStatistic(OBJECT_METADATA_REQUESTS); + if (changeTracker != null) { + changeTracker.maybeApplyConstraint(request); + } ObjectMetadata objectMetadata = s3.getObjectMetadata(request); if (changeTracker != null) { changeTracker.processMetadata(objectMetadata, operation); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java index c513d7c35531e..50c52935d42c2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java @@ -21,6 +21,7 @@ import java.util.Locale; import 
com.amazonaws.services.s3.model.CopyObjectRequest; +import com.amazonaws.services.s3.model.GetObjectMetadataRequest; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.transfer.model.CopyResult; @@ -245,6 +246,16 @@ public abstract void applyRevisionConstraint(GetObjectRequest request, public abstract void applyRevisionConstraint(CopyObjectRequest request, String revisionId); + /** + * Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId} + * as a server-side qualification on the {@code GetObjectMetadataRequest}. + * + * @param request the request + * @param revisionId the revision id + */ + public abstract void applyRevisionConstraint(GetObjectMetadataRequest request, + String revisionId); + /** * Takes appropriate action based on {@link #getMode() mode} when a change has * been detected. @@ -342,6 +353,11 @@ public void applyRevisionConstraint(CopyObjectRequest request, } } + @Override + public void applyRevisionConstraint(GetObjectMetadataRequest request, String revisionId) { + // GetObjectMetadataRequest doesn't support eTag qualification + } + @Override public Source getSource() { return Source.ETag; @@ -409,6 +425,14 @@ public void applyRevisionConstraint(CopyObjectRequest request, } } + @Override + public void applyRevisionConstraint(GetObjectMetadataRequest request, String revisionId) { + if (revisionId != null) { + LOG.debug("Restricting metadata request to version {}", revisionId); + request.withVersionId(revisionId); + } + } + @Override public Source getSource() { return Source.VersionId; @@ -462,6 +486,11 @@ public void applyRevisionConstraint(CopyObjectRequest request, } + @Override + public void applyRevisionConstraint(GetObjectMetadataRequest request, String revisionId) { + + } + @Override public String toString() { return "NoChangeDetection"; diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java index 47788762ee7dd..e2b7075c5a5ed 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java @@ -22,6 +22,7 @@ import com.amazonaws.AmazonServiceException; import com.amazonaws.services.s3.model.CopyObjectRequest; +import com.amazonaws.services.s3.model.GetObjectMetadataRequest; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.S3Object; @@ -145,6 +146,16 @@ public boolean maybeApplyConstraint( return false; } + public boolean maybeApplyConstraint( + final GetObjectMetadataRequest request) { + + if (policy.getMode() == ChangeDetectionPolicy.Mode.Server + && revisionId != null) { + policy.applyRevisionConstraint(request, revisionId); + return true; + } + return false; + } /** * Process the response from the server for validation against the diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java index 0fcc50dd89cc6..e2d1638fe19f6 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java @@ -98,7 +98,8 @@ private enum InteractionType { EVENTUALLY_CONSISTENT_READ, COPY, EVENTUALLY_CONSISTENT_COPY, - SELECT + SELECT, + EVENTUALLY_CONSISTENT_SELECT } private final String changeDetectionSource; @@ -117,7 +118,8 @@ public static Collection params() { InteractionType.EVENTUALLY_CONSISTENT_READ, InteractionType.COPY, InteractionType.EVENTUALLY_CONSISTENT_COPY, - 
InteractionType.SELECT)}, + InteractionType.SELECT, + InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, // test with etag {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_SERVER, @@ -127,7 +129,8 @@ public static Collection params() { InteractionType.EVENTUALLY_CONSISTENT_READ, InteractionType.COPY, InteractionType.EVENTUALLY_CONSISTENT_COPY, - InteractionType.SELECT)}, + InteractionType.SELECT, + InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_CLIENT, Arrays.asList( InteractionType.READ, @@ -137,7 +140,8 @@ public static Collection params() { // not InteractionType.EVENTUALLY_CONSISTENT_COPY as copy change // detection can't really occur client-side. The eTag of // the new object can't be expected to match. - InteractionType.SELECT)}, + InteractionType.SELECT, + InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_WARN, Arrays.asList( InteractionType.READ_AFTER_DELETE)}, @@ -146,16 +150,13 @@ public static Collection params() { InteractionType.READ_AFTER_DELETE)}, // test with versionId - // when using server-side versionId, the normal read exceptions - // shouldn't happen since the previous version will still be available, - // but they will still happen on rename and select since we always do a - // client-side check against the current version + // when using server-side versionId, the exceptions + // shouldn't happen since the previous version will still be available {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_SERVER, Arrays.asList( InteractionType.EVENTUALLY_CONSISTENT_READ, - InteractionType.COPY, InteractionType.EVENTUALLY_CONSISTENT_COPY, - InteractionType.SELECT)}, + InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, // with client-side versionId it will behave similar to client-side eTag {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_CLIENT, @@ -167,7 +168,8 @@ public static Collection params() { // not InteractionType.EVENTUALLY_CONSISTENT_COPY as copy 
change // detection can't really occur client-side. The versionId of // the new object can't be expected to match. - InteractionType.SELECT)}, + InteractionType.SELECT, + InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_WARN, Arrays.asList( @@ -409,13 +411,15 @@ public void testSelectEventuallyConsistentFile() throws Throwable { .get() .close(); - // TEST_MAX_RETRIES + 2 is required to before failure since there is - // one getObjectMetadata() call due to getFileStatus() before the - // change detection and retries in select() + // select() makes a getFileStaus() call before the consistency checking + // that will match the stub. As such, we need an extra inconsistency here + // to cross the threshold + int getMetadataInconsistencyCount = TEST_MAX_RETRIES + 2; final Path testpath2 = writeEventuallyConsistentFileVersion( - "select2.dat", s3ClientSpy, 0, TEST_MAX_RETRIES + 2, 0); + "select2.dat", s3ClientSpy, 0, getMetadataInconsistencyCount, 0); - if (expectedExceptionInteractions.contains(InteractionType.SELECT)) { + if (expectedExceptionInteractions.contains( + InteractionType.EVENTUALLY_CONSISTENT_SELECT)) { // should fail since the inconsistency lasts longer than the configured // retry limit interceptFuture(RemoteFileChangedException.class, "select", From f6258f7295d17e0ac5003c335bad390a7e6641fc Mon Sep 17 00:00:00 2001 From: Ben Roling Date: Mon, 13 May 2019 10:51:07 -0500 Subject: [PATCH 09/13] Fix handling around inconsistency during rename AWS response stubbing was incorrect for inconsistency during copyObject(). AWS throws AmazonClientException caused by NPE if constraints aren't met per https://github.com/aws/aws-sdk-java/issues/1644. Documentation and tests updated to reflect this. 
--- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 6 +- .../hadoop/fs/s3a/impl/ChangeTracker.java | 2 + .../site/markdown/tools/hadoop-aws/s3guard.md | 56 ++++++++++++++ .../fs/s3a/ITestS3ARemoteFileChanged.java | 77 ++++++++++++++++--- .../fs/s3a/TestStreamChangeTracker.java | 2 + 5 files changed, 130 insertions(+), 13 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index c284926686065..14e09e4b1334a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3021,18 +3021,18 @@ private CopyResult copyFile(String srcKey, String dstKey, long size, .getVersionMismatchCounter(), srcAttributes); - String action = "\"copyFile(\" + srcKey + \", \" + dstKey + \")\", srcKey"; + String action = "copyFile(" + srcKey + ", " + dstKey + ")"; Invoker readInvoker = readContext.getReadInvoker(); ObjectMetadata srcom = - once(action, null, + once(action, srcKey, () -> getObjectMetadata(srcKey, changeTracker, readInvoker, "copy")); ObjectMetadata dstom = cloneObjectMetadata(srcom); setOptionalObjectMetadata(dstom); return readInvoker.retry( - action, null, + action, srcKey, true, () -> { CopyObjectRequest copyObjectRequest = diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java index e2b7075c5a5ed..fba1b126cc54b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java @@ -229,6 +229,8 @@ public void processException(Exception e, String operation) throws RemoteFileChangedException { if (e instanceof AmazonServiceException) { AmazonServiceException 
serviceException = (AmazonServiceException) e; + // This isn't really going to be hit due to + // https://github.com/aws/aws-sdk-java/issues/1644 if (serviceException.getStatusCode() == SC_PRECONDITION_FAILED) { versionMismatches.incrementAndGet(); throw new RemoteFileChangedException(uri, operation, String.format( diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md index 3d7979cdd3deb..2fbc621ed27b0 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md @@ -1355,6 +1355,62 @@ org.apache.hadoop.fs.s3a.RemoteFileChangedException: open 's3a://my-bucket/test/ at java.io.FilterInputStream.read(FilterInputStream.java:83) ``` +### Error `AWSClientIOException: copyFile` caused by `NullPointerException` + +The AWS SDK has an [issue](https://github.com/aws/aws-sdk-java/issues/1644) +where it will throw a relatively generic `AmazonClientException` caused by +`NullPointerException` when copying a file and specifying a precondition +that cannot be met. This can bubble up from `S3AFileSystem.rename()`. It +suggests that the file in S3 is inconsistent with the metadata in S3Guard. 
+ +``` +org.apache.hadoop.fs.s3a.AWSClientIOException: copyFile(test/rename-eventually2.dat, test/dest2.dat) on test/rename-eventually2.dat: com.amazonaws.AmazonClientException: Unable to complete transfer: null: Unable to complete transfer: null + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:201) + at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:111) + at org.apache.hadoop.fs.s3a.Invoker.lambda$retry$4(Invoker.java:314) + at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:406) + at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:310) + at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:285) + at org.apache.hadoop.fs.s3a.S3AFileSystem.copyFile(S3AFileSystem.java:3034) + at org.apache.hadoop.fs.s3a.S3AFileSystem.innerRename(S3AFileSystem.java:1258) + at org.apache.hadoop.fs.s3a.S3AFileSystem.rename(S3AFileSystem.java:1119) + at org.apache.hadoop.fs.s3a.ITestS3ARemoteFileChanged.lambda$testRenameEventuallyConsistentFile2$6(ITestS3ARemoteFileChanged.java:556) + at org.apache.hadoop.test.LambdaTestUtils.intercept(LambdaTestUtils.java:498) + at org.apache.hadoop.fs.s3a.ITestS3ARemoteFileChanged.testRenameEventuallyConsistentFile2(ITestS3ARemoteFileChanged.java:554) + at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) + at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) + at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) + at java.lang.reflect.Method.invoke(Method.java:498) + at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50) + at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) + at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47) + at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) + at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26) + at 
org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27) + at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55) + at org.junit.internal.runners.statements.FailOnTimeout$CallableStatement.call(FailOnTimeout.java:298) + at org.junit.internal.runners.statements.FailOnTimeout$CallableStatement.call(FailOnTimeout.java:292) + at java.util.concurrent.FutureTask.run(FutureTask.java:266) + at java.lang.Thread.run(Thread.java:748) +Caused by: com.amazonaws.AmazonClientException: Unable to complete transfer: null + at com.amazonaws.services.s3.transfer.internal.AbstractTransfer.unwrapExecutionException(AbstractTransfer.java:286) + at com.amazonaws.services.s3.transfer.internal.AbstractTransfer.rethrowExecutionException(AbstractTransfer.java:265) + at com.amazonaws.services.s3.transfer.internal.CopyImpl.waitForCopyResult(CopyImpl.java:67) + at org.apache.hadoop.fs.s3a.impl.CopyOutcome.waitForCopy(CopyOutcome.java:72) + at org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$copyFile$14(S3AFileSystem.java:3047) + at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:109) + ... 25 more +Caused by: java.lang.NullPointerException + at com.amazonaws.services.s3.transfer.internal.CopyCallable.copyInOneChunk(CopyCallable.java:154) + at com.amazonaws.services.s3.transfer.internal.CopyCallable.call(CopyCallable.java:134) + at com.amazonaws.services.s3.transfer.internal.CopyMonitor.call(CopyMonitor.java:132) + at com.amazonaws.services.s3.transfer.internal.CopyMonitor.call(CopyMonitor.java:43) + at java.util.concurrent.FutureTask.run(FutureTask.java:266) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) + ... 
1 more +``` + ## Other Topics For details on how to test S3Guard, see [Testing S3Guard](./testing.html#s3guard) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java index e2d1638fe19f6..186b4a552ae06 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java @@ -25,6 +25,7 @@ import java.util.Collection; import java.util.Optional; +import com.amazonaws.AmazonClientException; import com.amazonaws.AmazonServiceException; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.CopyObjectRequest; @@ -98,6 +99,7 @@ private enum InteractionType { EVENTUALLY_CONSISTENT_READ, COPY, EVENTUALLY_CONSISTENT_COPY, + EVENTUALLY_CONSISTENT_METADATA, SELECT, EVENTUALLY_CONSISTENT_SELECT } @@ -118,6 +120,7 @@ public static Collection params() { InteractionType.EVENTUALLY_CONSISTENT_READ, InteractionType.COPY, InteractionType.EVENTUALLY_CONSISTENT_COPY, + InteractionType.EVENTUALLY_CONSISTENT_METADATA, InteractionType.SELECT, InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, @@ -129,6 +132,7 @@ public static Collection params() { InteractionType.EVENTUALLY_CONSISTENT_READ, InteractionType.COPY, InteractionType.EVENTUALLY_CONSISTENT_COPY, + InteractionType.EVENTUALLY_CONSISTENT_METADATA, InteractionType.SELECT, InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_CLIENT, @@ -140,6 +144,7 @@ public static Collection params() { // not InteractionType.EVENTUALLY_CONSISTENT_COPY as copy change // detection can't really occur client-side. The eTag of // the new object can't be expected to match. 
+ InteractionType.EVENTUALLY_CONSISTENT_METADATA, InteractionType.SELECT, InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_WARN, @@ -156,6 +161,7 @@ public static Collection params() { Arrays.asList( InteractionType.EVENTUALLY_CONSISTENT_READ, InteractionType.EVENTUALLY_CONSISTENT_COPY, + InteractionType.EVENTUALLY_CONSISTENT_METADATA, InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, // with client-side versionId it will behave similar to client-side eTag @@ -168,6 +174,7 @@ public static Collection params() { // not InteractionType.EVENTUALLY_CONSISTENT_COPY as copy change // detection can't really occur client-side. The versionId of // the new object can't be expected to match. + InteractionType.EVENTUALLY_CONSISTENT_METADATA, InteractionType.SELECT, InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, @@ -527,11 +534,14 @@ public void testRenameEventuallyConsistentFile() throws Throwable { * Tests doing a rename() on a file where the version visible in S3 does not * match the version in the metadata store until a certain number of retries * has been met. 
+ * The test expects failure by AWSClientIOException caused by NPE due to + * https://github.com/aws/aws-sdk-java/issues/1644 */ @Test - public void testRenameEventuallyConsistentFile2() throws Throwable { + public void testRenameEventuallyConsistentFileNPE() throws Throwable { requireS3Guard(); - AmazonS3 s3ClientSpy = Mockito.spy(fs.getAmazonS3ClientForTesting("mocking")); + AmazonS3 s3ClientSpy = Mockito.spy( + fs.getAmazonS3ClientForTesting("mocking")); fs.setAmazonS3Client(s3ClientSpy); skipIfVersionPolicyAndNoVersionId(); @@ -539,18 +549,68 @@ public void testRenameEventuallyConsistentFile2() throws Throwable { Pair counts = renameInconsistencyCounts(1); int metadataInconsistencyCount = counts.getLeft(); int copyInconsistencyCount = counts.getRight(); + // giving copyInconsistencyCount + 1 here should trigger the failure, + // exceeding the retry limit final Path testpath2 = - writeEventuallyConsistentFileVersion("rename-eventually2.dat", + writeEventuallyConsistentFileVersion("rename-eventuallyNPE.dat", s3ClientSpy, 0, metadataInconsistencyCount, copyInconsistencyCount + 1); - final Path dest2 = path("dest2.dat"); + final Path dest2 = path("destNPE.dat"); if (expectedExceptionInteractions.contains( InteractionType.EVENTUALLY_CONSISTENT_COPY)) { // should fail since the inconsistency is set up to persist longer than // the configured retry limit - intercept(RemoteFileChangedException.class, "", + // the expected exception is not RemoteFileChangedException due to + // https://github.com/aws/aws-sdk-java/issues/1644 + AWSClientIOException exception = + intercept(AWSClientIOException.class, + "Unable to complete transfer: null", + "expected copy() failure", + () -> fs.rename(testpath2, dest2)); + AmazonClientException cause = exception.getCause(); + assertNotNull(cause); + Throwable causeCause = cause.getCause(); + assertTrue(causeCause instanceof NullPointerException); + } else { + fs.rename(testpath2, dest2); + } + } + + /** + * Tests doing a rename() on a 
file where the version visible in S3 does not + * match the version in the metadata store until a certain number of retries + * has been met. + * The test expects failure by RemoteFileChangedException. + */ + @Test + public void testRenameEventuallyConsistentFileRFCE() throws Throwable { + requireS3Guard(); + AmazonS3 s3ClientSpy = Mockito.spy( + fs.getAmazonS3ClientForTesting("mocking")); + fs.setAmazonS3Client(s3ClientSpy); + + skipIfVersionPolicyAndNoVersionId(); + + Pair counts = renameInconsistencyCounts(1); + int metadataInconsistencyCount = counts.getLeft(); + int copyInconsistencyCount = counts.getRight(); + // giving metadataInconsistencyCount + 1 here should trigger the failure, + // exceeding the retry limit + final Path testpath2 = + writeEventuallyConsistentFileVersion("rename-eventuallyRFCE.dat", + s3ClientSpy, + 0, + metadataInconsistencyCount + 1, + copyInconsistencyCount); + final Path dest2 = path("destRFCE.dat"); + if (expectedExceptionInteractions.contains( + InteractionType.EVENTUALLY_CONSISTENT_METADATA)) { + // should fail since the inconsistency is set up to persist longer than + // the configured retry limit + intercept(RemoteFileChangedException.class, + "change detected on copy", "expected copy() failure", () -> fs.rename(testpath2, dest2)); } else { @@ -911,7 +971,7 @@ public S3Object answer(InvocationOnMock invocation) throws Throwable { /** * Stubs {@link AmazonS3#copyObject(CopyObjectRequest)} - * within s3ClientSpy to throw precondition failed error until + * within s3ClientSpy to return null (indicating preconditions not met) until * copyInconsistentCallCount calls have been made. 
* @param s3ClientSpy the spy to stub * @param testpath the path of the object the stub should apply to @@ -936,10 +996,7 @@ public CopyObjectResult answer(InvocationOnMock invocation) + " of " + copyInconsistentCallCount; LOG.info("Copying {}: {}", testpath, message); logLocationAtDebug(); - AmazonServiceException exception = - new AmazonServiceException(message); - exception.setStatusCode(SC_PRECONDITION_FAILED); - throw exception; + return null; } return (CopyObjectResult) invocation.callRealMethod(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java index 5e0edbb2c2b31..c2885b0542d04 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java @@ -245,6 +245,8 @@ public void testCopyVersionMismatch() throws Throwable { objectAttributes("etag", "versionId")); // 412 is translated to RemoteFileChangedException + // note: this scenario is never currently hit due to + // https://github.com/aws/aws-sdk-java/issues/1644 AmazonServiceException awsException = new AmazonServiceException("aws exception"); awsException.setStatusCode(HttpStatus.SC_PRECONDITION_FAILED); From 57ad29b58c83e17e313d57c6dd09310c6b3cad18 Mon Sep 17 00:00:00 2001 From: Ben Roling Date: Mon, 13 May 2019 11:10:49 -0500 Subject: [PATCH 10/13] Use SdkBaseException instead of Exception --- .../java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java | 3 ++- .../org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java index fba1b126cc54b..75fecd5f14632 100644 --- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java @@ -21,6 +21,7 @@ import java.util.concurrent.atomic.AtomicLong; import com.amazonaws.AmazonServiceException; +import com.amazonaws.SdkBaseException; import com.amazonaws.services.s3.model.CopyObjectRequest; import com.amazonaws.services.s3.model.GetObjectMetadataRequest; import com.amazonaws.services.s3.model.GetObjectRequest; @@ -225,7 +226,7 @@ public void processResponse(final CopyResult copyResult) * generated (e.g. "copy", "read", "select"). * @throws RemoteFileChangedException if the remote file has changed. */ - public void processException(Exception e, String operation) throws + public void processException(SdkBaseException e, String operation) throws RemoteFileChangedException { if (e instanceof AmazonServiceException) { AmazonServiceException serviceException = (AmazonServiceException) e; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java index c2885b0542d04..c645ac5ad807d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java @@ -21,6 +21,7 @@ import java.util.concurrent.atomic.AtomicLong; import com.amazonaws.AmazonServiceException; +import com.amazonaws.SdkBaseException; import com.amazonaws.services.s3.Headers; import com.amazonaws.services.s3.model.CopyObjectRequest; import com.amazonaws.services.s3.model.GetObjectRequest; @@ -254,7 +255,7 @@ public void testCopyVersionMismatch() throws Throwable { RemoteFileChangedException.PRECONDITIONS_FAILED); // processing another type of exception does nothing - tracker.processException(new RuntimeException(), "copy"); + 
tracker.processException(new SdkBaseException("foo"), "copy"); } protected void assertConstraintApplied(final ChangeTracker tracker, @@ -279,7 +280,7 @@ protected RemoteFileChangedException expectChangeException( protected RemoteFileChangedException expectChangeException( final ChangeTracker tracker, - final Exception exception, + final SdkBaseException exception, final String operation, final String message) throws Exception { return expectException(tracker, exception, operation, message, @@ -332,7 +333,7 @@ protected T expectException( protected T expectException( final ChangeTracker tracker, - final Exception exception, + final SdkBaseException exception, final String operation, final String message, final Class clazz) throws Exception { From ac920dad7eec8ee4927cadd2d9d3476185cf527f Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 16 May 2019 14:01:33 +0200 Subject: [PATCH 11/13] HADOOP-16085: S3Guard versioning: get ITestS3ARemoteFileChanged to work consistently * clear auth mode flag from FS config, so there is a metastore check before rename(file), ensuring that the values passed to the mock are consistent everywhere * change error in ChangeDetectionPolicy to make clear when an error is coming on the client for a getObjectMetadata call even when the copy policy is "server" * and fix up ITestS3ARemoteFileChanged to use the constant string defined in ChangeDetectionPolicy precisely to stop tests being brittle against changes in the text. * new tests in ITestS3ARemoteFileChanged to not lose stack traces when examining causes/inner causes of exceptions, just to throw new assertions with nested causes. * deleted a couple of unused imports * moved from two spaces after a "." to one. Sorry. 
Change-Id: I30e28fdc9431668058ce1eae8517cc25f31fa051 --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 2 ++ .../fs/s3a/impl/ChangeDetectionPolicy.java | 7 +++-- .../fs/s3a/ITestS3ARemoteFileChanged.java | 28 +++++++++++++------ 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 14e09e4b1334a..e6fbd50541903 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3042,6 +3042,8 @@ private CopyResult copyFile(String srcKey, String dstKey, long size, setOptionalCopyObjectRequestParameters(copyObjectRequest); copyObjectRequest.setCannedAccessControlList(cannedACL); copyObjectRequest.setNewObjectMetadata(dstom); + Optional.ofNullable(srcom.getStorageClass()) + .ifPresent(copyObjectRequest::setStorageClass); Copy copy = transfers.copy(copyObjectRequest); copy.addProgressListener(progressListener); CopyOutcome copyOutcome = CopyOutcome.waitForCopy(copy); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java index 50c52935d42c2..38dfb74586431 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java @@ -51,7 +51,7 @@ public abstract class ChangeDetectionPolicy { LoggerFactory.getLogger(ChangeDetectionPolicy.class); @VisibleForTesting - public static final String CHANGE_DETECTED = "change detected"; + public static final String CHANGE_DETECTED = "change detected on client"; private final Mode mode; private final boolean requireVersion; @@ -299,13 +299,14 @@ public 
ImmutablePair onChangeDetected( case Client: case Server: default: - // mode == Client (or Server, but really won't be called for Server) + // mode == Client or Server; will trigger on version failures + // of getObjectMetadata even on server. return new ImmutablePair<>(true, new RemoteFileChangedException(uri, operation, String.format("%s " + CHANGE_DETECTED - + " on %s%s." + + " during %s%s." + " Expected %s got %s", getSource(), operation, positionText, revisionId, newRevisionId))); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java index 186b4a552ae06..485e057a23c57 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java @@ -26,7 +26,6 @@ import java.util.Optional; import com.amazonaws.AmazonClientException; -import com.amazonaws.AmazonServiceException; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.CopyObjectRequest; import com.amazonaws.services.s3.model.CopyObjectResult; @@ -64,12 +63,12 @@ import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBucketOverrides; +import static org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.CHANGE_DETECTED; import static org.apache.hadoop.fs.s3a.select.SelectConstants.S3_SELECT_CAPABILITY; import static org.apache.hadoop.fs.s3a.select.SelectConstants.SELECT_SQL; import static org.apache.hadoop.test.LambdaTestUtils.eventually; import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture; -import static org.apache.http.HttpStatus.SC_PRECONDITION_FAILED; import static org.mockito.Mockito.doAnswer; 
import static org.mockito.Mockito.when; @@ -219,9 +218,11 @@ protected Configuration createConfiguration() { CHANGE_DETECT_SOURCE, CHANGE_DETECT_MODE, RETRY_LIMIT, - RETRY_INTERVAL); + RETRY_INTERVAL, + METADATASTORE_AUTHORITATIVE); conf.set(CHANGE_DETECT_SOURCE, changeDetectionSource); conf.set(CHANGE_DETECT_MODE, changeDetectionMode); + conf.setBoolean(METADATASTORE_AUTHORITATIVE, false); // reduce retry limit so FileNotFoundException cases timeout faster, // speeding up the tests @@ -418,8 +419,8 @@ public void testSelectEventuallyConsistentFile() throws Throwable { .get() .close(); - // select() makes a getFileStaus() call before the consistency checking - // that will match the stub. As such, we need an extra inconsistency here + // select() makes a getFileStatus() call before the consistency checking + // that will match the stub. As such, we need an extra inconsistency here // to cross the threshold int getMetadataInconsistencyCount = TEST_MAX_RETRIES + 2; final Path testpath2 = writeEventuallyConsistentFileVersion( @@ -564,15 +565,26 @@ public void testRenameEventuallyConsistentFileNPE() throws Throwable { // the configured retry limit // the expected exception is not RemoteFileChangedException due to // https://github.com/aws/aws-sdk-java/issues/1644 + // If this test is failing after an AWS SDK update, + // then it means the SDK bug is fixed. + // Please update this test to match the new behavior. AWSClientIOException exception = intercept(AWSClientIOException.class, "Unable to complete transfer: null", "expected copy() failure", () -> fs.rename(testpath2, dest2)); AmazonClientException cause = exception.getCause(); - assertNotNull(cause); + if (cause == null) { + // no cause; something else went wrong: throw. 
+ throw new AssertionError("No inner cause", + exception); + } Throwable causeCause = cause.getCause(); - assertTrue(causeCause instanceof NullPointerException); + if (!(causeCause instanceof NullPointerException)) { + // null causeCause or it is the wrong type: throw + throw new AssertionError("Innermost cause is not NPE", + exception); + } } else { fs.rename(testpath2, dest2); } @@ -610,7 +622,7 @@ public void testRenameEventuallyConsistentFileRFCE() throws Throwable { // should fail since the inconsistency is set up to persist longer than // the configured retry limit intercept(RemoteFileChangedException.class, - "change detected on copy", + CHANGE_DETECTED, "expected copy() failure", () -> fs.rename(testpath2, dest2)); } else { From eb84a0654dbc9502b4451cb158e33e591a00c11b Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 17 May 2019 14:10:13 +0100 Subject: [PATCH 12/13] HADOOP-16085: test extensions and s3guard bucket-info * added auth mode as a param on tests; switch half the existing tests over to it (so improve coverage without adding new runs) * javadocs for the tests to help maintenance and debugging in future * updated test docs to cover versioned buckets * s3guard bucket info prints change mode/policy (and signing policy, while I am there) * and updated examples of bucket-info to be consistent with the current output. 
Change-Id: Ib2577da02f5d91df427a0359e6ccca2add4894da --- .../org/apache/hadoop/fs/s3a/Listing.java | 27 ++-- .../hadoop/fs/s3a/s3guard/S3GuardTool.java | 6 +- .../markdown/tools/hadoop-aws/committers.md | 9 +- .../tools/hadoop-aws/delegation_tokens.md | 5 +- .../site/markdown/tools/hadoop-aws/s3guard.md | 4 + .../site/markdown/tools/hadoop-aws/testing.md | 18 +++ .../fs/s3a/ITestS3ARemoteFileChanged.java | 125 ++++++++++++------ 7 files changed, 136 insertions(+), 58 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java index c0ef226c506f9..1353d06385bb5 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java @@ -33,14 +33,13 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; -import java.util.HashSet; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.ListIterator; +import java.util.Map; import java.util.NoSuchElementException; -import java.util.Optional; import java.util.Set; -import java.util.stream.StreamSupport; import static org.apache.hadoop.fs.s3a.Constants.S3N_FOLDER_SUFFIX; import static org.apache.hadoop.fs.s3a.S3AUtils.createFileStatus; @@ -320,7 +319,7 @@ class FileStatusListingIterator /** Iterator over the current set of results. 
*/ private ListIterator statusBatchIterator; - private final Set providedStatus; + private final Map providedStatus; private Iterator providedStatusIterator; /** @@ -341,11 +340,12 @@ class FileStatusListingIterator this.source = source; this.filter = filter; this.acceptor = acceptor; - this.providedStatus = new HashSet<>(); + this.providedStatus = new HashMap<>(); for (; providedStatus != null && providedStatus.hasNext();) { final S3AFileStatus status = providedStatus.next(); - if (filter.accept(status.getPath()) && acceptor.accept(status)) { - this.providedStatus.add(status); + Path path = status.getPath(); + if (filter.accept(path) && acceptor.accept(status)) { + this.providedStatus.put(path, status); } } // build the first set of results. This will not trigger any @@ -378,7 +378,7 @@ private boolean sourceHasNext() throws IOException { // turn to file status that are only in provided list if (providedStatusIterator == null) { LOG.debug("Start iterating the provided status."); - providedStatusIterator = providedStatus.iterator(); + providedStatusIterator = providedStatus.values().iterator(); } return false; } @@ -390,20 +390,17 @@ public S3AFileStatus next() throws IOException { final S3AFileStatus status; if (sourceHasNext()) { status = statusBatchIterator.next(); - // We remove from provided list the file status listed by S3 so that + // We remove from provided map the file status listed by S3 so that // this does not return duplicate items. // The provided status is returned as it is assumed to have the better // metadata (i.e. 
the eTag and versionId from S3Guard) - Optional provided = - StreamSupport.stream(providedStatus.spliterator(), false) - .filter(element -> status.equals(element)).findFirst(); - if (provided.isPresent()) { - providedStatus.remove(status); + S3AFileStatus provided = providedStatus.remove(status.getPath()); + if (provided != null) { LOG.debug( "Removed and returned the status from provided file status {}", status); - return provided.get(); + return provided; } } else { if (providedStatusIterator.hasNext()) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index 0458544832e3b..448ea9213f5b0 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -1166,7 +1166,7 @@ public int run(String[] args, PrintStream out) magic ? 
"is" : "is not"); println(out, "%nS3A Client"); - + printOption(out, "\tSigning Algorithm", SIGNING_ALGORITHM, "(unset)"); String endpoint = conf.getTrimmed(ENDPOINT, ""); println(out, "\tEndpoint: %s=%s", ENDPOINT, @@ -1175,6 +1175,10 @@ public int run(String[] args, PrintStream out) printOption(out, "\tEncryption", SERVER_SIDE_ENCRYPTION_ALGORITHM, "none"); printOption(out, "\tInput seek policy", INPUT_FADVISE, INPUT_FADV_NORMAL); + printOption(out, "\tChange Detection Source", CHANGE_DETECT_SOURCE, + CHANGE_DETECT_SOURCE_DEFAULT); + printOption(out, "\tChange Detection Mode", CHANGE_DETECT_MODE, + CHANGE_DETECT_MODE_DEFAULT); // look at delegation token support if (fs.getDelegationTokens().isPresent()) { diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md index 09e123d6eda10..ef9c999359b0c 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md @@ -690,10 +690,15 @@ Filesystem s3a://landsat-pds is not using S3Guard The "magic" committer is not supported S3A Client - Endpoint: fs.s3a.endpoint=(unset) + Signing Algorithm: fs.s3a.signing-algorithm=(unset) + Endpoint: fs.s3a.endpoint=s3.amazonaws.com Encryption: fs.s3a.server-side-encryption-algorithm=none Input seek policy: fs.s3a.experimental.input.fadvise=normal -2017-09-27 19:18:57,917 INFO util.ExitUtil: Exiting with status 46: 46: The magic committer is not enabled for s3a://landsat-pds + Change Detection Source: fs.s3a.change.detection.source=etag + Change Detection Mode: fs.s3a.change.detection.mode=server +Delegation token support is disabled +2019-05-17 13:53:38,245 [main] INFO util.ExitUtil (ExitUtil.java:terminate(210)) - + Exiting with status 46: 46: The magic committer is not enabled for s3a://landsat-pds ``` ## Error message: "File being created has a magic path, but the filesystem has 
magic file support disabled: diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/delegation_tokens.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/delegation_tokens.md index 30226f85eb9b7..aad3f355b2de6 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/delegation_tokens.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/delegation_tokens.md @@ -522,12 +522,15 @@ $ hadoop s3guard bucket-info s3a://landsat-pds/ Filesystem s3a://landsat-pds Location: us-west-2 Filesystem s3a://landsat-pds is not using S3Guard -The "magic" committer is supported +The "magic" committer is not supported S3A Client + Signing Algorithm: fs.s3a.signing-algorithm=(unset) Endpoint: fs.s3a.endpoint=s3.amazonaws.com Encryption: fs.s3a.server-side-encryption-algorithm=none Input seek policy: fs.s3a.experimental.input.fadvise=normal + Change Detection Source: fs.s3a.change.detection.source=etag + Change Detection Mode: fs.s3a.change.detection.mode=server Delegation Support enabled: token kind = S3ADelegationToken/Session Hadoop security mode: SIMPLE ``` diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md index 2fbc621ed27b0..a766abc616be9 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md @@ -644,9 +644,13 @@ Metadata Store Diagnostics: The "magic" committer is supported S3A Client + Signing Algorithm: fs.s3a.signing-algorithm=(unset) Endpoint: fs.s3a.endpoint=s3-eu-west-1.amazonaws.com Encryption: fs.s3a.server-side-encryption-algorithm=none Input seek policy: fs.s3a.experimental.input.fadvise=normal + Change Detection Source: fs.s3a.change.detection.source=etag + Change Detection Mode: fs.s3a.change.detection.mode=server +Delegation token support is disabled ``` This listing includes all the information about the 
table supplied from diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md index 707694c8c6137..f61b46a8da6ea 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md @@ -298,6 +298,24 @@ plugin: ```bash mvn surefire-report:failsafe-report-only ``` +## Testing Versioned Stores + +Some tests (specifically some in `ITestS3ARemoteFileChanged`) require +a versioned bucket for full test coverage as well as S3Guard being enabled. + +To enable versioning in a bucket. + +1. In the AWS S3 Management console find and select the bucket. +1. In the Properties "tab", set it as versioned. +1. Important Create a lifecycle rule to automatically clean up old versions +after 24h. This avoids running up bills for objects which tests runs create and +then delete. +1. Run the tests again. + +Once a bucket is converted to being versioned, it cannot be converted back +to being unversioned. + + ## Scale Tests There are a set of tests designed to measure the scalability and performance diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java index 485e057a23c57..dd750334524ea 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java @@ -74,6 +74,44 @@ /** * Test S3A remote file change detection. + * This is a very parameterized test; the first three parameters + * define configuration options for the tests, while the final one + * declares the expected outcomes given those options. 
+ * + * This test uses mocking to insert transient failures into the S3 client, + * underneath the S3A Filesystem instance. + * + * This is used to simulate eventual consistency, so force the change policy + * failure modes to be encountered. + * + * If changes are made to the filesystem such that the number of calls to + * operations such as {@link S3AFileSystem#getObjectMetadata(Path)} are + * changed, the number of failures which the mock layer must generate may + * change. + * + * As the S3Guard auth mode flag does control whether or not a HEAD is issued + * in a call to {@code getFileStatus()}; the test parameter {@link #authMode} + * is used to help predict this count. + * + * Important: if you are seeing failures in this test after changing + * one of the rename/copy/open operations, it may be that an increase, + * decrease or change in the number of low-level S3 HEAD/GET operations is + * triggering the failures. + * Please review the changes to see that you haven't unintentionally done this. + * If it is intentional, please update the parameters here. + * + * If you are seeing failures without such a change, and nobody else is, + * it is likely that you have a different bucket configuration option which + * is somehow triggering a regression. If you can work out which option + * this is, then extend {@link #createConfiguration()} to reset that parameter + * too. + * + * Note: to help debug these issues, set the log for this to DEBUG: + *
    + *   log4j.logger.org.apache.hadoop.fs.s3a.ITestS3ARemoteFileChanged=DEBUG
    + * 
    + * The debug information printed will include a trace of where operations + * are being called from, to help understand why the test is failing. */ @RunWith(Parameterized.class) public class ITestS3ARemoteFileChanged extends AbstractS3ATestBase { @@ -105,14 +143,26 @@ private enum InteractionType { private final String changeDetectionSource; private final String changeDetectionMode; + private final boolean authMode; private final Collection expectedExceptionInteractions; private S3AFileSystem fs; - @Parameterized.Parameters(name = "{0}-{1}") + /** + * Test parameters. + *
      + *
    1. Change detection source: etag or version.
    2. + *
    3. Change detection policy: server, client, client+warn, none
    4. + *
    5. Whether to enable auth mode on the filesystem.
    6. + *
    7. Expected outcomes.
    8. + *
    + * @return the test configuration. + */ + @Parameterized.Parameters(name = "{0}-{1}-auth-{2}") public static Collection params() { return Arrays.asList(new Object[][]{ // make sure it works with invalid config {"bogus", "bogus", + true, Arrays.asList( InteractionType.READ, InteractionType.READ_AFTER_DELETE, @@ -125,6 +175,7 @@ public static Collection params() { // test with etag {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_SERVER, + true, Arrays.asList( InteractionType.READ, InteractionType.READ_AFTER_DELETE, @@ -135,6 +186,7 @@ public static Collection params() { InteractionType.SELECT, InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_CLIENT, + false, Arrays.asList( InteractionType.READ, InteractionType.EVENTUALLY_CONSISTENT_READ, @@ -147,9 +199,11 @@ public static Collection params() { InteractionType.SELECT, InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_WARN, + false, Arrays.asList( InteractionType.READ_AFTER_DELETE)}, {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_NONE, + false, Arrays.asList( InteractionType.READ_AFTER_DELETE)}, @@ -157,6 +211,7 @@ public static Collection params() { // when using server-side versionId, the exceptions // shouldn't happen since the previous version will still be available {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_SERVER, + true, Arrays.asList( InteractionType.EVENTUALLY_CONSISTENT_READ, InteractionType.EVENTUALLY_CONSISTENT_COPY, @@ -165,6 +220,7 @@ public static Collection params() { // with client-side versionId it will behave similar to client-side eTag {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_CLIENT, + false, Arrays.asList( InteractionType.READ, InteractionType.READ_AFTER_DELETE, @@ -178,9 +234,11 @@ public static Collection params() { InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_WARN, + true, Arrays.asList( InteractionType.READ_AFTER_DELETE)}, 
{CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_NONE, + false, Arrays.asList( InteractionType.READ_AFTER_DELETE)} }); @@ -188,16 +246,21 @@ public static Collection params() { public ITestS3ARemoteFileChanged(String changeDetectionSource, String changeDetectionMode, + boolean authMode, Collection expectedExceptionInteractions) { this.changeDetectionSource = changeDetectionSource; this.changeDetectionMode = changeDetectionMode; + this.authMode = authMode; this.expectedExceptionInteractions = expectedExceptionInteractions; } @Override public void setup() throws Exception { super.setup(); + // skip all versioned checks if the remote FS doesn't do + // versions. fs = getFileSystem(); + skipIfVersionPolicyAndNoVersionId(); // cache the original S3 client for teardown. originalS3Client = Optional.of( fs.getAmazonS3ClientForTesting("caching")); @@ -222,7 +285,7 @@ protected Configuration createConfiguration() { METADATASTORE_AUTHORITATIVE); conf.set(CHANGE_DETECT_SOURCE, changeDetectionSource); conf.set(CHANGE_DETECT_MODE, changeDetectionMode); - conf.setBoolean(METADATASTORE_AUTHORITATIVE, false); + conf.setBoolean(METADATASTORE_AUTHORITATIVE, authMode); // reduce retry limit so FileNotFoundException cases timeout faster, // speeding up the tests @@ -231,6 +294,7 @@ protected Configuration createConfiguration() { if (conf.getClass(S3_METADATA_STORE_IMPL, MetadataStore.class) == NullMetadataStore.class) { + LOG.debug("Enabling local S3Guard metadata store"); // favor LocalMetadataStore over NullMetadataStore conf.setClass(S3_METADATA_STORE_IMPL, LocalMetadataStore.class, MetadataStore.class); @@ -248,6 +312,15 @@ protected Path path() throws IOException { return super.path(getMethodName()); } + /** + * How many HEAD requests are made in a call to + * {@link S3AFileSystem#getFileStatus(Path)}? + * @return a number >= 0. + */ + private int getFileStatusHeadCount() { + return authMode ? 
0 : 1; + } + /** * Tests reading a file that is changed while the reader's InputStream is * open. @@ -265,8 +338,6 @@ public void testReadFileChangedStreamOpen() throws Throwable { writeDataset(fs, testpath, originalDataset, originalDataset.length, 1024, false); - skipIfVersionPolicyAndNoVersionId(testpath); - try(FSDataInputStream instream = fs.open(testpath)) { // seek forward and read successfully instream.seek(1024); @@ -344,8 +415,6 @@ public void testReadFileChangedStreamOpen() throws Throwable { public void testReadFileChangedOutOfSyncMetadata() throws Throwable { final Path testpath = writeOutOfSyncFileVersion("fileChangedOutOfSync.dat"); - skipIfVersionPolicyAndNoVersionId(testpath); - try (final FSDataInputStream instream = fs.open(testpath)) { if (expectedExceptionInteractions.contains(InteractionType.READ)) { expectReadFailure(instream); @@ -363,8 +432,6 @@ public void testReadFileChangedOutOfSyncMetadata() throws Throwable { public void testReadWithNoVersionMetadata() throws Throwable { final Path testpath = writeFileWithNoVersionMetadata("readnoversion.dat"); - skipIfVersionPolicyAndNoVersionId(testpath); - assertEquals("Contents of " + testpath, TEST_DATA, readUTF8(fs, testpath, -1)); @@ -379,8 +446,6 @@ public void testSelectChangedFile() throws Throwable { requireS3Select(); final Path testpath = writeOutOfSyncFileVersion("select.dat"); - skipIfVersionPolicyAndNoVersionId(testpath); - if (expectedExceptionInteractions.contains(InteractionType.SELECT)) { interceptFuture(RemoteFileChangedException.class, "select", fs.openFile(testpath) @@ -409,8 +474,6 @@ public void testSelectEventuallyConsistentFile() throws Throwable { final Path testpath1 = writeEventuallyConsistentFileVersion( "select1.dat", s3ClientSpy, 0, TEST_MAX_RETRIES, 0); - skipIfVersionPolicyAndNoVersionId(testpath1); - // should succeed since the inconsistency doesn't last longer than the // configured retry limit fs.openFile(testpath1) @@ -452,8 +515,6 @@ public void 
testSelectWithNoVersionMetadata() throws Throwable { final Path testpath = writeFileWithNoVersionMetadata("selectnoversion.dat"); - skipIfVersionPolicyAndNoVersionId(testpath); - try (FSDataInputStream instream = fs.openFile(testpath) .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build().get()) { assertEquals(QUOTED_TEST_DATA, @@ -470,8 +531,6 @@ public void testSelectWithNoVersionMetadata() throws Throwable { public void testRenameChangedFile() throws Throwable { final Path testpath = writeOutOfSyncFileVersion("rename.dat"); - skipIfVersionPolicyAndNoVersionId(testpath); - final Path dest = path("dest.dat"); if (expectedExceptionInteractions.contains(InteractionType.COPY)) { intercept(RemoteFileChangedException.class, "", @@ -513,7 +572,8 @@ public void testRenameEventuallyConsistentFile() throws Throwable { // copyObject(). // The split of inconsistent responses between getObjectMetadata() and // copyObject() is arbitrary. - Pair counts = renameInconsistencyCounts(1); + Pair counts = renameInconsistencyCounts( + getFileStatusHeadCount()); int metadataInconsistencyCount = counts.getLeft(); int copyInconsistencyCount = counts.getRight(); final Path testpath1 = @@ -523,8 +583,6 @@ public void testRenameEventuallyConsistentFile() throws Throwable { metadataInconsistencyCount, copyInconsistencyCount); - skipIfVersionPolicyAndNoVersionId(testpath1); - final Path dest1 = path("dest1.dat"); // shouldn't fail since the inconsistency doesn't last through the // configured retry limit @@ -541,13 +599,11 @@ public void testRenameEventuallyConsistentFile() throws Throwable { @Test public void testRenameEventuallyConsistentFileNPE() throws Throwable { requireS3Guard(); - AmazonS3 s3ClientSpy = Mockito.spy( - fs.getAmazonS3ClientForTesting("mocking")); - fs.setAmazonS3Client(s3ClientSpy); - skipIfVersionPolicyAndNoVersionId(); + AmazonS3 s3ClientSpy = spyOnFilesystem(); - Pair counts = renameInconsistencyCounts(1); + Pair counts = renameInconsistencyCounts( + 
getFileStatusHeadCount()); int metadataInconsistencyCount = counts.getLeft(); int copyInconsistencyCount = counts.getRight(); // giving copyInconsistencyCount + 1 here should trigger the failure, @@ -599,13 +655,11 @@ public void testRenameEventuallyConsistentFileNPE() throws Throwable { @Test public void testRenameEventuallyConsistentFileRFCE() throws Throwable { requireS3Guard(); - AmazonS3 s3ClientSpy = Mockito.spy( - fs.getAmazonS3ClientForTesting("mocking")); - fs.setAmazonS3Client(s3ClientSpy); - skipIfVersionPolicyAndNoVersionId(); + AmazonS3 s3ClientSpy = spyOnFilesystem(); - Pair counts = renameInconsistencyCounts(1); + Pair counts = renameInconsistencyCounts( + getFileStatusHeadCount()); int metadataInconsistencyCount = counts.getLeft(); int copyInconsistencyCount = counts.getRight(); // giving metadataInconsistencyCount + 1 here should trigger the failure, @@ -633,6 +687,9 @@ public void testRenameEventuallyConsistentFileRFCE() throws Throwable { /** * Tests doing a rename() on a directory containing * an file which is eventually consistent. + * There is no call to getFileStatus on the source file whose + * inconsistency is simulated; the state of S3Guard auth mode is not + * relevant. 
*/ @Test public void testRenameEventuallyConsistentDirectory() throws Throwable { @@ -663,8 +720,6 @@ public void testRenameEventuallyConsistentDirectory() throws Throwable { metadataInconsistencyCount, copyInconsistencyCount); - skipIfVersionPolicyAndNoVersionId(inconsistentFile); - // must not fail since the inconsistency doesn't last through the // configured retry limit fs.rename(sourcedir, destdir); @@ -679,8 +734,6 @@ public void testRenameWithNoVersionMetadata() throws Throwable { final Path testpath = writeFileWithNoVersionMetadata("renamenoversion.dat"); - skipIfVersionPolicyAndNoVersionId(testpath); - final Path dest = path("noversiondest.dat"); fs.rename(testpath, dest); assertEquals("Contents of " + dest, @@ -699,8 +752,6 @@ public void testReadAfterEventuallyConsistentWrite() throws Throwable { writeEventuallyConsistentFileVersion("eventually1.dat", s3ClientSpy, TEST_MAX_RETRIES, 0 , 0); - skipIfVersionPolicyAndNoVersionId(testpath1); - try (FSDataInputStream instream1 = fs.open(testpath1)) { // succeeds on the last retry instream1.read(); @@ -718,8 +769,6 @@ public void testReadAfterEventuallyConsistentWrite2() throws Throwable { writeEventuallyConsistentFileVersion("eventually2.dat", s3ClientSpy, TEST_MAX_RETRIES + 1, 0, 0); - skipIfVersionPolicyAndNoVersionId(testpath2); - try (FSDataInputStream instream2 = fs.open(testpath2)) { if (expectedExceptionInteractions.contains( InteractionType.EVENTUALLY_CONSISTENT_READ)) { @@ -745,8 +794,6 @@ public void testEventuallyConsistentReadOnReopen() throws Throwable { writeEventuallyConsistentFileVersion(filename, s3ClientSpy, 0, 0, 0); - skipIfVersionPolicyAndNoVersionId(testpath); - try (FSDataInputStream instream = fs.open(testpath)) { instream.read(); // overwrite the file, returning inconsistent version for From 3f9a56f8c5f271dfeb12a116ab85427bddcf3ee4 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 17 May 2019 14:29:03 +0100 Subject: [PATCH 13/13] HADOOP-16085: fix checkstyle complaints other 
those in javadocs and trivial >80 char line width Change-Id: I4782ae8d7e4da74251f30b81589f67954869127d --- .../java/org/apache/hadoop/fs/s3a/Listing.java | 2 +- .../org/apache/hadoop/fs/s3a/S3AFileStatus.java | 1 - .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 12 ++++++------ .../hadoop/fs/s3a/impl/ChangeDetectionPolicy.java | 9 ++++++--- .../fs/s3a/s3guard/DynamoDBMetadataStore.java | 14 +++++++------- .../apache/hadoop/fs/s3a/s3guard/PathMetadata.java | 1 - .../s3guard/PathMetadataDynamoDBTranslation.java | 1 - .../hadoop/fs/s3a/ITestS3ARemoteFileChanged.java | 2 +- .../hadoop/fs/s3a/ITestS3GuardListConsistency.java | 1 - .../fs/s3a/s3guard/MetadataStoreTestBase.java | 6 ++---- .../TestObjectChangeDetectionAttributes.java | 4 ++-- 11 files changed, 25 insertions(+), 28 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java index 1353d06385bb5..0f8c52be186a1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java @@ -202,7 +202,7 @@ static final class SingleStatusRemoteIterator * @param status status value: may be null, in which case * the iterator is empty. 
*/ - public SingleStatusRemoteIterator(S3ALocatedFileStatus status) { + SingleStatusRemoteIterator(S3ALocatedFileStatus status) { this.status = status; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java index b45e67ed7efd9..e8ff846f20fd0 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java @@ -21,7 +21,6 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; /** * File status for an S3A "file". diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index e6fbd50541903..e6850e9e7c5f5 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -1367,7 +1367,7 @@ public ObjectMetadata getObjectMetadata(Path path) throws IOException { * Low-level call to get at the object metadata. * @param path path to the object * @param changeTracker the change tracker to detect version inconsistencies - * @param invoker the invoker providing the retry policy + * @param changeInvoker the invoker providing the retry policy * @param operation the operation being performed (e.g. "read" or "copy") * @return metadata * @throws IOException IO and object access problems. 
@@ -1375,13 +1375,13 @@ public ObjectMetadata getObjectMetadata(Path path) throws IOException { @VisibleForTesting @Retries.RetryTranslated public ObjectMetadata getObjectMetadata(Path path, - ChangeTracker changeTracker, Invoker invoker, String operation) + ChangeTracker changeTracker, Invoker changeInvoker, String operation) throws IOException { return once("getObjectMetadata", path.toString(), () -> // this always does a full HEAD to the object getObjectMetadata( - pathToKey(path), changeTracker, invoker, operation)); + pathToKey(path), changeTracker, changeInvoker, operation)); } /** @@ -1561,7 +1561,7 @@ protected ObjectMetadata getObjectMetadata(String key) throws IOException { * Uses changeTracker to detect an unexpected file version (eTag or versionId) * @param key key * @param changeTracker the change tracker to detect unexpected object version - * @param invoker the invoker providing the retry policy + * @param changeInvoker the invoker providing the retry policy * @param operation the operation (e.g. "read" or "copy") triggering this call * @return the metadata * @throws IOException if the retry invocation raises one (it shouldn't). 
@@ -1570,13 +1570,13 @@ protected ObjectMetadata getObjectMetadata(String key) throws IOException { @Retries.RetryRaw protected ObjectMetadata getObjectMetadata(String key, ChangeTracker changeTracker, - Invoker invoker, + Invoker changeInvoker, String operation) throws IOException { GetObjectMetadataRequest request = new GetObjectMetadataRequest(bucket, key); //SSE-C requires to be filled in if enabled for object metadata generateSSECustomerKey().ifPresent(request::setSSECustomerKey); - ObjectMetadata meta = invoker.retryUntranslated("GET " + key, true, + ObjectMetadata meta = changeInvoker.retryUntranslated("GET " + key, true, () -> { incrementStatistic(OBJECT_METADATA_REQUESTS); if (changeTracker != null) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java index 38dfb74586431..b0e9d6f454426 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java @@ -355,7 +355,8 @@ public void applyRevisionConstraint(CopyObjectRequest request, } @Override - public void applyRevisionConstraint(GetObjectMetadataRequest request, String revisionId) { + public void applyRevisionConstraint(GetObjectMetadataRequest request, + String revisionId) { // GetObjectMetadataRequest doesn't support eTag qualification } @@ -427,7 +428,8 @@ public void applyRevisionConstraint(CopyObjectRequest request, } @Override - public void applyRevisionConstraint(GetObjectMetadataRequest request, String revisionId) { + public void applyRevisionConstraint(GetObjectMetadataRequest request, + String revisionId) { if (revisionId != null) { LOG.debug("Restricting metadata request to version {}", revisionId); request.withVersionId(revisionId); @@ -488,7 +490,8 @@ public void 
applyRevisionConstraint(CopyObjectRequest request, } @Override - public void applyRevisionConstraint(GetObjectMetadataRequest request, String revisionId) { + public void applyRevisionConstraint(GetObjectMetadataRequest request, + String revisionId) { } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java index 289abf258a9cf..a9e1f3368990c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java @@ -627,15 +627,15 @@ private DDBPathMetadata innerGet(Path path, boolean wantEmptyDirectoryFlag) } /** - * Make a FileStatus object for a directory at given path. The FileStatus - * only contains what S3A needs, and omits mod time since S3A uses its own - * implementation which returns current system time. - * @param owner username of owner + * Make a S3AFileStatus object for a directory at given path. + * The FileStatus only contains what S3A needs, and omits mod time + * since S3A uses its own implementation which returns current system time. 
+ * @param dirOwner username of owner * @param path path to dir - * @return new FileStatus + * @return new S3AFileStatus */ - private S3AFileStatus makeDirStatus(String owner, Path path) { - return new S3AFileStatus(Tristate.UNKNOWN, path, owner); + private S3AFileStatus makeDirStatus(String dirOwner, Path path) { + return new S3AFileStatus(Tristate.UNKNOWN, path, dirOwner); } @Override diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java index 5c00cdc5df706..8824439db7822 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java @@ -21,7 +21,6 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.Tristate; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java index 7c168b9370961..c9559ec1517f7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java @@ -40,7 +40,6 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.Constants; import 
org.apache.hadoop.fs.s3a.S3AFileStatus; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java index dd750334524ea..c345a1f9da745 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java @@ -415,7 +415,7 @@ public void testReadFileChangedStreamOpen() throws Throwable { public void testReadFileChangedOutOfSyncMetadata() throws Throwable { final Path testpath = writeOutOfSyncFileVersion("fileChangedOutOfSync.dat"); - try (final FSDataInputStream instream = fs.open(testpath)) { + try (FSDataInputStream instream = fs.open(testpath)) { if (expectedExceptionInteractions.contains(InteractionType.READ)) { expectReadFailure(instream); } else { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java index dd5b50816c901..c90dd7c63383e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java @@ -30,7 +30,6 @@ import org.apache.hadoop.fs.contract.AbstractFSContract; import org.apache.hadoop.fs.contract.s3a.S3AContract; -import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import org.junit.Assume; import org.junit.Test; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java index ced2e76cb53fd..47544f4eb6204 100644 --- 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java @@ -958,8 +958,7 @@ S3AFileStatus basicFileStatus(int size, boolean isDir, long blockSize, long modificationTime, Path path) { if (isDir) { return new S3AFileStatus(Tristate.UNKNOWN, path, null); - } - else { + } else { return new S3AFileStatus(size, modificationTime, path, blockSize, null, null, null); } @@ -969,8 +968,7 @@ public static S3AFileStatus basicFileStatus(Path path, int size, boolean isDir, long newModTime) throws IOException { if (isDir) { return new S3AFileStatus(Tristate.UNKNOWN, path, OWNER); - } - else { + } else { return new S3AFileStatus(size, newModTime, path, BLOCK_SIZE, OWNER, null, null); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestObjectChangeDetectionAttributes.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestObjectChangeDetectionAttributes.java index f24f63daaa197..f001262b36a04 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestObjectChangeDetectionAttributes.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestObjectChangeDetectionAttributes.java @@ -300,7 +300,7 @@ public void describeTo(Description description) { } private Matcher - correctInitiateMultipartUploadRequest(final String key) { + correctInitiateMultipartUploadRequest(final String key) { return new BaseMatcher() { @Override public void describeTo(Description description) { @@ -320,7 +320,7 @@ public boolean matches(Object item) { } private Matcher - correctMultipartUploadRequest(final String key) { + correctMultipartUploadRequest(final String key) { return new BaseMatcher() { @Override public boolean matches(Object item) {