From 2107f347f0bb54f6f9ca02f7e9579e541ce1ad55 Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Sun, 9 Feb 2020 14:40:53 +0530 Subject: [PATCH 1/5] HADOOP-16711 Add way to skip verifyBuckets check in S3A fs init() --- .../org/apache/hadoop/fs/s3a/Constants.java | 14 +++ .../apache/hadoop/fs/s3a/S3AFileSystem.java | 49 +++++++- .../markdown/tools/hadoop-aws/performance.md | 24 ++++ .../hadoop/fs/s3a/AbstractS3AMockTest.java | 1 + .../fs/s3a/ITestS3ABucketExistence.java | 118 ++++++++++++++++++ .../hadoop/fs/s3a/MockS3ClientFactory.java | 1 + 6 files changed, 204 insertions(+), 3 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index e107d4987f0da..086321f9a2265 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -20,8 +20,10 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; +import java.net.URI; import java.util.concurrent.TimeUnit; /** @@ -422,6 +424,18 @@ private Constants() { "fs.s3a.metadatastore.authoritative"; public static final boolean DEFAULT_METADATASTORE_AUTHORITATIVE = false; + /** + * Bucket validation parameter which can be set by client. This will be + * used in {@link S3AFileSystem#initialize(URI, Configuration)} + */ + public static final String S3A_BUCKET_PROBE = "fs.s3a.bucket.probe"; + + /** + * Default value of bucket validation parameter. An existence of bucket + * will be validated using {@link S3AFileSystem#verifyBucketExistsV2()} + */ + public static final int S3A_BUCKET_PROBE_DEFAULT = 2; + /** * How long a directory listing in the MS is considered as authoritative. */ diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index cc12848df9ea9..91f8b0c6ba436 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -384,9 +384,7 @@ public void initialize(URI name, Configuration originalConf) initCannedAcls(conf); // This initiates a probe against S3 for the bucket existing. - // It is where all network and authentication configuration issues - // surface, and is potentially slow. - verifyBucketExists(); + doBucketProbing(); inputPolicy = S3AInputPolicy.getPolicy( conf.getTrimmed(INPUT_FADVISE, INPUT_FADV_NORMAL)); @@ -452,6 +450,35 @@ public void initialize(URI name, Configuration originalConf) } + /** + * Test bucket existence in S3. + * When value of {@link Constants#S3A_BUCKET_PROBE is set to 0 by client, + * bucket existence check is not done to improve performance of + * S3AFileSystem initialization. When set to 1 or 2, bucket existence check + * will be performed which is potentially slow. 
+   * @throws IOException
+   */
+  @Retries.RetryTranslated
+  private void doBucketProbing() throws IOException {
+    int bucketProbe = this.getConf()
+            .getInt(S3A_BUCKET_PROBE, S3A_BUCKET_PROBE_DEFAULT);
+    Preconditions.checkArgument(bucketProbe >= 0 && bucketProbe <= 2,
+        "Value of " + S3A_BUCKET_PROBE + " should be between 0 to 2");
+    switch (bucketProbe) {
+    case 0:
+      break;
+    case 1:
+      verifyBucketExists();
+      break;
+    case 2:
+      verifyBucketExistsV2();
+      break;
+    default:
+      //This will never get executed because of above Precondition check.
+      break;
+    }
+  }
+
   /**
    * Initialize the thread pool.
    * This must be re-invoked after replacing the S3Client during test
@@ -511,6 +538,22 @@ protected void verifyBucketExists()
     }
   }
 
+  /**
+   * Verify that the bucket exists. This will correctly throw an exception
+   * when credentials are invalid.
+   * Retry policy: retrying, translated.
+   * @throws FileNotFoundException the bucket is absent
+   * @throws IOException any other problem talking to S3
+   */
+  @Retries.RetryTranslated
+  protected void verifyBucketExistsV2()
+      throws FileNotFoundException, IOException {
+    if (!invoker.retry("doesBucketExistV2", bucket, true,
+        () -> s3.doesBucketExistV2(bucket))) {
+      throw new FileNotFoundException("Bucket " + bucket + " does not exist");
+    }
+  }
+
   /**
    * Get S3A Instrumentation. For test purposes.
    * @return this instance's instrumentation.
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
index 5543263471e78..1dba0480c67c3 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
@@ -608,3 +608,27 @@ with HADOOP-15669.
 
 Other options may be added to `fs.s3a.ssl.channel.mode` in the future as
 further SSL optimizations are made.
+
+## Tuning S3AFileSystem Initialization.
+Any client using S3AFileSystem has to initialize it by providing a S3 bucket
+and configuration. The init method checks if the bucket provided is valid
+or not which is a slow operation leading poor performance. We can ignore
+bucket validation by configuring `fs.s3a.bucket.probe` as follows:
+
+```xml
+<property>
+  <name>fs.s3a.bucket.probe</name>
+  <value>0</value>
+  <description>
+   The value can be 0, 1 or 2(default). When set to 0, bucket existence
+   check won't be done during initialization thus making it faster.
+   Though it should be noted that if bucket is not available in S3,
+   consecutive calls like listing, put etc might fail with
+   FileNotFoundException. When set to 1, bucket existence check will
+   be done using V1 api of S3 client which doesn't verify the permissions
+   to read bucket. When set to 2, bucket existence check will
+   be done using V2 api of S3 client which doesn't verify the permissions
+   to read bucket.
+  </description>
+</property>
+```
\ No newline at end of file
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java
index 886795a9d90fc..99bab73e71c33 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java
@@ -75,6 +75,7 @@ public Configuration createConfiguration() {
     conf.setBoolean(CommitConstants.MAGIC_COMMITTER_ENABLED, true);
     // use minimum multipart size for faster triggering
     conf.setLong(Constants.MULTIPART_SIZE, MULTIPART_MIN_SIZE);
+    conf.setInt(Constants.S3A_BUCKET_PROBE, 1);
     return conf;
   }
 
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
new file mode 100644
index 0000000000000..f7738373324f5
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.util.UUID;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
+import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Class to test bucket existence api.
+ * See {@link S3AFileSystem#doBucketProbing()}.
+ */ +public class ITestS3ABucketExistence extends AbstractS3ATestBase { + + private FileSystem fs; + + private final String randomBucket = + "random-bucket-" + UUID.randomUUID().toString(); + + private final URI uri = URI.create(FS_S3A + "://" + randomBucket); + + @Test + public void testNoBucketProbing() throws Exception { + Configuration configuration = getConfiguration(); + configuration.setInt(S3A_BUCKET_PROBE, 0); + try { + fs = FileSystem.get(uri, configuration); + } catch (IOException ex) { + LOG.error("Exception : ", ex); + throw ex; + } + + Path path = new Path(uri); + intercept(FileNotFoundException.class, + "No such file or directory: " + path, + () -> fs.getFileStatus(path)); + + Path src = new Path(fs.getUri() + "/testfile"); + byte[] data = dataset(1024, 'a', 'z'); + intercept(FileNotFoundException.class, + "The specified bucket does not exist", + () -> writeDataset(fs, src, data, data.length, 1024 * 1024, true)); + } + + @Test + public void testBucketProbingV1() throws Exception { + Configuration configuration = getConfiguration(); + configuration.setInt(S3A_BUCKET_PROBE, 1); + intercept(FileNotFoundException.class, + () -> FileSystem.get(uri, configuration)); + } + + @Test + public void testBucketProbingV2() throws Exception { + Configuration configuration = getConfiguration(); + configuration.setInt(S3A_BUCKET_PROBE, 2); + intercept(FileNotFoundException.class, + () -> FileSystem.get(uri, configuration)); + } + + @Test + public void testBucketProbingParameterValidation() throws Exception { + Configuration configuration = getConfiguration(); + configuration.setInt(S3A_BUCKET_PROBE, 3); + intercept(IllegalArgumentException.class, + "Value of " + S3A_BUCKET_PROBE + " should be between 0 to 2", + "Should throw IllegalArgumentException", + () -> FileSystem.get(uri, configuration)); + configuration.setInt(S3A_BUCKET_PROBE, -1); + intercept(IllegalArgumentException.class, + "Value of " + S3A_BUCKET_PROBE + " should be between 0 to 2", + "Should throw IllegalArgumentException", + () -> FileSystem.get(uri, configuration)); + } + + @Override + protected Configuration getConfiguration() { + Configuration configuration = super.getConfiguration(); + S3ATestUtils.disableFilesystemCaching(configuration); + return configuration; + } + + @Override + public void teardown() throws Exception { + IOUtils.cleanupWithLogger(getLogger(), fs); + super.teardown(); + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java index 2397f6cbaface..4644cf24764ae 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java @@ -41,6 +41,7 @@ public AmazonS3 createS3Client(URI name, final String userAgentSuffix) { AmazonS3 s3 = mock(AmazonS3.class); when(s3.doesBucketExist(bucket)).thenReturn(true); + when(s3.doesBucketExistV2(bucket)).thenReturn(true); // this listing is used in startup if purging is enabled, so // return a stub value MultipartUploadListing noUploads = new MultipartUploadListing(); From 1783e7282d52f5dca141c202bac7830b7794f003 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 12 Feb 2020 20:02:47 +0000 Subject: [PATCH 2/5] HADOOP-16711 skip bucket existence check. 
Adds a new exception UnknownStoreException to indicate "there's no store there" * raised in verify bucket existence checks * and when translating AWS exceptions into IOEs * The S3A retry policy fails fast on this * And s3GetFileStatus recognises the same failure and raises it Except when the metastore shortcircuits S3 IO, this means all operations against a nonexistent store will fail with a unique exception. ITestS3ABucketExistence is extended to * disable metastore (getFileStatus(/) was returning a value) * always create new instances * invoke all the operations which catch and swallow FNFEs (exists, isFile, isDir, delete) Change-Id: Ide630ec9738ef971eba603b618bd612456fa064b --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 24 +++-- .../apache/hadoop/fs/s3a/S3ARetryPolicy.java | 1 + .../org/apache/hadoop/fs/s3a/S3AUtils.java | 13 +++ .../hadoop/fs/s3a/UnknownStoreException.java | 61 ++++++++++++ .../tools/hadoop-aws/troubleshooting_s3a.md | 46 ++++++--- .../fs/s3a/ITestS3ABucketExistence.java | 98 +++++++++++++------ .../src/test/resources/core-site.xml | 5 + 7 files changed, 193 insertions(+), 55 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 91f8b0c6ba436..451536c67d984 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -526,15 +526,15 @@ protected static S3AStorageStatistics createStorageStatistics() { * Verify that the bucket exists. This does not check permissions, * not even read access. * Retry policy: retrying, translated. - * @throws FileNotFoundException the bucket is absent + * @throws UnknownStoreException the bucket is absent * @throws IOException any other problem talking to S3 */ @Retries.RetryTranslated protected void verifyBucketExists() - throws FileNotFoundException, IOException { + throws UnknownStoreException, IOException { if (!invoker.retry("doesBucketExist", bucket, true, () -> s3.doesBucketExist(bucket))) { - throw new FileNotFoundException("Bucket " + bucket + " does not exist"); + throw new UnknownStoreException("Bucket " + bucket + " does not exist"); } } @@ -542,15 +542,15 @@ protected void verifyBucketExists() * Verify that the bucket exists. This will correctly throw an exception * when credentials are invalid. * Retry policy: retrying, translated. - * @throws FileNotFoundException the bucket is absent + * @throws UnknownStoreException the bucket is absent * @throws IOException any other problem talking to S3 */ @Retries.RetryTranslated protected void verifyBucketExistsV2() - throws FileNotFoundException, IOException { + throws UnknownStoreException, IOException { if (!invoker.retry("doesBucketExistV2", bucket, true, () -> s3.doesBucketExistV2(bucket))) { - throw new FileNotFoundException("Bucket " + bucket + " does not exist"); + throw new UnknownStoreException("Bucket " + bucket + " does not exist"); } } @@ -2890,7 +2890,9 @@ S3AFileStatus s3GetFileStatus(final Path path, } catch (AmazonServiceException e) { // if the response is a 404 error, it just means that there is // no file at that path...the remaining checks will be needed. 
- if (e.getStatusCode() != SC_404) { + if (e.getStatusCode() != SC_404 + || UnknownStoreException.E_NO_SUCH_BUCKET.equals( + e.getErrorCode())) { throw translateException("getFileStatus", path, e); } } catch (AmazonClientException e) { @@ -2922,7 +2924,9 @@ S3AFileStatus s3GetFileStatus(final Path path, meta.getVersionId()); } } catch (AmazonServiceException e) { - if (e.getStatusCode() != SC_404) { + if (e.getStatusCode() != SC_404 + || UnknownStoreException.E_NO_SUCH_BUCKET.equals( + e.getErrorCode())) { throw translateException("getFileStatus", newKey, e); } } catch (AmazonClientException e) { @@ -2961,7 +2965,9 @@ S3AFileStatus s3GetFileStatus(final Path path, return new S3AFileStatus(Tristate.TRUE, path, username); } } catch (AmazonServiceException e) { - if (e.getStatusCode() != SC_404) { + if (e.getStatusCode() != SC_404 + || UnknownStoreException.E_NO_SUCH_BUCKET.equals( + e.getErrorCode())) { throw translateException("getFileStatus", path, e); } } catch (AmazonClientException e) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java index 09e9c993b065b..d2954b3a92045 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java @@ -188,6 +188,7 @@ protected Map, RetryPolicy> createExceptionMap() { policyMap.put(AccessDeniedException.class, fail); policyMap.put(NoAuthWithAWSException.class, fail); policyMap.put(FileNotFoundException.class, fail); + policyMap.put(UnknownStoreException.class, fail); policyMap.put(InvalidRequestException.class, fail); // metadata stores should do retries internally when it makes sense diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index e2a488e8fed9c..9915914887b25 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -249,6 +249,19 @@ public static IOException translateException(@Nullable String operation, // the object isn't there case 404: + if (UnknownStoreException.E_NO_SUCH_BUCKET.equals( + ase.getErrorCode())) { + // this is a missing bucket + ioe = new UnknownStoreException(path, ase); + } else { + // a normal unknown object + ioe = new FileNotFoundException(message); + ioe.initCause(ase); + } + break; + + // this also surfaces sometimes and is considered to + // be ~ a not found exception. case 410: ioe = new FileNotFoundException(message); ioe.initCause(ase); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java new file mode 100644 index 0000000000000..7b5c16ae4e8f8 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * The bucket or other AWS resource is unknown. + * Why not a subclass of FileNotFoundException? + * There's too much code which caches an FNFE and infers that the file isn't there; + * a missing bucket is far more significant. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class UnknownStoreException extends IOException { + + /** + * The AWS S3 error code used to recognize when a 404 means the bucket is + * unknown. + */ + public static final String E_NO_SUCH_BUCKET = "NoSuchBucket"; + + /** + * Constructor. + * @param message message + */ + public UnknownStoreException(final String message) { + this(message, null); + } + + /** + * Constructor. + * @param message message + * @param cause cause (may be null) + */ + public UnknownStoreException(final String message, Throwable cause) { + super(message); + if (cause != null) { + initCause(cause); + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md index 5408c44aea4ac..c3eb424c838bb 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md @@ -1203,29 +1203,43 @@ a new one than read to the end of a large file. Note: the threshold when data is read rather than the stream aborted can be tuned by `fs.s3a.readahead.range`; seek policy in `fs.s3a.experimental.input.fadvise`. -### `FileNotFoundException` Bucket does not exist. +### `UnknownStoreException` Bucket does not exist. The bucket does not exist. 
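+Seen at filesystem creation time if the bucket probe is enabled
+(a `fs.s3a.bucket.probe` value of 1 or 2):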
``` -java.io.FileNotFoundException: Bucket stevel45r56666 does not exist - at org.apache.hadoop.fs.s3a.S3AFileSystem.verifyBucketExists(S3AFileSystem.java:361) - at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:293) - at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3288) - at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:123) - at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3337) - at org.apache.hadoop.fs.FileSystem$Cache.getUnique(FileSystem.java:3311) - at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:529) - at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool$BucketInfo.run(S3GuardTool.java:997) - at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.run(S3GuardTool.java:309) - at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76) - at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.run(S3GuardTool.java:1218) - at org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.main(S3GuardTool.java:1227) +org.apache.hadoop.fs.s3a.UnknownStoreException: + Bucket random-bucket-33013fb8-f7f7-4edb-9c26-16a6ed019184 does not exist + at org.apache.hadoop.fs.s3a.S3AFileSystem.verifyBucketExists(S3AFileSystem.java:537) + at org.apache.hadoop.fs.s3a.S3AFileSystem.doBucketProbing(S3AFileSystem.java:471) + at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:387) + at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3422) + at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:502) ``` +Check the URI is correct, and that the bucket actually exists. + +If using a third-party store, verify that you've configured +the client to talk to the specific server in `fs.s3a.endpoint`. Forgetting to update this value and +asking the AWS S3 endpoint for a bucket is not an unusual occurrence. + +This can surface during IO if the bucket has been deleted, or the startup check for bucket existence +has been disabled by setting `fs.s3a.bucket.probe` to 0. + +``` +org.apache.hadoop.fs.s3a.UnknownStoreException: s3a://random-bucket-7d9217b0-b426-4344-82ea-25d6cbb316f1/ + + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:254) + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:167) + at org.apache.hadoop.fs.s3a.S3AFileSystem.innerListFiles(S3AFileSystem.java:4149) + at org.apache.hadoop.fs.s3a.S3AFileSystem.listFiles(S3AFileSystem.java:3983) +Caused by: com.amazonaws.services.s3.model.AmazonS3Exception: +The specified bucket does not exist + (Service: Amazon S3; Status Code: 404; Error Code: NoSuchBucket + at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1712) + at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1367) +``` -Check the URI. If using a third-party store, verify that you've configured -the client to talk to the specific server in `fs.s3a.endpoint`. 
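+
+To surface a missing bucket at startup rather than partway through a
+workload, leave the probe enabled; a configuration sketch (`2`, the v2
+check, is the default):
+
+```xml
+<property>
+  <name>fs.s3a.bucket.probe</name>
+  <value>2</value>
+</property>
+```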
 
 ## Other Issues
 
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
index f7738373324f5..d64c41e3fdc84 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
@@ -18,10 +18,9 @@
 
 package org.apache.hadoop.fs.s3a;
 
-import java.io.FileNotFoundException;
-import java.io.IOException;
 import java.net.URI;
 import java.util.UUID;
+import java.util.concurrent.Callable;
 
 import org.junit.Test;
 
@@ -29,16 +28,18 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.test.LambdaTestUtils;
 
 import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
 import static org.apache.hadoop.fs.s3a.Constants.FS_S3A;
 import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
+import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_METASTORE_NULL;
+import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
 import static org.apache.hadoop.test.LambdaTestUtils.intercept;
 
 /**
- * Class to test bucket existence api.
- * See {@link S3AFileSystem#doBucketProbing()}.
+ * Class to test bucket existence APIs.
  */
 public class ITestS3ABucketExistence extends AbstractS3ATestBase {
 
@@ -47,51 +48,88 @@ public class ITestS3ABucketExistence extends AbstractS3ATestBase {
   private final String randomBucket =
       "random-bucket-" + UUID.randomUUID().toString();
 
-  private final URI uri = URI.create(FS_S3A + "://" + randomBucket);
+  private final URI uri = URI.create(FS_S3A + "://" + randomBucket + "/");
 
   @Test
   public void testNoBucketProbing() throws Exception {
-    Configuration configuration = getConfiguration();
-    configuration.setInt(S3A_BUCKET_PROBE, 0);
-    try {
-      fs = FileSystem.get(uri, configuration);
-    } catch (IOException ex) {
-      LOG.error("Exception : ", ex);
-      throw ex;
-    }
-
-    Path path = new Path(uri);
-    intercept(FileNotFoundException.class,
-        "No such file or directory: " + path,
-        () -> fs.getFileStatus(path));
-
-    Path src = new Path(fs.getUri() + "/testfile");
+    describe("Disable init-time probes and expect FS operations to fail");
+    Configuration conf = createConfigurationWithProbe(0);
+    // metastores can bypass S3 checks, so disable S3Guard, always
+    conf.set(S3_METADATA_STORE_IMPL, S3GUARD_METASTORE_NULL);
+
+    fs = FileSystem.get(uri, conf);
+
+    Path root = new Path(uri);
+
+    expectUnknownStore(
+        () -> fs.getFileStatus(root));
+
+    expectUnknownStore(
+        () -> fs.listStatus(root));
+
+    Path src = new Path(root, "testfile");
+    expectUnknownStore(
+        () -> fs.getFileStatus(src));
+
+    // the exception must not be caught and marked down to an FNFE
+    expectUnknownStore(() -> fs.exists(src));
+    expectUnknownStore(() -> fs.isFile(src));
+    expectUnknownStore(() -> fs.isDirectory(src));
+    expectUnknownStore(() -> fs.mkdirs(src));
+    expectUnknownStore(() -> fs.delete(src));
+
     byte[] data = dataset(1024, 'a', 'z');
-    intercept(FileNotFoundException.class,
-        "The specified bucket does not exist",
+    expectUnknownStore(
         () -> writeDataset(fs, src, data, data.length, 1024 * 1024, true));
   }
 
+  /**
+   * Expect an operation to raise an UnknownStoreException.
+   * @param eval closure
+   * @param <T> return type of closure
+   * @throws Exception anything else raised.
+   */
+  public static <T> void expectUnknownStore(
+      Callable<T> eval)
+      throws Exception {
+    intercept(UnknownStoreException.class, eval);
+  }
+
+  /**
+   * Expect an operation to raise an UnknownStoreException.
+   * @param eval closure
+   * @throws Exception anything else raised.
+   */
+  public static void expectUnknownStore(
+      LambdaTestUtils.VoidCallable eval)
+      throws Exception {
+    intercept(UnknownStoreException.class, eval);
+  }
+
+  private Configuration createConfigurationWithProbe(final int probe) {
+    Configuration conf = new Configuration(getFileSystem().getConf());
+    S3ATestUtils.disableFilesystemCaching(conf);
+    conf.setInt(S3A_BUCKET_PROBE, probe);
+    return conf;
+  }
+
   @Test
   public void testBucketProbingV1() throws Exception {
-    Configuration configuration = getConfiguration();
-    configuration.setInt(S3A_BUCKET_PROBE, 1);
-    intercept(FileNotFoundException.class,
+    Configuration configuration = createConfigurationWithProbe(1);
+    expectUnknownStore(
         () -> FileSystem.get(uri, configuration));
   }
 
   @Test
   public void testBucketProbingV2() throws Exception {
-    Configuration configuration = getConfiguration();
-    configuration.setInt(S3A_BUCKET_PROBE, 2);
-    intercept(FileNotFoundException.class,
+    Configuration configuration = createConfigurationWithProbe(2);
+    expectUnknownStore(
         () -> FileSystem.get(uri, configuration));
   }
 
   @Test
   public void testBucketProbingParameterValidation() throws Exception {
-    Configuration configuration = getConfiguration();
-    configuration.setInt(S3A_BUCKET_PROBE, 3);
+    Configuration configuration = createConfigurationWithProbe(3);
     intercept(IllegalArgumentException.class,
         "Value of " + S3A_BUCKET_PROBE + " should be between 0 to 2",
         "Should throw IllegalArgumentException",
diff --git a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml
index 5fd7c25f2463f..8d2a50028eda8 100644
--- a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml
+++ b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml
@@ -50,6 +50,11 @@
       The read-only landsat-pds repository isn't managed by s3guard
     </description>
   </property>
+  <property>
+    <name>fs.s3a.bucket.landsat-pds.probe</name>
+    <value>0</value>
+    <description>Let's postpone existence checks to the first IOoperation</description>
+  </property>
 
From c156409ca94369b15a27c91f089c569db90333bd Mon Sep 17 00:00:00 2001
From: Steve Loughran
Date: Wed, 12 Feb 2020 20:10:48 +0000
Subject: [PATCH 3/5] HADOOP-16711 Constants javadoc tuning

remove the @links to protected methods; add @value

Change-Id: I24d6a922cc6d3de48aeb39cd47713430011f41ab
---
 .../src/main/java/org/apache/hadoop/fs/s3a/Constants.java | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index 086321f9a2265..65efaf8528eef 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -426,13 +426,15 @@ private Constants() {
 
   /**
    * Bucket validation parameter which can be set by client. This will be
-   * used in {@link S3AFileSystem#initialize(URI, Configuration)}
+   * used in {@code S3AFileSystem.initialize(URI, Configuration)}.
+   * Value: {@value}
    */
   public static final String S3A_BUCKET_PROBE = "fs.s3a.bucket.probe";
 
   /**
   * Default value of bucket validation parameter. An existence of bucket
-   * will be validated using {@link S3AFileSystem#verifyBucketExistsV2()}
+   * will be validated using {@code S3AFileSystem.verifyBucketExistsV2()}.
+   * Value: {@value}
    */
   public static final int S3A_BUCKET_PROBE_DEFAULT = 2;
 
From a1eed6a11acf1c41c3bb56616c5693cffdefdf05 Mon Sep 17 00:00:00 2001
From: Steve Loughran
Date: Thu, 13 Feb 2020 13:38:00 +0000
Subject: [PATCH 4/5] HADOOP-16711 cleanup and test fixup

Created a new class org.apache.hadoop.fs.s3a.impl.ErrorTranslation; future
work related to mapping from AWS exceptions to IOEs &c can go in there
rather than S3AUtils.

Moved the checks for an AmazonServiceException being caused by a missing
bucket to there; this cleans up uses of the probe.

Add a unit test for the recognition/translation.

Change-Id: If81573b0c379def4bae715e4395f3ac19857c08e
---
 .../org/apache/hadoop/fs/s3a/Constants.java   |  2 -
 .../apache/hadoop/fs/s3a/S3AFileSystem.java   | 13 ++--
 .../org/apache/hadoop/fs/s3a/S3AUtils.java    |  4 +-
 .../hadoop/fs/s3a/UnknownStoreException.java  | 12 ++--
 .../hadoop/fs/s3a/impl/ErrorTranslation.java  | 67 +++++++++++++++++++
 .../fs/s3a/TestS3AExceptionTranslation.java   | 24 +++++++
 .../s3guard/AbstractS3GuardToolTestBase.java  |  4 +-
 .../s3a/s3guard/ITestS3GuardToolDynamoDB.java |  3 +-
 8 files changed, 105 insertions(+), 24 deletions(-)
 create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java

diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index 65efaf8528eef..5b423ab3648b7 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -20,10 +20,8 @@
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
 
-import java.net.URI;
 import java.util.concurrent.TimeUnit;
 
 /**
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 451536c67d984..5ec22945f6510 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -170,6 +170,7 @@
 import static org.apache.hadoop.fs.s3a.auth.RolePolicies.allowS3Operations;
 import static org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens.TokenIssuingPolicy.NoTokensAvailable;
 import static org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens.hasDelegationTokenBinding;
+import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket;
 import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
 import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion;
 import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
@@ -2890,9 +2891,7 @@ S3AFileStatus s3GetFileStatus(final Path path,
     } catch (AmazonServiceException e) {
       // if the response is a 404 error, it just means that there is
       // no file at that path...the remaining checks will be needed.
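+      // A 404 whose error code says the bucket itself is missing is
+      // escalated here rather than treated as a missing object.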
- if (e.getStatusCode() != SC_404 - || UnknownStoreException.E_NO_SUCH_BUCKET.equals( - e.getErrorCode())) { + if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) { throw translateException("getFileStatus", path, e); } } catch (AmazonClientException e) { @@ -2924,9 +2923,7 @@ S3AFileStatus s3GetFileStatus(final Path path, meta.getVersionId()); } } catch (AmazonServiceException e) { - if (e.getStatusCode() != SC_404 - || UnknownStoreException.E_NO_SUCH_BUCKET.equals( - e.getErrorCode())) { + if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) { throw translateException("getFileStatus", newKey, e); } } catch (AmazonClientException e) { @@ -2965,9 +2962,7 @@ S3AFileStatus s3GetFileStatus(final Path path, return new S3AFileStatus(Tristate.TRUE, path, username); } } catch (AmazonServiceException e) { - if (e.getStatusCode() != SC_404 - || UnknownStoreException.E_NO_SUCH_BUCKET.equals( - e.getErrorCode())) { + if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) { throw translateException("getFileStatus", path, e); } } catch (AmazonClientException e) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index 9915914887b25..3775848fc8daa 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -86,6 +86,7 @@ import static org.apache.commons.lang3.StringUtils.isEmpty; import static org.apache.hadoop.fs.s3a.Constants.*; +import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket; import static org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteSupport.translateDeleteException; import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; @@ -249,8 +250,7 @@ public static IOException translateException(@Nullable String operation, // the object isn't there case 404: - if (UnknownStoreException.E_NO_SUCH_BUCKET.equals( - ase.getErrorCode())) { + if (isUnknownBucket(ase)) { // this is a missing bucket ioe = new UnknownStoreException(path, ase); } else { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java index 7b5c16ae4e8f8..0129005e0674b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/UnknownStoreException.java @@ -25,20 +25,16 @@ /** * The bucket or other AWS resource is unknown. + * * Why not a subclass of FileNotFoundException? - * There's too much code which caches an FNFE and infers that the file isn't there; - * a missing bucket is far more significant. + * There's too much code which caches an FNFE and infers that the file isn't + * there - a missing bucket is far more significant and generally should + * not be ignored. */ @InterfaceAudience.Public @InterfaceStability.Evolving public class UnknownStoreException extends IOException { - /** - * The AWS S3 error code used to recognize when a 404 means the bucket is - * unknown. - */ - public static final String E_NO_SUCH_BUCKET = "NoSuchBucket"; - /** * Constructor. 
   * @param message message
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java
new file mode 100644
index 0000000000000..39738d51ee6f1
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.impl;
+
+import com.amazonaws.AmazonServiceException;
+
+import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
+
+/**
+ * Translate from AWS SDK-wrapped exceptions into IOExceptions with
+ * as much information as possible.
+ * The core of the translation logic is in S3AUtils, in
+ * {@code translateException} and nearby; that has grown to be
+ * a large and complex piece of logic, as it ties in with retry/recovery
+ * policies, throttling, etc.
+ *
+ * This class is where future expansion of that code should go so that we have
+ * an isolated place for all the changes.
+ * The existing code has been left in S3AUtils to avoid cherry-picking
+ * problems on backports.
+ */
+public class ErrorTranslation {
+
+  /**
+   * Does this exception indicate that the AWS bucket was unknown?
+   * @param e exception.
+   * @return true if the status code and error code mean that the
+   * remote bucket is unknown.
+   */
+  public static boolean isUnknownBucket(AmazonServiceException e) {
+    return e.getStatusCode() == SC_404
+        && AwsErrorCodes.E_NO_SUCH_BUCKET.equals(e.getErrorCode());
+  }
+
+  /**
+   * AWS error codes explicitly recognized and processed specially;
+   * kept in their own class for isolation.
+   */
+  public static final class AwsErrorCodes {
+
+    /**
+     * The AWS S3 error code used to recognize when a 404 means the bucket is
+     * unknown.
+     */
+    public static final String E_NO_SUCH_BUCKET = "NoSuchBucket";
+
+    /** private constructor. */
+    private AwsErrorCodes() {
+    }
+  }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java
index 9b8659513004c..60bc1a1b54071 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java
@@ -39,6 +39,8 @@
 
 import org.junit.Test;
 
+import org.apache.hadoop.fs.s3a.impl.ErrorTranslation;
+
 import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains;
 
 /**
@@ -202,4 +204,26 @@ public void testExtractInterruptedIO() throws Throwable {
             new InterruptedIOException(""))));
   }
 
+  /**
+   * 404 defaults to FileNotFound.
+ */ + @Test + public void test404Handling() throws Exception { + verifyTranslated( + FileNotFoundException.class, + createS3Exception(404)); + } + + /** + * 404 + NoSuchBucket == Unknown bucket. + */ + @Test + public void testUnknownBucketException() throws Exception { + AmazonS3Exception ex404 = createS3Exception(404); + ex404.setErrorCode(ErrorTranslation.AwsErrorCodes.E_NO_SUCH_BUCKET); + verifyTranslated( + UnknownStoreException.class, + ex404); + } + } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java index 13d2646317df6..aa74c002d4b3d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java @@ -21,7 +21,6 @@ import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.net.URI; @@ -36,6 +35,7 @@ import java.util.concurrent.TimeUnit; import org.apache.hadoop.fs.s3a.S3AUtils; +import org.apache.hadoop.fs.s3a.UnknownStoreException; import org.apache.hadoop.util.StopWatch; import com.google.common.base.Preconditions; import org.apache.hadoop.fs.FileSystem; @@ -506,7 +506,7 @@ public void testToolsNoBucket() throws Throwable { cmdR.getName(), S3A_THIS_BUCKET_DOES_NOT_EXIST }; - intercept(FileNotFoundException.class, + intercept(UnknownStoreException.class, () -> cmdR.run(argsR)); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java index 915f1cc190c50..ba93927e8dc86 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java @@ -41,6 +41,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.Constants; import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.UnknownStoreException; import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Destroy; import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Init; import org.apache.hadoop.util.ExitUtil; @@ -319,7 +320,7 @@ public void testCLIFsckWithParamParentOfRoot() throws Exception { @Test public void testCLIFsckFailInitializeFs() throws Exception { - intercept(FileNotFoundException.class, "does not exist", + intercept(UnknownStoreException.class, () -> run(S3GuardTool.Fsck.NAME, "-check", "s3a://this-bucket-does-not-exist-" + UUID.randomUUID())); } From 68c4d6ee36ae914e87ef55edcc81fed063c331bb Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 20 Feb 2020 11:45:57 +0000 Subject: [PATCH 5/5] HADOOP-16711. 
Final review of changes from stevel

Change-Id: I379afa2a10dc7691abb2bd09014fd52a73e3f7f6
---
 .../apache/hadoop/fs/s3a/S3AFileSystem.java   | 18 ++++----
 .../site/markdown/tools/hadoop-aws/index.md   | 20 ++++++++++
 .../markdown/tools/hadoop-aws/performance.md  | 26 +++++-------
 .../tools/hadoop-aws/troubleshooting_s3a.md   |  9 +++--
 .../fs/s3a/ITestS3ABucketExistence.java       | 17 ++++++----
 .../fs/s3a/TestS3AExceptionTranslation.java   | 40 ++++++++-----------
 .../src/test/resources/core-site.xml          |  3 +-
 7 files changed, 78 insertions(+), 55 deletions(-)

diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 5ec22945f6510..95a0b6d928a9e 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -453,20 +453,23 @@ public void initialize(URI name, Configuration originalConf)
 
   /**
    * Test bucket existence in S3.
-   * When value of {@link Constants#S3A_BUCKET_PROBE is set to 0 by client,
+   * When the value of {@link Constants#S3A_BUCKET_PROBE} is set to 0,
    * bucket existence check is not done to improve performance of
    * S3AFileSystem initialization. When set to 1 or 2, bucket existence check
    * will be performed which is potentially slow.
-   * @throws IOException
+   * If 3 or higher: warn and use the v2 check.
+   * @throws UnknownStoreException the bucket is absent
+   * @throws IOException any other problem talking to S3
    */
   @Retries.RetryTranslated
   private void doBucketProbing() throws IOException {
-    int bucketProbe = this.getConf()
+    int bucketProbe = getConf()
            .getInt(S3A_BUCKET_PROBE, S3A_BUCKET_PROBE_DEFAULT);
-    Preconditions.checkArgument(bucketProbe >= 0 && bucketProbe <= 2,
-        "Value of " + S3A_BUCKET_PROBE + " should be between 0 to 2");
+    Preconditions.checkArgument(bucketProbe >= 0,
+        "Value of " + S3A_BUCKET_PROBE + " should be >= 0");
     switch (bucketProbe) {
     case 0:
+      LOG.debug("skipping check for bucket existence");
       break;
     case 1:
       verifyBucketExists();
@@ -475,7 +478,10 @@ private void doBucketProbing() throws IOException {
       verifyBucketExistsV2();
       break;
     default:
-      //This will never get executed because of above Precondition check.
+      // we have no idea what this is, assume it is from a later release.
+      LOG.warn("Unknown bucket probe option {}: {}; falling back to check #2",
+          S3A_BUCKET_PROBE, bucketProbe);
+      verifyBucketExistsV2();
       break;
     }
   }
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 9697e7ac40f1f..01d862953d894 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -1000,6 +1000,26 @@ options are covered in [Testing](./testing.md).
     converged to Integer.MAX_VALUE milliseconds
   </description>
 </property>
+
+<property>
+  <name>fs.s3a.bucket.probe</name>
+  <value>2</value>
+  <description>
+    The value can be 0, 1 or 2 (default).
+    When set to 0, bucket existence checks won't be done
+    during initialization thus making it faster.
+    Though it should be noted that when the bucket is not available in S3,
+    or if fs.s3a.endpoint points to the wrong instance of a private S3 store,
+    consecutive calls like listing, read, write etc. will all fail with
+    an UnknownStoreException.
+    When set to 1, the bucket existence check will be done using the
+    V1 API of the S3 protocol which doesn't verify the client's permissions
+    to list or read data in the bucket.
+    When set to 2, the bucket existence check will be done using the
+    V2 API of the S3 protocol, which does verify that the
+    client has permission to read the bucket.
+  </description>
+</property>
 
 ```
 
 ## Retry and Recovery
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
index 1dba0480c67c3..6ca6060810682 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
@@ -609,26 +609,18 @@ with HADOOP-15669.
 
 Other options may be added to `fs.s3a.ssl.channel.mode` in the future as
 further SSL optimizations are made.
 
-## Tuning S3AFileSystem Initialization.
-Any client using S3AFileSystem has to initialize it by providing a S3 bucket
-and configuration. The init method checks if the bucket provided is valid
-or not which is a slow operation leading poor performance. We can ignore
-bucket validation by configuring `fs.s3a.bucket.probe` as follows:
+## Tuning FileSystem Initialization.
+
+When an S3A Filesystem instance is created and initialized, the client
+checks if the bucket provided is valid. This can be slow.
+You can ignore bucket validation by configuring `fs.s3a.bucket.probe` as follows:
 
 ```xml
 <property>
   <name>fs.s3a.bucket.probe</name>
   <value>0</value>
-  <description>
-   The value can be 0, 1 or 2(default). When set to 0, bucket existence
-   check won't be done during initialization thus making it faster.
-   Though it should be noted that if bucket is not available in S3,
-   consecutive calls like listing, put etc might fail with
-   FileNotFoundException. When set to 1, bucket existence check will
-   be done using V1 api of S3 client which doesn't verify the permissions
-   to read bucket. When set to 2, bucket existence check will
-   be done using V2 api of S3 client which doesn't verify the permissions
-   to read bucket.
-  </description>
 </property>
-```
\ No newline at end of file
+```
+
+Note: if the bucket does not exist, this issue will surface when operations are performed
+on the filesystem; you will see `UnknownStoreException` stack traces.
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
index c3eb424c838bb..47bc81e0ec4b3 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
@@ -1220,11 +1220,12 @@ org.apache.hadoop.fs.s3a.UnknownStoreException:
 Check the URI is correct, and that the bucket actually exists.
 
 If using a third-party store, verify that you've configured
-the client to talk to the specific server in `fs.s3a.endpoint`. Forgetting to update this value and
-asking the AWS S3 endpoint for a bucket is not an unusual occurrence.
+the client to talk to the specific server in `fs.s3a.endpoint`.
+Forgetting to update this value and asking the AWS S3 endpoint
+for a bucket is not an unusual occurrence.
 
-This can surface during IO if the bucket has been deleted, or the startup check for bucket existence
-has been disabled by setting `fs.s3a.bucket.probe` to 0.
+This can surface during filesystem API calls if the bucket is deleted while you are using it,
+or the startup check for bucket existence has been disabled by setting `fs.s3a.bucket.probe` to 0.
 
 ```
 org.apache.hadoop.fs.s3a.UnknownStoreException: s3a://random-bucket-7d9217b0-b426-4344-82ea-25d6cbb316f1/
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
index d64c41e3fdc84..bde0a6449106e 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java
@@ -68,6 +68,7 @@ public void testNoBucketProbing() throws Exception {
         () -> fs.listStatus(root));
 
     Path src = new Path(root, "testfile");
+    Path dest = new Path(root, "dst");
     expectUnknownStore(
         () -> fs.getFileStatus(src));
 
@@ -77,6 +78,7 @@ public void testNoBucketProbing() throws Exception {
     expectUnknownStore(() -> fs.isDirectory(src));
     expectUnknownStore(() -> fs.mkdirs(src));
     expectUnknownStore(() -> fs.delete(src));
+    expectUnknownStore(() -> fs.rename(src, dest));
 
     byte[] data = dataset(1024, 'a', 'z');
     expectUnknownStore(
@@ -106,6 +108,12 @@ public static void expectUnknownStore(
     intercept(UnknownStoreException.class, eval);
   }
 
+  /**
+   * Create a new configuration with the given bucket probe;
+   * we also disable FS caching.
+   * @param probe value to use as the bucket probe.
+   * @return a configuration.
+   */
   private Configuration createConfigurationWithProbe(final int probe) {
     Configuration conf = new Configuration(getFileSystem().getConf());
     S3ATestUtils.disableFilesystemCaching(conf);
@@ -115,6 +123,7 @@ private Configuration createConfigurationWithProbe(final int probe) {
 
   @Test
   public void testBucketProbingV1() throws Exception {
+    describe("Test the V1 bucket probe");
     Configuration configuration = createConfigurationWithProbe(1);
     expectUnknownStore(
         () -> FileSystem.get(uri, configuration));
@@ -122,6 +131,7 @@ public void testBucketProbingV1() throws Exception {
 
   @Test
   public void testBucketProbingV2() throws Exception {
+    describe("Test the V2 bucket probe");
     Configuration configuration = createConfigurationWithProbe(2);
     expectUnknownStore(
         () -> FileSystem.get(uri, configuration));
@@ -129,8 +139,7 @@ public void testBucketProbingV2() throws Exception {
 
   @Test
   public void testBucketProbingParameterValidation() throws Exception {
-    Configuration configuration = createConfigurationWithProbe(3);
-    intercept(IllegalArgumentException.class,
-        "Value of " + S3A_BUCKET_PROBE + " should be between 0 to 2",
-        "Should throw IllegalArgumentException",
+    describe("Probe values above 2 fall back to the v2 check");
+    Configuration configuration = createConfigurationWithProbe(3);
+    expectUnknownStore(
         () -> FileSystem.get(uri, configuration));
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java
index 60bc1a1b54071..95bd7c21b85a1 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java
@@ -21,6 +21,7 @@
 import static org.apache.hadoop.fs.s3a.Constants.*;
 import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
 import static org.apache.hadoop.fs.s3a.S3AUtils.*;
+import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
 import static org.junit.Assert.*;
 
 import java.io.EOFException;
@@ -100,9 +101,24 @@ public void test403isNotPermittedFound() throws Exception {
     verifyTranslated(403, AccessDeniedException.class);
   }
 
+  /**
+   * 404 defaults to FileNotFound.
+ */ @Test public void test404isNotFound() throws Exception { - verifyTranslated(404, FileNotFoundException.class); + verifyTranslated(SC_404, FileNotFoundException.class); + } + + /** + * 404 + NoSuchBucket == Unknown bucket. + */ + @Test + public void testUnknownBucketException() throws Exception { + AmazonS3Exception ex404 = createS3Exception(SC_404); + ex404.setErrorCode(ErrorTranslation.AwsErrorCodes.E_NO_SUCH_BUCKET); + verifyTranslated( + UnknownStoreException.class, + ex404); } @Test @@ -204,26 +220,4 @@ public void testExtractInterruptedIO() throws Throwable { new InterruptedIOException("")))); } - /** - * 404 defaults to FileNotFound. - */ - @Test - public void test404Handling() throws Exception { - verifyTranslated( - FileNotFoundException.class, - createS3Exception(404)); - } - - /** - * 404 + NoSuchBucket == Unknown bucket. - */ - @Test - public void testUnknownBucketException() throws Exception { - AmazonS3Exception ex404 = createS3Exception(404); - ex404.setErrorCode(ErrorTranslation.AwsErrorCodes.E_NO_SUCH_BUCKET); - verifyTranslated( - UnknownStoreException.class, - ex404); - } - } diff --git a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml index 8d2a50028eda8..a90edbe24fc4a 100644 --- a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml +++ b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml @@ -50,10 +50,11 @@ The read-only landsat-pds repository isn't managed by s3guard + fs.s3a.bucket.landsat-pds.probe 0 - Let's postpone existence checks to the first IOoperation + Let's postpone existence checks to the first IO operation