Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,20 @@ private Constants() {
"fs.s3a.metadatastore.authoritative";
public static final boolean DEFAULT_METADATASTORE_AUTHORITATIVE = false;

/**
* Bucket validation parameter which can be set by client. This will be
* used in {@code S3AFileSystem.initialize(URI, Configuration)}.
* Value: {@value}
*/
public static final String S3A_BUCKET_PROBE = "fs.s3a.bucket.probe";

/**
* Default value of the bucket validation parameter. The existence of the
* bucket will be validated using {@code S3AFileSystem.verifyBucketExistsV2()}.
* Value: {@value}
*/
public static final int S3A_BUCKET_PROBE_DEFAULT = 2;

/**
* How long a directory listing in the MS is considered as authoritative.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@
import static org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens.hasDelegationTokenBinding;
import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.submit;
import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletionIgnoringExceptions;
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion;
import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
Expand Down Expand Up @@ -392,9 +393,7 @@ public void initialize(URI name, Configuration originalConf)
initCannedAcls(conf);

// This initiates a probe against S3 for the bucket existing.
// It is where all network and authentication configuration issues
// surface, and is potentially slow.
verifyBucketExists();
doBucketProbing();

inputPolicy = S3AInputPolicy.getPolicy(
conf.getTrimmed(INPUT_FADVISE, INPUT_FADV_NORMAL));
Expand Down Expand Up @@ -463,6 +462,41 @@ public void initialize(URI name, Configuration originalConf)

}

/**
 * Probe S3 for the bucket, as selected by the
 * {@link Constants#S3A_BUCKET_PROBE} option.
 * <ul>
 *   <li>0: no probe is made, which speeds up S3AFileSystem
 *   initialization.</li>
 *   <li>1: probe with {@link #verifyBucketExists()}; potentially slow.</li>
 *   <li>2: probe with {@link #verifyBucketExistsV2()}; potentially slow.</li>
 *   <li>3 or higher: log a warning, then use the v2 check.</li>
 * </ul>
 * A negative value fails the precondition check.
 * @throws UnknownStoreException the bucket is absent
 * @throws IOException any other problem talking to S3
 */
@Retries.RetryTranslated
private void doBucketProbing() throws IOException {
  final int probeLevel =
      getConf().getInt(S3A_BUCKET_PROBE, S3A_BUCKET_PROBE_DEFAULT);
  Preconditions.checkArgument(probeLevel >= 0,
      "Value of " + S3A_BUCKET_PROBE + " should be >= 0");

  if (probeLevel == 0) {
    // the caller opted out of the existence check altogether
    LOG.debug("skipping check for bucket existence");
    return;
  }
  if (probeLevel == 1) {
    verifyBucketExists();
    return;
  }
  if (probeLevel > 2) {
    // we have no idea what this is, assume it is from a later release.
    LOG.warn("Unknown bucket probe option {}: {}; falling back to check #2",
        S3A_BUCKET_PROBE, probeLevel);
  }
  // level 2, and every unrecognized higher level, ends up here
  verifyBucketExistsV2();
}

/**
* Initialize the thread pool.
* This must be re-invoked after replacing the S3Client during test
Expand Down Expand Up @@ -510,15 +544,31 @@ protected static S3AStorageStatistics createStorageStatistics() {
* Verify that the bucket exists. This does not check permissions,
* not even read access.
* Retry policy: retrying, translated.
* @throws FileNotFoundException the bucket is absent
* @throws UnknownStoreException the bucket is absent
* @throws IOException any other problem talking to S3
*/
@Retries.RetryTranslated
protected void verifyBucketExists()
throws FileNotFoundException, IOException {
throws UnknownStoreException, IOException {
if (!invoker.retry("doesBucketExist", bucket, true,
() -> s3.doesBucketExist(bucket))) {
throw new FileNotFoundException("Bucket " + bucket + " does not exist");
throw new UnknownStoreException("Bucket " + bucket + " does not exist");
}
}

/**
 * Check that the bucket exists, using the V2 existence probe of the
 * S3 client. Unlike the V1 probe, this will correctly throw an exception
 * when credentials are invalid.
 * Retry policy: retrying, translated.
 * @throws UnknownStoreException the bucket is absent
 * @throws IOException any other problem talking to S3
 */
@Retries.RetryTranslated
protected void verifyBucketExistsV2()
    throws UnknownStoreException, IOException {
  final boolean bucketFound = invoker.retry("doesBucketExistV2", bucket, true,
      () -> s3.doesBucketExistV2(bucket));
  if (bucketFound) {
    return;
  }
  throw new UnknownStoreException("Bucket " + bucket + " does not exist");
}

Expand Down Expand Up @@ -2891,7 +2941,7 @@ S3AFileStatus s3GetFileStatus(final Path path,
} catch (AmazonServiceException e) {
// if the response is a 404 error, it just means that there is
// no file at that path...the remaining checks will be needed.
if (e.getStatusCode() != SC_404) {
if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) {
throw translateException("getFileStatus", path, e);
}
} catch (AmazonClientException e) {
Expand Down Expand Up @@ -2923,7 +2973,7 @@ S3AFileStatus s3GetFileStatus(final Path path,
meta.getVersionId());
}
} catch (AmazonServiceException e) {
if (e.getStatusCode() != SC_404) {
if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) {
throw translateException("getFileStatus", newKey, e);
}
} catch (AmazonClientException e) {
Expand Down Expand Up @@ -2962,7 +3012,7 @@ S3AFileStatus s3GetFileStatus(final Path path,
return new S3AFileStatus(Tristate.TRUE, path, username);
}
} catch (AmazonServiceException e) {
if (e.getStatusCode() != SC_404) {
if (e.getStatusCode() != SC_404 || isUnknownBucket(e)) {
throw translateException("getFileStatus", path, e);
}
} catch (AmazonClientException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ protected Map<Class<? extends Exception>, RetryPolicy> createExceptionMap() {
policyMap.put(AccessDeniedException.class, fail);
policyMap.put(NoAuthWithAWSException.class, fail);
policyMap.put(FileNotFoundException.class, fail);
policyMap.put(UnknownStoreException.class, fail);
policyMap.put(InvalidRequestException.class, fail);

// metadata stores should do retries internally when it makes sense
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@

import static org.apache.commons.lang3.StringUtils.isEmpty;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket;
import static org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteSupport.translateDeleteException;
import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;

Expand Down Expand Up @@ -249,6 +250,18 @@ public static IOException translateException(@Nullable String operation,

// the object isn't there
case 404:
if (isUnknownBucket(ase)) {
// this is a missing bucket
ioe = new UnknownStoreException(path, ase);
} else {
// a normal unknown object
ioe = new FileNotFoundException(message);
ioe.initCause(ase);
}
break;

// this also surfaces sometimes and is considered to
// be ~ a not found exception.
case 410:
ioe = new FileNotFoundException(message);
ioe.initCause(ase);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.s3a;

import java.io.IOException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

/**
 * Raised when the bucket, or some other AWS resource, is unknown.
 *
 * <p>This is deliberately not a subclass of FileNotFoundException:
 * too much code catches an FNFE and infers that the file isn't
 * there. A missing bucket is far more significant and generally should
 * not be ignored.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class UnknownStoreException extends IOException {

  /**
   * Create an exception with a message and no cause.
   * @param message message
   */
  public UnknownStoreException(final String message) {
    super(message);
  }

  /**
   * Create an exception with a message and an optional cause.
   * @param message message
   * @param cause cause (may be null)
   */
  public UnknownStoreException(final String message, Throwable cause) {
    super(message);
    // Only bind the cause when there is one: a null cause is left
    // uninitialized rather than being explicitly set to null.
    if (null != cause) {
      initCause(cause);
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.s3a.impl;

import com.amazonaws.AmazonServiceException;

import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;

/**
 * Translate from AWS SDK-wrapped exceptions into IOExceptions with
 * as much information as possible.
 * The core of the translation logic is in S3AUtils, in
 * {@code translateException} and nearby; that has grown to be
 * a large and complex piece of logic, as it ties in with retry/recovery
 * policies, throttling, etc.
 *
 * This class is where future expansion of that code should go so that we have
 * an isolated place for all the changes.
 * The existing code has been left in S3AUtils to avoid cherry-picking
 * problems on backports.
 */
public final class ErrorTranslation {

  /**
   * Private constructor for utility class.
   */
  private ErrorTranslation() {
  }

  /**
   * Does this exception indicate that the AWS Bucket was unknown.
   * @param e exception.
   * @return true if the status code and error code mean that the
   * remote bucket is unknown.
   */
  public static boolean isUnknownBucket(AmazonServiceException e) {
    // Comparing from the constant side keeps this safe when the
    // exception carries no error code.
    return e.getStatusCode() == SC_404
        && AwsErrorCodes.E_NO_SUCH_BUCKET.equals(e.getErrorCode());
  }

  /**
   * AWS error codes explicitly recognized and processed specially;
   * kept in their own class for isolation.
   */
  public static final class AwsErrorCodes {

    /**
     * The AWS S3 error code used to recognize when a 404 means the bucket is
     * unknown.
     */
    public static final String E_NO_SUCH_BUCKET = "NoSuchBucket";

    /** private constructor. */
    private AwsErrorCodes() {
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,26 @@ options are covered in [Testing](./testing.md).
converged to Integer.MAX_VALUE milliseconds
</description>
</property>

<property>
<name>fs.s3a.bucket.probe</name>
<value>2</value>
<description>
The value can be 0, 1 or 2 (default).
When set to 0, bucket existence checks won't be done
during initialization thus making it faster.
Note, however, that when the bucket is not available in S3,
or if fs.s3a.endpoint points to the wrong instance of a private S3 store,
subsequent calls such as listing, read, write etc. will fail with
an UnknownStoreException.
When set to 1, the bucket existence check will be done using the
V1 API of the S3 protocol which doesn't verify the client's permissions
to list or read data in the bucket.
When set to 2, the bucket existence check will be done using the
V2 API of the S3 protocol which does verify that the
client has permission to read the bucket.
</description>
</property>
```

## <a name="retry_and_recovery"></a>Retry and Recovery
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -608,3 +608,19 @@ with HADOOP-15669.

Other options may be added to `fs.s3a.ssl.channel.mode` in the future as
further SSL optimizations are made.

## Tuning FileSystem Initialization

When an S3A Filesystem instance is created and initialized, the client
checks if the bucket provided is valid. This can be slow.
You can skip bucket validation by setting `fs.s3a.bucket.probe` to 0:

```xml
<property>
<name>fs.s3a.bucket.probe</name>
<value>0</value>
</property>
```

Note: if the bucket does not exist, this issue will surface when operations are performed
on the filesystem; you will see `UnknownStoreException` stack traces.
Loading