diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 764a6adaca27d..d1d0b48115d33 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1203,4 +1203,18 @@ private Constants() { * Default maximum read size in bytes during vectored reads : {@value}. */ public static final int DEFAULT_AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE = 1253376; //1M + + /** + * Flag for immediate failure when observing a {@link AWSBadRequestException}. + * If set to false, the failure is treated as retryable. + * Value {@value}. + */ + public static final String FAIL_ON_AWS_BAD_REQUEST = "fs.s3a.fail.on.aws.bad.request"; + + /** + * Default value for immediate failure when observing a + * {@link AWSBadRequestException}: {@value}. + */ + public static final boolean DEFAULT_FAIL_ON_AWS_BAD_REQUEST = true; + } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java index 30427f7672a43..0d0ee47b58cfb 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java @@ -214,7 +214,10 @@ protected Map, RetryPolicy> createExceptionMap() { // policy on a 400/bad request still ambiguous. // Treated as an immediate failure - policyMap.put(AWSBadRequestException.class, fail); + RetryPolicy awsBadRequestExceptionRetryPolicy = + configuration.getBoolean(FAIL_ON_AWS_BAD_REQUEST, DEFAULT_FAIL_ON_AWS_BAD_REQUEST) ? 
+ fail : retryIdempotentCalls; + policyMap.put(AWSBadRequestException.class, awsBadRequestExceptionRetryPolicy); // Status 500 error code is also treated as a connectivity problem policyMap.put(AWSStatus500Exception.class, connectivityFailure); diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 7c0a49f8fbeda..c78f6b4f2fea7 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -1118,12 +1118,15 @@ from them. * Connection timeout: `ConnectTimeoutException`. Timeout before setting up a connection to the S3 endpoint (or proxy). -* HTTP response status code 400, "Bad Request" +* HTTP response status code 400, "Bad Request" (`AWSBadRequestException`) The status code 400, Bad Request usually means that the request is unrecoverable; it's the generic "No" response. Very rarely it does recover, which is why it is in this category, rather than that -of unrecoverable failures. +of unrecoverable failures. By default, the operation fails immediately +without retry. If your system is sensitive to failures, you can +set `fs.s3a.fail.on.aws.bad.request` to `false` so that idempotent +operations are retried after a Bad Request with status code 400. These failures will be retried with an exponential sleep interval set in `fs.s3a.retry.interval`, up to the limit set in `fs.s3a.retry.limit`. 
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java index 35199f4092790..1e5e8a02e0ecc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestInvoker.java @@ -311,12 +311,25 @@ public void testRetryAWSConnectivity() throws Throwable { */ @Test(expected = AWSBadRequestException.class) public void testRetryBadRequestNotIdempotent() throws Throwable { - invoker.retry("test", null, false, + + invoker.retry("test", null, true, () -> { throw BAD_REQUEST; }); } + @Test + public void testRetryBadRequestIdempotent() throws Throwable { + Configuration conf = new Configuration(FAST_RETRY_CONF); + conf.setBoolean(FAIL_ON_AWS_BAD_REQUEST, false); + S3ARetryPolicy retryPolicy = new S3ARetryPolicy(conf); + + IOException ex = translateException("GET", "/", BAD_REQUEST); + assertRetryAction("Expected retry on aws bad request", + retryPolicy, RetryPolicy.RetryAction.RETRY, + ex, 1, true); + } + @Test public void testConnectionRetryPolicyIdempotent() throws Throwable { assertRetryAction("Expected retry on connection timeout",