From 1073428546e46abc3b9d91bb3657129024be4def Mon Sep 17 00:00:00 2001 From: Adnan Hemani Date: Thu, 8 Feb 2024 22:47:31 -0800 Subject: [PATCH 1/5] HADOOP-19050. Add S3 Access Grants Support in S3A --- .../org/apache/hadoop/fs/s3a/Constants.java | 17 +++ .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 25 ++++ .../site/markdown/tools/hadoop-aws/index.md | 32 ++++++ .../s3a/TestS3AccessGrantConfiguration.java | 108 ++++++++++++++++++ 4 files changed, 182 insertions(+) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 96dc2be6a260d..48a3d1e3e1b72 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1624,4 +1624,21 @@ private Constants() { * Value: {@value}. */ public static final boolean DEFAULT_AWS_S3_CLASSLOADER_ISOLATION = true; + + /** + * Flag {@value} + * to enable S3 Access Grants to control authorization to S3 data. More information: + * https://aws.amazon.com/s3/features/access-grants/ + * and + * https://github.com/aws/aws-s3-accessgrants-plugin-java-v2/ + */ + public static final String AWS_S3_ACCESS_GRANTS_ENABLED = "fs.s3a.s3accessgrants.enabled"; + + /** + * Flag {@value} to enable jobs fall back to the Job Execution IAM role in + * case they get Access Denied from the S3 Access Grants call. More information: + * https://github.com/aws/aws-s3-accessgrants-plugin-java-v2/ + */ + public static final String AWS_S3_ACCESS_GRANTS_FALLBACK_TO_IAM_ENABLED = + "fs.s3a.s3accessgrants.fallback.to.iam"; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 7f6978e8e9284..65fdc5fc6f0a8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -37,6 +37,7 @@ import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; import software.amazon.awssdk.identity.spi.AwsCredentialsIdentity; import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsPlugin; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3BaseClientBuilder; import software.amazon.awssdk.services.s3.S3Client; @@ -53,6 +54,8 @@ import org.apache.hadoop.fs.store.LogExactlyOnce; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESS_GRANTS_ENABLED; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESS_GRANTS_FALLBACK_TO_IAM_ENABLED; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; @@ -94,6 +97,11 @@ public class DefaultS3ClientFactory extends Configured private static final LogExactlyOnce WARN_OF_DEFAULT_REGION_CHAIN = new LogExactlyOnce(LOG); + /** + * A one-off logger of S3 Access Grants usage and attachment status to the S3 client. + */ + private static final LogExactlyOnce LOG_S3AG_ENABLED = new LogExactlyOnce(LOG); + /** * Warning message printed when the SDK Region chain is in use. */ @@ -178,6 +186,8 @@ private , ClientT> Build configureEndpointAndRegion(builder, parameters, conf); + applyS3AccessGrantsConfigurations(builder, conf); + S3Configuration serviceConfiguration = S3Configuration.builder() .pathStyleAccessEnabled(parameters.isPathStyleAccess()) .checksumValidationEnabled(parameters.isChecksumValidationEnabled()) @@ -402,4 +412,19 @@ private static Region getS3RegionFromEndpoint(final String endpoint, return Region.of(AWS_S3_DEFAULT_REGION); } + private static , ClientT> void + applyS3AccessGrantsConfigurations(BuilderT builder, Configuration conf) { + if (!conf.getBoolean(AWS_S3_ACCESS_GRANTS_ENABLED, false)){ + LOG_S3AG_ENABLED.debug("S3 Access Grants plugin is not enabled."); + return; + } + + LOG_S3AG_ENABLED.info("S3 Access Grants plugin is enabled."); + boolean isFallbackEnabled = conf.getBoolean(AWS_S3_ACCESS_GRANTS_FALLBACK_TO_IAM_ENABLED, false); + S3AccessGrantsPlugin accessGrantsPlugin = + S3AccessGrantsPlugin.builder().enableFallback(isFallbackEnabled).build(); + builder.addPlugin(accessGrantsPlugin); + LOG_S3AG_ENABLED.info("S3 Access Grants plugin is added to S3 client with fallback: {}", isFallbackEnabled); + } + } diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index d601e21bbed69..bb60658432b3d 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -614,6 +614,38 @@ If the following property is not set or set to `true`, the following exception w java.io.IOException: From option fs.s3a.aws.credentials.provider java.lang.ClassNotFoundException: Class CustomCredentialsProvider not found ``` +## S3 Authorization Using S3 Access Grants + +[S3 Access Grants](https://aws.amazon.com/s3/features/access-grants/) can be used to grant accesses to S3 data using IAM Principals. +In order to enable S3 Access Grants to work with S3A, we enable the +[S3 Access Grants plugin](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2) on all S3 clients, +which is found within the AWS Java SDK bundle (v2.23.19+). + +We support both cross-region access (by default) and the +[fallback-to-IAM configuration](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2?tab=readme-ov-file#using-the-plugin) +which allows you to fallback to using your IAM role (and its permission sets directly) to access your S3 data in the case that S3 Access Grants +is unable to authorize your S3 call. + +The following is how you can enable this feature: + +```xml + + fs.s3a.s3accessgrants.enabled + true + + + + fs.s3a.s3accessgrants.fallback.to.iam + true + +``` + +Please note that we only enable the [S3 Access Grants plugin](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2) on the S3 clients +as part of this feature. Any usage issues or bug reporting should be done directly at the plugin's +[GitHub repo](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2/issues). + +For more details on using S3 Access Grants, please refer to +[Managing access with S3 Access Grants](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-grants.html). ## Storing secrets with Hadoop Credential Providers diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java new file mode 100644 index 0000000000000..edaf2486f78a8 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.test.AbstractHadoopTestBase; +import org.junit.Assert; +import org.junit.Test; + +import software.amazon.awssdk.awscore.AwsClient; +import software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsIdentityProvider; +import software.amazon.awssdk.services.s3.S3BaseClientBuilder; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESS_GRANTS_ENABLED; + + +/** + * Test S3 Access Grants configurations. + */ +public class TestS3AccessGrantConfiguration extends AbstractHadoopTestBase { + /** + * This credential provider will be attached to any client + * that has been configured with the S3 Access Grants plugin. + * {@link software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsPlugin}. + */ + public static final String S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS = + S3AccessGrantsIdentityProvider.class.getName(); + + @Test + public void testS3AccessGrantsEnabled() throws IOException, URISyntaxException { + // Feature is explicitly enabled + AwsClient s3AsyncClient = getAwsClient(createConfig(true), true); + Assert.assertEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3AsyncClient)); + + AwsClient s3Client = getAwsClient(createConfig(true), false); + Assert.assertEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3Client)); + } + + @Test + public void testS3AccessGrantsDisabled() throws IOException, URISyntaxException { + // Disabled by default + AwsClient s3AsyncDefaultClient = getAwsClient(new Configuration(), true); + Assert.assertNotEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3AsyncDefaultClient)); + + AwsClient s3DefaultClient = getAwsClient(new Configuration(), true); + Assert.assertNotEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3DefaultClient)); + + // Disabled if explicitly set + AwsClient s3AsyncExplicitlyDisabledClient = getAwsClient(createConfig(false), true); + Assert.assertNotEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3AsyncExplicitlyDisabledClient)); + + AwsClient s3ExplicitlyDisabledClient = getAwsClient(createConfig(false), true); + Assert.assertNotEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3ExplicitlyDisabledClient)); + } + + private Configuration createConfig(boolean s3agEnabled) { + Configuration conf = new Configuration(); + conf.setBoolean(AWS_S3_ACCESS_GRANTS_ENABLED, s3agEnabled); + return conf; + } + + private String getCredentialProviderName(AwsClient awsClient) { + return awsClient.serviceClientConfiguration().credentialsProvider().getClass().getName(); + } + + private , ClientT> AwsClient + getAwsClient(Configuration conf, boolean asyncClient) throws IOException, URISyntaxException { + DefaultS3ClientFactory factory = new DefaultS3ClientFactory(); + factory.setConf(conf); + S3ClientFactory.S3ClientCreationParameters parameters = + new S3ClientFactory.S3ClientCreationParameters(); + URI uri = new URI("any-uri"); + return asyncClient ? + factory.createS3AsyncClient(uri, parameters): factory.createS3Client(uri, parameters); + } +} \ No newline at end of file From 369d0d828afe381ab6d7a7a423da043da0d2e66a Mon Sep 17 00:00:00 2001 From: Adnan Hemani Date: Thu, 8 Feb 2024 22:51:47 -0800 Subject: [PATCH 2/5] add empty line at bottom of file --- .../apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java index edaf2486f78a8..c800a97ae5f28 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java @@ -105,4 +105,4 @@ private String getCredentialProviderName(AwsClient awsClient) { return asyncClient ? factory.createS3AsyncClient(uri, parameters): factory.createS3Client(uri, parameters); } -} \ No newline at end of file +} From 5ff42de6cb06200a75a06af444489c2755b57a8c Mon Sep 17 00:00:00 2001 From: Adnan Hemani Date: Fri, 9 Feb 2024 14:48:55 -0800 Subject: [PATCH 3/5] PR Revision 1 --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 9 +- .../site/markdown/tools/hadoop-aws/index.md | 20 +-- .../s3a/TestS3AccessGrantConfiguration.java | 135 +++++++++--------- 3 files changed, 82 insertions(+), 82 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 65fdc5fc6f0a8..f253d1ce40194 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -419,12 +419,13 @@ private static Region getS3RegionFromEndpoint(final String endpoint, return; } - LOG_S3AG_ENABLED.info("S3 Access Grants plugin is enabled."); - boolean isFallbackEnabled = conf.getBoolean(AWS_S3_ACCESS_GRANTS_FALLBACK_TO_IAM_ENABLED, false); + boolean isFallbackEnabled = + conf.getBoolean(AWS_S3_ACCESS_GRANTS_FALLBACK_TO_IAM_ENABLED, false); S3AccessGrantsPlugin accessGrantsPlugin = - S3AccessGrantsPlugin.builder().enableFallback(isFallbackEnabled).build(); + S3AccessGrantsPlugin.builder().enableFallback(isFallbackEnabled).build(); builder.addPlugin(accessGrantsPlugin); - LOG_S3AG_ENABLED.info("S3 Access Grants plugin is added to S3 client with fallback: {}", isFallbackEnabled); + LOG_S3AG_ENABLED.info( + "S3 Access Grants plugin is enabled with IAM fallback set to {}", isFallbackEnabled); } } diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index bb60658432b3d..2b50516f3a2aa 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -617,16 +617,16 @@ java.io.IOException: From option fs.s3a.aws.credentials.provider java.lang.Class ## S3 Authorization Using S3 Access Grants [S3 Access Grants](https://aws.amazon.com/s3/features/access-grants/) can be used to grant accesses to S3 data using IAM Principals. -In order to enable S3 Access Grants to work with S3A, we enable the -[S3 Access Grants plugin](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2) on all S3 clients, +In order to enable S3 Access Grants, S3A utilizes the +[S3 Access Grants plugin](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2) on all S3 clients, which is found within the AWS Java SDK bundle (v2.23.19+). -We support both cross-region access (by default) and the -[fallback-to-IAM configuration](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2?tab=readme-ov-file#using-the-plugin) -which allows you to fallback to using your IAM role (and its permission sets directly) to access your S3 data in the case that S3 Access Grants -is unable to authorize your S3 call. +S3A supports both cross-region access (by default) and the +[fallback-to-IAM configuration](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2?tab=readme-ov-file#using-the-plugin) +which allows S3A to fallback to using the IAM role (and its permission sets directly) to access S3 data in the case that S3 Access Grants +is unable to authorize the S3 call. -The following is how you can enable this feature: +The following is how this feature can be enabled: ```xml @@ -640,11 +640,11 @@ The following is how you can enable this feature: ``` -Please note that we only enable the [S3 Access Grants plugin](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2) on the S3 clients -as part of this feature. Any usage issues or bug reporting should be done directly at the plugin's +Please note that S3A only enables the [S3 Access Grants plugin](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2) on the S3 clients +as part of this feature. Any usage issues or bug reporting should be done directly at the plugin's [GitHub repo](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2/issues). -For more details on using S3 Access Grants, please refer to +For more details on using S3 Access Grants, please refer to [Managing access with S3 Access Grants](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-grants.html). ## Storing secrets with Hadoop Credential Providers diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java index c800a97ae5f28..ccc8438858cbb 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java @@ -25,7 +25,6 @@ import software.amazon.awssdk.awscore.AwsClient; import software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsIdentityProvider; -import software.amazon.awssdk.services.s3.S3BaseClientBuilder; import java.io.IOException; import java.net.URI; @@ -38,71 +37,71 @@ * Test S3 Access Grants configurations. */ public class TestS3AccessGrantConfiguration extends AbstractHadoopTestBase { - /** - * This credential provider will be attached to any client - * that has been configured with the S3 Access Grants plugin. - * {@link software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsPlugin}. - */ - public static final String S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS = - S3AccessGrantsIdentityProvider.class.getName(); - - @Test - public void testS3AccessGrantsEnabled() throws IOException, URISyntaxException { - // Feature is explicitly enabled - AwsClient s3AsyncClient = getAwsClient(createConfig(true), true); - Assert.assertEquals( - S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, - getCredentialProviderName(s3AsyncClient)); - - AwsClient s3Client = getAwsClient(createConfig(true), false); - Assert.assertEquals( - S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, - getCredentialProviderName(s3Client)); - } - - @Test - public void testS3AccessGrantsDisabled() throws IOException, URISyntaxException { - // Disabled by default - AwsClient s3AsyncDefaultClient = getAwsClient(new Configuration(), true); - Assert.assertNotEquals( - S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, - getCredentialProviderName(s3AsyncDefaultClient)); - - AwsClient s3DefaultClient = getAwsClient(new Configuration(), true); - Assert.assertNotEquals( - S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, - getCredentialProviderName(s3DefaultClient)); - - // Disabled if explicitly set - AwsClient s3AsyncExplicitlyDisabledClient = getAwsClient(createConfig(false), true); - Assert.assertNotEquals( - S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, - getCredentialProviderName(s3AsyncExplicitlyDisabledClient)); - - AwsClient s3ExplicitlyDisabledClient = getAwsClient(createConfig(false), true); - Assert.assertNotEquals( - S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, - getCredentialProviderName(s3ExplicitlyDisabledClient)); - } - - private Configuration createConfig(boolean s3agEnabled) { - Configuration conf = new Configuration(); - conf.setBoolean(AWS_S3_ACCESS_GRANTS_ENABLED, s3agEnabled); - return conf; - } - - private String getCredentialProviderName(AwsClient awsClient) { - return awsClient.serviceClientConfiguration().credentialsProvider().getClass().getName(); - } - - private , ClientT> AwsClient - getAwsClient(Configuration conf, boolean asyncClient) throws IOException, URISyntaxException { - DefaultS3ClientFactory factory = new DefaultS3ClientFactory(); - factory.setConf(conf); - S3ClientFactory.S3ClientCreationParameters parameters = - new S3ClientFactory.S3ClientCreationParameters(); - URI uri = new URI("any-uri"); - return asyncClient ? - factory.createS3AsyncClient(uri, parameters): factory.createS3Client(uri, parameters); - } + /** + * This credential provider will be attached to any client + * that has been configured with the S3 Access Grants plugin. + * {@link software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsPlugin}. + */ + public static final String S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS = + S3AccessGrantsIdentityProvider.class.getName(); + + @Test + public void testS3AccessGrantsEnabled() throws IOException, URISyntaxException { + // Feature is explicitly enabled + AwsClient s3AsyncClient = getAwsClient(createConfig(true), true); + Assert.assertEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3AsyncClient)); + + AwsClient s3Client = getAwsClient(createConfig(true), false); + Assert.assertEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3Client)); + } + + @Test + public void testS3AccessGrantsDisabled() throws IOException, URISyntaxException { + // Disabled by default + AwsClient s3AsyncDefaultClient = getAwsClient(new Configuration(), true); + Assert.assertNotEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3AsyncDefaultClient)); + + AwsClient s3DefaultClient = getAwsClient(new Configuration(), true); + Assert.assertNotEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3DefaultClient)); + + // Disabled if explicitly set + AwsClient s3AsyncExplicitlyDisabledClient = getAwsClient(createConfig(false), true); + Assert.assertNotEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3AsyncExplicitlyDisabledClient)); + + AwsClient s3ExplicitlyDisabledClient = getAwsClient(createConfig(false), true); + Assert.assertNotEquals( + S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, + getCredentialProviderName(s3ExplicitlyDisabledClient)); + } + + private Configuration createConfig(boolean s3agEnabled) { + Configuration conf = new Configuration(); + conf.setBoolean(AWS_S3_ACCESS_GRANTS_ENABLED, s3agEnabled); + return conf; + } + + private String getCredentialProviderName(AwsClient awsClient) { + return awsClient.serviceClientConfiguration().credentialsProvider().getClass().getName(); + } + + private AwsClient getAwsClient(Configuration conf, boolean asyncClient) + throws IOException, URISyntaxException { + DefaultS3ClientFactory factory = new DefaultS3ClientFactory(); + factory.setConf(conf); + S3ClientFactory.S3ClientCreationParameters parameters = + new S3ClientFactory.S3ClientCreationParameters(); + URI uri = new URI("any-uri"); + return asyncClient ? + factory.createS3AsyncClient(uri, parameters): factory.createS3Client(uri, parameters); + } } From 67c0d847733770445c1de5673c828a2d48a4cea8 Mon Sep 17 00:00:00 2001 From: Adnan Hemani Date: Fri, 8 Mar 2024 02:24:34 -0800 Subject: [PATCH 4/5] PR Revision 2; Various review items --- .../org/apache/hadoop/fs/s3a/Constants.java | 4 +- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 17 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 10 + .../tools/hadoop-aws/authentication.md | 433 ++++++++++++++++++ .../site/markdown/tools/hadoop-aws/index.md | 415 +---------------- .../s3a/TestS3AccessGrantConfiguration.java | 95 ++-- 6 files changed, 510 insertions(+), 464 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/authentication.md diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 48a3d1e3e1b72..259e596d9940d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1632,7 +1632,7 @@ private Constants() { * and * https://github.com/aws/aws-s3-accessgrants-plugin-java-v2/ */ - public static final String AWS_S3_ACCESS_GRANTS_ENABLED = "fs.s3a.s3accessgrants.enabled"; + public static final String AWS_S3_ACCESS_GRANTS_ENABLED = "fs.s3a.access.grants.enabled"; /** * Flag {@value} to enable jobs fall back to the Job Execution IAM role in @@ -1640,5 +1640,5 @@ private Constants() { * https://github.com/aws/aws-s3-accessgrants-plugin-java-v2/ */ public static final String AWS_S3_ACCESS_GRANTS_FALLBACK_TO_IAM_ENABLED = - "fs.s3a.s3accessgrants.fallback.to.iam"; + "fs.s3a.access.grants.fallback.to.iam"; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index f253d1ce40194..db019b22585b5 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -97,11 +97,6 @@ public class DefaultS3ClientFactory extends Configured private static final LogExactlyOnce WARN_OF_DEFAULT_REGION_CHAIN = new LogExactlyOnce(LOG); - /** - * A one-off logger of S3 Access Grants usage and attachment status to the S3 client. - */ - private static final LogExactlyOnce LOG_S3AG_ENABLED = new LogExactlyOnce(LOG); - /** * Warning message printed when the SDK Region chain is in use. */ @@ -414,18 +409,20 @@ private static Region getS3RegionFromEndpoint(final String endpoint, private static , ClientT> void applyS3AccessGrantsConfigurations(BuilderT builder, Configuration conf) { - if (!conf.getBoolean(AWS_S3_ACCESS_GRANTS_ENABLED, false)){ - LOG_S3AG_ENABLED.debug("S3 Access Grants plugin is not enabled."); + boolean isS3AccessGrantsEnabled = conf.getBoolean(AWS_S3_ACCESS_GRANTS_ENABLED, false); + if (!isS3AccessGrantsEnabled){ + LOG.debug("S3 Access Grants plugin is not enabled."); return; } boolean isFallbackEnabled = conf.getBoolean(AWS_S3_ACCESS_GRANTS_FALLBACK_TO_IAM_ENABLED, false); S3AccessGrantsPlugin accessGrantsPlugin = - S3AccessGrantsPlugin.builder().enableFallback(isFallbackEnabled).build(); + S3AccessGrantsPlugin.builder() + .enableFallback(isFallbackEnabled) + .build(); builder.addPlugin(accessGrantsPlugin); - LOG_S3AG_ENABLED.info( - "S3 Access Grants plugin is enabled with IAM fallback set to {}", isFallbackEnabled); + LOG.info("S3 Access Grants plugin is enabled with IAM fallback set to {}", isFallbackEnabled); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 3aec03766dacf..df413ebcceab1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -494,6 +494,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, */ private String configuredRegion; + /** + * Is a S3 Access Grants Enabled? + */ + private boolean s3AccessGrantsEnabled; + /** Add any deprecated keys. */ @SuppressWarnings("deprecation") private static void addDeprecatedKeys() { @@ -747,6 +752,7 @@ public void initialize(URI name, Configuration originalConf) optimizedCopyFromLocal = conf.getBoolean(OPTIMIZED_COPY_FROM_LOCAL, OPTIMIZED_COPY_FROM_LOCAL_DEFAULT); LOG.debug("Using optimized copyFromLocal implementation: {}", optimizedCopyFromLocal); + s3AccessGrantsEnabled = conf.getBoolean(AWS_S3_ACCESS_GRANTS_ENABLED, false); } catch (SdkException e) { // amazon client exception: stop all services then throw the translation cleanupWithLogger(LOG, span); @@ -5516,6 +5522,10 @@ public boolean hasPathCapability(final Path path, final String capability) case FIPS_ENDPOINT: return fipsEnabled; + // is S3 Access Grants enabled + case AWS_S3_ACCESS_GRANTS_ENABLED: + return s3AccessGrantsEnabled; + default: return super.hasPathCapability(p, cap); } diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/authentication.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/authentication.md new file mode 100644 index 0000000000000..0451e8978f0ad --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/authentication.md @@ -0,0 +1,433 @@ + + +# Authenticating with S3 + + + +Except when interacting with public S3 buckets, the S3A client +needs the credentials needed to interact with buckets. + +The client supports multiple authentication mechanisms and can be configured as to +which mechanisms to use, and their order of use. Custom implementations +of `com.amazonaws.auth.AWSCredentialsProvider` may also be used. +However, with the upcoming upgrade to AWS Java SDK V2, these classes will need to be +updated to implement `software.amazon.awssdk.auth.credentials.AwsCredentialsProvider`. +For more information see [Upcoming upgrade to AWS Java SDK V2](./aws_sdk_upgrade.html). + +### Authentication properties + +```xml + + fs.s3a.access.key + AWS access key ID used by S3A file system. Omit for IAM role-based or provider-based authentication. + + + + fs.s3a.secret.key + AWS secret key used by S3A file system. Omit for IAM role-based or provider-based authentication. + + + + fs.s3a.session.token + Session token, when using org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider + as one of the providers. + + + + + fs.s3a.aws.credentials.provider + + org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider, + org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider, + software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider, + org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider + + + Comma-separated class names of credential provider classes which implement + software.amazon.awssdk.auth.credentials.AwsCredentialsProvider. + + When S3A delegation tokens are not enabled, this list will be used + to directly authenticate with S3 and other AWS services. + When S3A Delegation tokens are enabled, depending upon the delegation + token binding it may be used + to communicate wih the STS endpoint to request session/role + credentials. + + + + + fs.s3a.aws.credentials.provider.mapping + + Comma-separated key-value pairs of mapped credential providers that are + separated by equal operator (=). The key can be used by + fs.s3a.aws.credentials.provider config, and it will be translated into + the specified value of credential provider class based on the key-value + pair provided by this config. + + Example: + com.amazonaws.auth.AnonymousAWSCredentials=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider, + com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper=org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider, + com.amazonaws.auth.InstanceProfileCredentialsProvider=org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider + + With the above key-value pairs, if fs.s3a.aws.credentials.provider specifies + com.amazonaws.auth.AnonymousAWSCredentials, it will be remapped to + org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider by S3A while + preparing AWS credential provider list for any S3 access. + We can use the same credentials provider list for both v1 and v2 SDK clients. + + +``` + +### Authenticating via the AWS Environment Variables + +S3A supports configuration via [the standard AWS environment variables](http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html#cli-environment). + +The core environment variables are for the access key and associated secret: + +```bash +export AWS_ACCESS_KEY_ID=my.aws.key +export AWS_SECRET_ACCESS_KEY=my.secret.key +``` + +If the environment variable `AWS_SESSION_TOKEN` is set, session authentication +using "Temporary Security Credentials" is enabled; the Key ID and secret key +must be set to the credentials for that specific session. + +```bash +export AWS_SESSION_TOKEN=SECRET-SESSION-TOKEN +export AWS_ACCESS_KEY_ID=SESSION-ACCESS-KEY +export AWS_SECRET_ACCESS_KEY=SESSION-SECRET-KEY +``` + +These environment variables can be used to set the authentication credentials +instead of properties in the Hadoop configuration. + +*Important:* +These environment variables are generally not propagated from client to server when +YARN applications are launched. That is: having the AWS environment variables +set when an application is launched will not permit the launched application +to access S3 resources. The environment variables must (somehow) be set +on the hosts/processes where the work is executed. + +### Changing Authentication Providers + +The standard way to authenticate is with an access key and secret key set in +the Hadoop configuration files. + +By default, the S3A client follows the following authentication chain: + +1. The options `fs.s3a.access.key`, `fs.s3a.secret.key` and `fs.s3a.sesson.key` + are looked for in the Hadoop XML configuration/Hadoop credential providers, + returning a set of session credentials if all three are defined. +1. The `fs.s3a.access.key` and `fs.s3a.secret.key` are looked for in the Hadoop + XML configuration//Hadoop credential providers, returning a set of long-lived + credentials if they are defined. +1. The [AWS environment variables](http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html#cli-environment), + are then looked for: these will return session or full credentials depending + on which values are set. +1. An attempt is made to query the Amazon EC2 Instance/k8s container Metadata Service to + retrieve credentials published to EC2 VMs. + +S3A can be configured to obtain client authentication providers from classes +which integrate with the AWS SDK by implementing the +`software.amazon.awssdk.auth.credentials.AwsCredentialsProvider` +interface. +This is done by listing the implementation classes, in order of +preference, in the configuration option `fs.s3a.aws.credentials.provider`. +In previous hadoop releases, providers were required to +implement the AWS V1 SDK interface `com.amazonaws.auth.AWSCredentialsProvider`. +Consult the [Upgrading S3A to AWS SDK V2](./aws_sdk_upgrade.html) documentation +to see how to migrate credential providers. + +*Important*: AWS Credential Providers are distinct from _Hadoop Credential Providers_. +As will be covered later, Hadoop Credential Providers allow passwords and other secrets +to be stored and transferred more securely than in XML configuration files. +AWS Credential Providers are classes which can be used by the Amazon AWS SDK to +obtain an AWS login from a different source in the system, including environment +variables, JVM properties and configuration files. + +All Hadoop `fs.s3a.` options used to store login details can all be secured +in [Hadoop credential providers](../../../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html); +this is advised as a more secure way to store valuable secrets. + +There are a number of AWS Credential Providers inside the `hadoop-aws` JAR: + +| Hadoop module credential provider | Authentication Mechanism | +|----------------------------------------------------------------|--------------------------------------------------| +| `org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider` | Session Credentials in configuration | +| `org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider` | Simple name/secret credentials in configuration | +| `org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider` | Anonymous Login | +| `org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider` | [Assumed Role credentials](./assumed_roles.html) | +| `org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider` | EC2/k8s instance credentials | + + +There are also many in the Amazon SDKs, with the common ones being as follows + +| classname | description | +|----------------------------------------------------------------------------------|------------------------------| +| `software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider` | AWS Environment Variables | +| `software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider` | EC2 Metadata Credentials | +| `software.amazon.awssdk.auth.credentials.ContainerCredentialsProvider` | EC2/k8s Metadata Credentials | + + + +### EC2 IAM Metadata Authentication with `InstanceProfileCredentialsProvider` + +Applications running in EC2 may associate an IAM role with the VM and query the +[EC2 Instance Metadata Service](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html) +for credentials to access S3. Within the AWS SDK, this functionality is +provided by `InstanceProfileCredentialsProvider`, which internally enforces a +singleton instance in order to prevent throttling problem. + +### Using Named Profile Credentials with `ProfileCredentialsProvider` + +You can configure Hadoop to authenticate to AWS using a [named profile](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-profiles.html). + +To authenticate with a named profile: + +1. Declare `software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider` as the provider. +1. Set your profile via the `AWS_PROFILE` environment variable. +1. Due to a [bug in version 1 of the AWS Java SDK](https://github.com/aws/aws-sdk-java/issues/803), + you'll need to remove the `profile` prefix from the AWS configuration section heading. + + Here's an example of what your AWS configuration files should look like: + + ``` + $ cat ~/.aws/config + [user1] + region = us-east-1 + $ cat ~/.aws/credentials + [user1] + aws_access_key_id = ... + aws_secret_access_key = ... + aws_session_token = ... + aws_security_token = ... + ``` +Note: + +1. The `region` setting is only used if `fs.s3a.endpoint.region` is set to the empty string. +1. For the credentials to be available to applications running in a Hadoop cluster, the + configuration files MUST be in the `~/.aws/` directory on the local filesystem in + all hosts in the cluster. + +### Using Session Credentials with `TemporaryAWSCredentialsProvider` + +[Temporary Security Credentials](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp.html) +can be obtained from the Amazon Security Token Service; these +consist of an access key, a secret key, and a session token. + +To authenticate with these: + +1. Declare `org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider` as the + provider. +1. Set the session key in the property `fs.s3a.session.token`, + and the access and secret key properties to those of this temporary session. + +Example: + +```xml + + fs.s3a.aws.credentials.provider + org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider + + + + fs.s3a.access.key + SESSION-ACCESS-KEY + + + + fs.s3a.secret.key + SESSION-SECRET-KEY + + + + fs.s3a.session.token + SECRET-SESSION-TOKEN + +``` + +The lifetime of session credentials are fixed when the credentials +are issued; once they expire the application will no longer be able to +authenticate to AWS. + +### Anonymous Login with `AnonymousAWSCredentialsProvider` + +Specifying `org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider` allows +anonymous access to a publicly accessible S3 bucket without any credentials. +It can be useful for accessing public data sets without requiring AWS credentials. + +```xml + + fs.s3a.aws.credentials.provider + org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider + +``` + +Once this is done, there's no need to supply any credentials +in the Hadoop configuration or via environment variables. + +This option can be used to verify that an object store does +not permit unauthenticated access: that is, if an attempt to list +a bucket is made using the anonymous credentials, it should fail —unless +explicitly opened up for broader access. + +```bash +hadoop fs -ls \ + -D fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider \ + s3a://noaa-isd-pds/ +``` + +1. Allowing anonymous access to an S3 bucket compromises + security and therefore is unsuitable for most use cases. + +1. If a list of credential providers is given in `fs.s3a.aws.credentials.provider`, + then the Anonymous Credential provider *must* come last. If not, credential + providers listed after it will be ignored. + +### Simple name/secret credentials with `SimpleAWSCredentialsProvider`* + +This is the standard credential provider, which supports the secret +key in `fs.s3a.access.key` and token in `fs.s3a.secret.key` +values. + +```xml + + fs.s3a.aws.credentials.provider + org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider + +``` + +This is the basic authenticator used in the default authentication chain. + +This means that the default S3A authentication chain can be defined as + +```xml + + fs.s3a.aws.credentials.provider + + org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider, + org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider, + software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider + org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider + + +``` + +## Protecting the AWS Credentials + +It is critical that you never share or leak your AWS credentials. +Loss of credentials can leak/lose all your data, run up large bills, +and significantly damage your organisation. + +1. Never share your secrets. + +1. Never commit your secrets into an SCM repository. + The [git secrets](https://github.com/awslabs/git-secrets) can help here. + +1. Never include AWS credentials in bug reports, files attached to them, + or similar. + +1. If you use the `AWS_` environment variables, your list of environment variables + is equally sensitive. + +1. Never use root credentials. + Use IAM user accounts, with each user/application having its own set of credentials. + +1. Use IAM permissions to restrict the permissions individual users and applications + have. This is best done through roles, rather than configuring individual users. + +1. Avoid passing in secrets to Hadoop applications/commands on the command line. + The command line of any launched program is visible to all users on a Unix system + (via `ps`), and preserved in command histories. + +1. Explore using [IAM Assumed Roles](assumed_roles.html) for role-based permissions + management: a specific S3A connection can be made with a different assumed role + and permissions from the primary user account. + +1. Consider a workflow in which users and applications are issued with short-lived + session credentials, configuring S3A to use these through + the `TemporaryAWSCredentialsProvider`. + +1. Have a secure process in place for cancelling and re-issuing credentials for + users and applications. Test it regularly by using it to refresh credentials. + +1. In installations where Kerberos is enabled, [S3A Delegation Tokens](delegation_tokens.html) + can be used to acquire short-lived session/role credentials and then pass them + into the shared application. This can ensure that the long-lived secrets stay + on the local system. + +When running in EC2, the IAM EC2 instance credential provider will automatically +obtain the credentials needed to access AWS services in the role the EC2 VM +was deployed as. +This AWS credential provider is enabled in S3A by default. + +## Custom AWS Credential Providers and Apache Spark + +Apache Spark employs two class loaders, one that loads "distribution" (Spark + Hadoop) classes and one that +loads custom user classes. If the user wants to load custom implementations of AWS credential providers, +custom signers, delegation token providers or any other dynamically loaded extension class +through user provided jars she will need to set the following configuration: + +```xml + + fs.s3a.classloader.isolation + false + + + fs.s3a.aws.credentials.provider + CustomCredentialsProvider + +``` + +If the following property is not set or set to `true`, the following exception will be thrown: + +``` +java.io.IOException: From option fs.s3a.aws.credentials.provider java.lang.ClassNotFoundException: Class CustomCredentialsProvider not found +``` + +## S3 Authorization Using S3 Access Grants + +[S3 Access Grants](https://aws.amazon.com/s3/features/access-grants/) can be used to grant accesses to S3 data using IAM Principals. +In order to enable S3 Access Grants, S3A utilizes the +[S3 Access Grants plugin](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2) on all S3 clients, +which is found within the AWS Java SDK bundle (v2.23.19+). + +S3A supports both cross-region access (by default) and the +[fallback-to-IAM configuration](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2?tab=readme-ov-file#using-the-plugin) +which allows S3A to fallback to using the IAM role (and its permission sets directly) to access S3 data in the case that S3 Access Grants +is unable to authorize the S3 call. + +The following is how to enable this feature: + +```xml + + fs.s3a.s3accessgrants.enabled + true + + + + fs.s3a.s3accessgrants.fallback.to.iam + true + +``` + +Note: +1. S3A only enables the [S3 Access Grants plugin](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2) on the S3 clients +as part of this feature. Any usage issues or bug reporting should be done directly at the plugin's +[GitHub repo](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2/issues). + +For more details on using S3 Access Grants, please refer to +[Managing access with S3 Access Grants](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-grants.html). diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 2b50516f3a2aa..7412a4cebcc4f 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -232,420 +232,7 @@ Also, please check [S3 endpoint and region settings in detail](connecting.html#s ## Authenticating with S3 -Except when interacting with public S3 buckets, the S3A client -needs the credentials needed to interact with buckets. - -The client supports multiple authentication mechanisms and can be configured as to -which mechanisms to use, and their order of use. Custom implementations -of `com.amazonaws.auth.AWSCredentialsProvider` may also be used. -However, with the upcoming upgrade to AWS Java SDK V2, these classes will need to be -updated to implement `software.amazon.awssdk.auth.credentials.AwsCredentialsProvider`. -For more information see [Upcoming upgrade to AWS Java SDK V2](./aws_sdk_upgrade.html). - -### Authentication properties - -```xml - - fs.s3a.access.key - AWS access key ID used by S3A file system. Omit for IAM role-based or provider-based authentication. - - - - fs.s3a.secret.key - AWS secret key used by S3A file system. Omit for IAM role-based or provider-based authentication. - - - - fs.s3a.session.token - Session token, when using org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider - as one of the providers. - - - - - fs.s3a.aws.credentials.provider - - org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider, - org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider, - software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider, - org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider - - - Comma-separated class names of credential provider classes which implement - software.amazon.awssdk.auth.credentials.AwsCredentialsProvider. - - When S3A delegation tokens are not enabled, this list will be used - to directly authenticate with S3 and other AWS services. - When S3A Delegation tokens are enabled, depending upon the delegation - token binding it may be used - to communicate wih the STS endpoint to request session/role - credentials. - - - - - fs.s3a.aws.credentials.provider.mapping - - Comma-separated key-value pairs of mapped credential providers that are - separated by equal operator (=). The key can be used by - fs.s3a.aws.credentials.provider config, and it will be translated into - the specified value of credential provider class based on the key-value - pair provided by this config. - - Example: - com.amazonaws.auth.AnonymousAWSCredentials=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider, - com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper=org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider, - com.amazonaws.auth.InstanceProfileCredentialsProvider=org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider - - With the above key-value pairs, if fs.s3a.aws.credentials.provider specifies - com.amazonaws.auth.AnonymousAWSCredentials, it will be remapped to - org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider by S3A while - preparing AWS credential provider list for any S3 access. - We can use the same credentials provider list for both v1 and v2 SDK clients. - - -``` - -### Authenticating via the AWS Environment Variables - -S3A supports configuration via [the standard AWS environment variables](http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html#cli-environment). - -The core environment variables are for the access key and associated secret: - -```bash -export AWS_ACCESS_KEY_ID=my.aws.key -export AWS_SECRET_ACCESS_KEY=my.secret.key -``` - -If the environment variable `AWS_SESSION_TOKEN` is set, session authentication -using "Temporary Security Credentials" is enabled; the Key ID and secret key -must be set to the credentials for that specific session. - -```bash -export AWS_SESSION_TOKEN=SECRET-SESSION-TOKEN -export AWS_ACCESS_KEY_ID=SESSION-ACCESS-KEY -export AWS_SECRET_ACCESS_KEY=SESSION-SECRET-KEY -``` - -These environment variables can be used to set the authentication credentials -instead of properties in the Hadoop configuration. - -*Important:* -These environment variables are generally not propagated from client to server when -YARN applications are launched. That is: having the AWS environment variables -set when an application is launched will not permit the launched application -to access S3 resources. The environment variables must (somehow) be set -on the hosts/processes where the work is executed. - -### Changing Authentication Providers - -The standard way to authenticate is with an access key and secret key set in -the Hadoop configuration files. - -By default, the S3A client follows the following authentication chain: - -1. The options `fs.s3a.access.key`, `fs.s3a.secret.key` and `fs.s3a.sesson.key` -are looked for in the Hadoop XML configuration/Hadoop credential providers, -returning a set of session credentials if all three are defined. -1. The `fs.s3a.access.key` and `fs.s3a.secret.key` are looked for in the Hadoop -XML configuration//Hadoop credential providers, returning a set of long-lived -credentials if they are defined. -1. The [AWS environment variables](http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html#cli-environment), -are then looked for: these will return session or full credentials depending -on which values are set. -1. An attempt is made to query the Amazon EC2 Instance/k8s container Metadata Service to - retrieve credentials published to EC2 VMs. - -S3A can be configured to obtain client authentication providers from classes -which integrate with the AWS SDK by implementing the -`software.amazon.awssdk.auth.credentials.AwsCredentialsProvider` -interface. -This is done by listing the implementation classes, in order of -preference, in the configuration option `fs.s3a.aws.credentials.provider`. -In previous hadoop releases, providers were required to -implement the AWS V1 SDK interface `com.amazonaws.auth.AWSCredentialsProvider`. -Consult the [Upgrading S3A to AWS SDK V2](./aws_sdk_upgrade.html) documentation -to see how to migrate credential providers. - -*Important*: AWS Credential Providers are distinct from _Hadoop Credential Providers_. -As will be covered later, Hadoop Credential Providers allow passwords and other secrets -to be stored and transferred more securely than in XML configuration files. -AWS Credential Providers are classes which can be used by the Amazon AWS SDK to -obtain an AWS login from a different source in the system, including environment -variables, JVM properties and configuration files. - -All Hadoop `fs.s3a.` options used to store login details can all be secured -in [Hadoop credential providers](../../../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html); -this is advised as a more secure way to store valuable secrets. - -There are a number of AWS Credential Providers inside the `hadoop-aws` JAR: - -| Hadoop module credential provider | Authentication Mechanism | -|----------------------------------------------------------------|--------------------------------------------------| -| `org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider` | Session Credentials in configuration | -| `org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider` | Simple name/secret credentials in configuration | -| `org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider` | Anonymous Login | -| `org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider` | [Assumed Role credentials](./assumed_roles.html) | -| `org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider` | EC2/k8s instance credentials | - - -There are also many in the Amazon SDKs, with the common ones being as follows - -| classname | description | -|----------------------------------------------------------------------------------|------------------------------| -| `software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider` | AWS Environment Variables | -| `software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider` | EC2 Metadata Credentials | -| `software.amazon.awssdk.auth.credentials.ContainerCredentialsProvider` | EC2/k8s Metadata Credentials | - - - -### EC2 IAM Metadata Authentication with `InstanceProfileCredentialsProvider` - -Applications running in EC2 may associate an IAM role with the VM and query the -[EC2 Instance Metadata Service](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html) -for credentials to access S3. Within the AWS SDK, this functionality is -provided by `InstanceProfileCredentialsProvider`, which internally enforces a -singleton instance in order to prevent throttling problem. - -### Using Named Profile Credentials with `ProfileCredentialsProvider` - -You can configure Hadoop to authenticate to AWS using a [named profile](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-profiles.html). - -To authenticate with a named profile: - -1. Declare `software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider` as the provider. -1. Set your profile via the `AWS_PROFILE` environment variable. -1. Due to a [bug in version 1 of the AWS Java SDK](https://github.com/aws/aws-sdk-java/issues/803), -you'll need to remove the `profile` prefix from the AWS configuration section heading. - - Here's an example of what your AWS configuration files should look like: - - ``` - $ cat ~/.aws/config - [user1] - region = us-east-1 - $ cat ~/.aws/credentials - [user1] - aws_access_key_id = ... - aws_secret_access_key = ... - aws_session_token = ... - aws_security_token = ... - ``` -Note: - -1. The `region` setting is only used if `fs.s3a.endpoint.region` is set to the empty string. -1. For the credentials to be available to applications running in a Hadoop cluster, the - configuration files MUST be in the `~/.aws/` directory on the local filesystem in - all hosts in the cluster. - -### Using Session Credentials with `TemporaryAWSCredentialsProvider` - -[Temporary Security Credentials](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp.html) -can be obtained from the Amazon Security Token Service; these -consist of an access key, a secret key, and a session token. - -To authenticate with these: - -1. Declare `org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider` as the -provider. -1. Set the session key in the property `fs.s3a.session.token`, -and the access and secret key properties to those of this temporary session. - -Example: - -```xml - - fs.s3a.aws.credentials.provider - org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider - - - - fs.s3a.access.key - SESSION-ACCESS-KEY - - - - fs.s3a.secret.key - SESSION-SECRET-KEY - - - - fs.s3a.session.token - SECRET-SESSION-TOKEN - -``` - -The lifetime of session credentials are fixed when the credentials -are issued; once they expire the application will no longer be able to -authenticate to AWS. - -### Anonymous Login with `AnonymousAWSCredentialsProvider` - -Specifying `org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider` allows -anonymous access to a publicly accessible S3 bucket without any credentials. -It can be useful for accessing public data sets without requiring AWS credentials. - -```xml - - fs.s3a.aws.credentials.provider - org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider - -``` - -Once this is done, there's no need to supply any credentials -in the Hadoop configuration or via environment variables. - -This option can be used to verify that an object store does -not permit unauthenticated access: that is, if an attempt to list -a bucket is made using the anonymous credentials, it should fail —unless -explicitly opened up for broader access. - -```bash -hadoop fs -ls \ - -D fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider \ - s3a://noaa-isd-pds/ -``` - -1. Allowing anonymous access to an S3 bucket compromises -security and therefore is unsuitable for most use cases. - -1. If a list of credential providers is given in `fs.s3a.aws.credentials.provider`, -then the Anonymous Credential provider *must* come last. If not, credential -providers listed after it will be ignored. - -### Simple name/secret credentials with `SimpleAWSCredentialsProvider`* - -This is the standard credential provider, which supports the secret -key in `fs.s3a.access.key` and token in `fs.s3a.secret.key` -values. - -```xml - - fs.s3a.aws.credentials.provider - org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider - -``` - -This is the basic authenticator used in the default authentication chain. - -This means that the default S3A authentication chain can be defined as - -```xml - - fs.s3a.aws.credentials.provider - - org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider, - org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider, - software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider - org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider - - -``` - -## Protecting the AWS Credentials - -It is critical that you never share or leak your AWS credentials. -Loss of credentials can leak/lose all your data, run up large bills, -and significantly damage your organisation. - -1. Never share your secrets. - -1. Never commit your secrets into an SCM repository. -The [git secrets](https://github.com/awslabs/git-secrets) can help here. - -1. Never include AWS credentials in bug reports, files attached to them, -or similar. - -1. If you use the `AWS_` environment variables, your list of environment variables -is equally sensitive. - -1. Never use root credentials. -Use IAM user accounts, with each user/application having its own set of credentials. - -1. Use IAM permissions to restrict the permissions individual users and applications -have. This is best done through roles, rather than configuring individual users. - -1. Avoid passing in secrets to Hadoop applications/commands on the command line. -The command line of any launched program is visible to all users on a Unix system -(via `ps`), and preserved in command histories. - -1. Explore using [IAM Assumed Roles](assumed_roles.html) for role-based permissions -management: a specific S3A connection can be made with a different assumed role -and permissions from the primary user account. - -1. Consider a workflow in which users and applications are issued with short-lived -session credentials, configuring S3A to use these through -the `TemporaryAWSCredentialsProvider`. - -1. Have a secure process in place for cancelling and re-issuing credentials for -users and applications. Test it regularly by using it to refresh credentials. - -1. In installations where Kerberos is enabled, [S3A Delegation Tokens](delegation_tokens.html) -can be used to acquire short-lived session/role credentials and then pass them -into the shared application. This can ensure that the long-lived secrets stay -on the local system. - -When running in EC2, the IAM EC2 instance credential provider will automatically -obtain the credentials needed to access AWS services in the role the EC2 VM -was deployed as. -This AWS credential provider is enabled in S3A by default. - -## Custom AWS Credential Providers and Apache Spark - -Apache Spark employs two class loaders, one that loads "distribution" (Spark + Hadoop) classes and one that -loads custom user classes. If the user wants to load custom implementations of AWS credential providers, -custom signers, delegation token providers or any other dynamically loaded extension class -through user provided jars she will need to set the following configuration: - -```xml - - fs.s3a.classloader.isolation - false - - - fs.s3a.aws.credentials.provider - CustomCredentialsProvider - -``` - -If the following property is not set or set to `true`, the following exception will be thrown: - -``` -java.io.IOException: From option fs.s3a.aws.credentials.provider java.lang.ClassNotFoundException: Class CustomCredentialsProvider not found -``` - -## S3 Authorization Using S3 Access Grants - -[S3 Access Grants](https://aws.amazon.com/s3/features/access-grants/) can be used to grant accesses to S3 data using IAM Principals. -In order to enable S3 Access Grants, S3A utilizes the -[S3 Access Grants plugin](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2) on all S3 clients, -which is found within the AWS Java SDK bundle (v2.23.19+). - -S3A supports both cross-region access (by default) and the -[fallback-to-IAM configuration](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2?tab=readme-ov-file#using-the-plugin) -which allows S3A to fallback to using the IAM role (and its permission sets directly) to access S3 data in the case that S3 Access Grants -is unable to authorize the S3 call. - -The following is how this feature can be enabled: - -```xml - - fs.s3a.s3accessgrants.enabled - true - - - - fs.s3a.s3accessgrants.fallback.to.iam - true - -``` - -Please note that S3A only enables the [S3 Access Grants plugin](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2) on the S3 clients -as part of this feature. Any usage issues or bug reporting should be done directly at the plugin's -[GitHub repo](https://github.com/aws/aws-s3-accessgrants-plugin-java-v2/issues). - -For more details on using S3 Access Grants, please refer to -[Managing access with S3 Access Grants](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-grants.html). +See [Authenticating with S3](authentication.md). ## Storing secrets with Hadoop Credential Providers diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java index ccc8438858cbb..e403cbbb099e3 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java @@ -18,17 +18,18 @@ package org.apache.hadoop.fs.s3a; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.test.AbstractHadoopTestBase; -import org.junit.Assert; -import org.junit.Test; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import org.assertj.core.api.AbstractStringAssert; +import org.assertj.core.api.Assertions; +import org.junit.Test; import software.amazon.awssdk.awscore.AwsClient; import software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsIdentityProvider; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.test.AbstractHadoopTestBase; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESS_GRANTS_ENABLED; @@ -47,41 +48,44 @@ public class TestS3AccessGrantConfiguration extends AbstractHadoopTestBase { @Test public void testS3AccessGrantsEnabled() throws IOException, URISyntaxException { - // Feature is explicitly enabled - AwsClient s3AsyncClient = getAwsClient(createConfig(true), true); - Assert.assertEquals( - S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, - getCredentialProviderName(s3AsyncClient)); - - AwsClient s3Client = getAwsClient(createConfig(true), false); - Assert.assertEquals( - S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, - getCredentialProviderName(s3Client)); + assertCredentialProviderClass( + createConfig(true), + true, + "S3 Access Grants is explicitly enabled on an S3 Async Client", + true); + + assertCredentialProviderClass( + createConfig(true), + false, + "S3 Access Grants is explicitly enabled on an S3 Non-Async Client", + true); } @Test public void testS3AccessGrantsDisabled() throws IOException, URISyntaxException { - // Disabled by default - AwsClient s3AsyncDefaultClient = getAwsClient(new Configuration(), true); - Assert.assertNotEquals( - S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, - getCredentialProviderName(s3AsyncDefaultClient)); - - AwsClient s3DefaultClient = getAwsClient(new Configuration(), true); - Assert.assertNotEquals( - S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, - getCredentialProviderName(s3DefaultClient)); - - // Disabled if explicitly set - AwsClient s3AsyncExplicitlyDisabledClient = getAwsClient(createConfig(false), true); - Assert.assertNotEquals( - S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, - getCredentialProviderName(s3AsyncExplicitlyDisabledClient)); - - AwsClient s3ExplicitlyDisabledClient = getAwsClient(createConfig(false), true); - Assert.assertNotEquals( - S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS, - getCredentialProviderName(s3ExplicitlyDisabledClient)); + assertCredentialProviderClass( + new Configuration(), + true, + "S3 Access Grants is implicitly disabled (default behavior) on an S3 Async Client", + false); + + assertCredentialProviderClass( + new Configuration(), + false, + "S3 Access Grants is implicitly disabled (default behavior) on an S3 Non-Async Client", + false); + + assertCredentialProviderClass( + createConfig(false), + true, + "S3 Access Grants is explicitly disabled on an S3 Async Client", + false); + + assertCredentialProviderClass( + createConfig(false), + false, + "S3 Access Grants is explicitly disabled on an S3 Non-Async Client", + false); } private Configuration createConfig(boolean s3agEnabled) { @@ -104,4 +108,19 @@ private AwsClient getAwsClient(Configuration conf, boolean asyncClient) return asyncClient ? factory.createS3AsyncClient(uri, parameters): factory.createS3Client(uri, parameters); } + + private void assertCredentialProviderClass( + Configuration configuration, boolean asyncClient, String message, boolean shouldMatch) + throws IOException, URISyntaxException { + AwsClient awsClient = getAwsClient(configuration, asyncClient); + AbstractStringAssert assertion = + Assertions + .assertThat(S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS) + .describedAs(message); + if (shouldMatch) { + assertion.isEqualTo(getCredentialProviderName(awsClient)); + } else { + assertion.isNotEqualTo(getCredentialProviderName(awsClient)); + } + } } From 3cfb18e9d733ddabd41e882193de4190dd6620d2 Mon Sep 17 00:00:00 2001 From: Adnan Hemani Date: Wed, 13 Mar 2024 11:49:13 -0700 Subject: [PATCH 5/5] PR Revision 2 --- .../apache/hadoop/fs/s3a/DefaultS3ClientFactory.java | 12 +++++++++--- .../java/org/apache/hadoop/fs/s3a/S3AFileSystem.java | 2 +- .../apache/hadoop/fs/s3a/impl/InternalConstants.java | 2 ++ .../site/markdown/tools/hadoop-aws/authentication.md | 2 +- .../fs/s3a/TestS3AccessGrantConfiguration.java | 2 +- 5 files changed, 14 insertions(+), 6 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index db019b22585b5..ba9fc080c2c51 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -115,6 +115,11 @@ public class DefaultS3ClientFactory extends Configured public static final String ERROR_ENDPOINT_WITH_FIPS = "Non central endpoint cannot be set when " + FIPS_ENDPOINT + " is true"; + /** + * A one-off log stating whether S3 Access Grants are enabled. + */ + private static final LogExactlyOnce LOG_S3AG_ENABLED = new LogExactlyOnce(LOG); + @Override public S3Client createS3Client( final URI uri, @@ -181,7 +186,7 @@ private , ClientT> Build configureEndpointAndRegion(builder, parameters, conf); - applyS3AccessGrantsConfigurations(builder, conf); + maybeApplyS3AccessGrantsConfigurations(builder, conf); S3Configuration serviceConfiguration = S3Configuration.builder() .pathStyleAccessEnabled(parameters.isPathStyleAccess()) @@ -408,7 +413,7 @@ private static Region getS3RegionFromEndpoint(final String endpoint, } private static , ClientT> void - applyS3AccessGrantsConfigurations(BuilderT builder, Configuration conf) { + maybeApplyS3AccessGrantsConfigurations(BuilderT builder, Configuration conf) { boolean isS3AccessGrantsEnabled = conf.getBoolean(AWS_S3_ACCESS_GRANTS_ENABLED, false); if (!isS3AccessGrantsEnabled){ LOG.debug("S3 Access Grants plugin is not enabled."); @@ -422,7 +427,8 @@ private static Region getS3RegionFromEndpoint(final String endpoint, .enableFallback(isFallbackEnabled) .build(); builder.addPlugin(accessGrantsPlugin); - LOG.info("S3 Access Grants plugin is enabled with IAM fallback set to {}", isFallbackEnabled); + LOG_S3AG_ENABLED.info( + "S3 Access Grants plugin is enabled with IAM fallback set to {}", isFallbackEnabled); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index df413ebcceab1..a404fc1c21732 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -495,7 +495,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private String configuredRegion; /** - * Is a S3 Access Grants Enabled? + * Are S3 Access Grants Enabled? */ private boolean s3AccessGrantsEnabled; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java index 1d12a41008b6b..7d23c10d8b5c8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java @@ -36,6 +36,7 @@ import static org.apache.hadoop.fs.CommonPathCapabilities.FS_CHECKSUMS; import static org.apache.hadoop.fs.CommonPathCapabilities.FS_MULTIPART_UPLOADER; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESS_GRANTS_ENABLED; import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_OPERATIONS_PURGE_UPLOADS; import static org.apache.hadoop.fs.s3a.Constants.ENABLE_MULTI_DELETE; import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; @@ -272,6 +273,7 @@ private InternalConstants() { FS_MULTIPART_UPLOADER, DIRECTORY_LISTING_INCONSISTENT, FIPS_ENDPOINT, + AWS_S3_ACCESS_GRANTS_ENABLED, // s3 specific STORE_CAPABILITY_AWS_V2, diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/authentication.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/authentication.md index 0451e8978f0ad..8f22aa9df452e 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/authentication.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/authentication.md @@ -22,7 +22,7 @@ needs the credentials needed to interact with buckets. The client supports multiple authentication mechanisms and can be configured as to which mechanisms to use, and their order of use. Custom implementations of `com.amazonaws.auth.AWSCredentialsProvider` may also be used. -However, with the upcoming upgrade to AWS Java SDK V2, these classes will need to be +However, with the upgrade to AWS Java SDK V2 in Hadoop 3.4.0, these classes will need to be updated to implement `software.amazon.awssdk.auth.credentials.AwsCredentialsProvider`. For more information see [Upcoming upgrade to AWS Java SDK V2](./aws_sdk_upgrade.html). diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java index e403cbbb099e3..7199aac061c17 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AccessGrantConfiguration.java @@ -41,7 +41,7 @@ public class TestS3AccessGrantConfiguration extends AbstractHadoopTestBase { /** * This credential provider will be attached to any client * that has been configured with the S3 Access Grants plugin. - * {@link software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsPlugin}. + * {@code software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsPlugin}. */ public static final String S3_ACCESS_GRANTS_EXPECTED_CREDENTIAL_PROVIDER_CLASS = S3AccessGrantsIdentityProvider.class.getName();