From 1c2e59c6423808c1da084d08f50128a85556cb7f Mon Sep 17 00:00:00 2001 From: Syed Shameerur Rahman Date: Tue, 24 Sep 2024 13:05:36 +0530 Subject: [PATCH 1/5] HADOOP-19286: Support S3A cross region access when S3 region/endpoint is set --- .../org/apache/hadoop/fs/s3a/Constants.java | 13 ++++++++ .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 14 +++++--- .../hadoop/fs/s3a/ITestS3AConfiguration.java | 5 +-- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 32 +++++++++++++++++++ 4 files changed, 58 insertions(+), 6 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index e58ac02e33731..7282cdab2fd05 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1372,6 +1372,19 @@ private Constants() { */ public static final String XA_HEADER_PREFIX = "header."; + /** + * S3 cross region access enabled ? + * Value: {@value}. + */ + + public static final String AWS_S3_CROSS_REGION_ACCESS_ENABLED = + "fs.s3a.cross.region.access.enabled"; + /** + * Default value for S3 cross region access enabled: {@value}. + */ + public static final boolean AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT = true; + + /** * AWS S3 region for the bucket. When set bypasses the construction of * region through endpoint url. 
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index c52454ac15c81..b5e0afbf9e8e9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -58,6 +58,8 @@ import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESS_GRANTS_ENABLED; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESS_GRANTS_FALLBACK_TO_IAM_ENABLED; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; @@ -330,7 +332,6 @@ private , ClientT> void builder.endpointOverride(endpoint); LOG.debug("Setting endpoint to {}", endpoint); } else { - builder.crossRegionAccessEnabled(true); origin = "central endpoint with cross region access"; LOG.debug("Enabling cross region access for endpoint {}", endpointStr); @@ -343,7 +344,6 @@ private , ClientT> void // no region is configured, and none could be determined from the endpoint. // Use US_EAST_2 as default. 
region = Region.of(AWS_S3_DEFAULT_REGION); - builder.crossRegionAccessEnabled(true); builder.region(region); origin = "cross region access fallback"; } else if (configuredRegion.isEmpty()) { @@ -354,8 +354,14 @@ private , ClientT> void LOG.debug(SDK_REGION_CHAIN_IN_USE); origin = "SDK region chain"; } - - LOG.debug("Setting region to {} from {}", region, origin); + boolean isCrossRegionAccessEnabled = conf.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, + AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT); + // s3 cross region access + if (isCrossRegionAccessEnabled) { + builder.crossRegionAccessEnabled(true); + } + LOG.debug("Setting region to {} from {} with cross region access {}", + region, origin, isCrossRegionAccessEnabled); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index a3b994054e4d3..967ba885bc90f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -439,6 +439,7 @@ public void testCustomUserAgent() throws Exception { @Test public void testRequestTimeout() throws Exception { conf = new Configuration(); + skipIfCrossRegionClient(conf); // remove the safety check on minimum durations. 
AWSClientConfig.setMinimumOperationDuration(Duration.ZERO); try { @@ -632,8 +633,8 @@ public static boolean isSTSSignerCalled() { */ private static void skipIfCrossRegionClient( Configuration configuration) { - if (configuration.get(ENDPOINT, null) == null - && configuration.get(AWS_REGION, null) == null) { + if (configuration.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, + AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT)) { skip("Skipping test as cross region client is in use "); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index d06224df5b355..64d7ece05ba38 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -46,6 +46,7 @@ import static org.apache.hadoop.fs.s3a.Constants.ALLOW_REQUESTER_PAYS; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; @@ -346,6 +347,37 @@ public void testCentralEndpointAndDifferentRegionThanBucket() throws Throwable { assertRequesterPaysFileExistence(newConf); } + @Test + public void testWithOutCrossRegionAccess() throws Exception { + describe("Verify cross region access fails when disabled"); + final Configuration newConf = new Configuration(getConfiguration()); + // skip the test if the region is eu-west-2 + String region = getFileSystem().getS3AInternals().getBucketMetadata().bucketRegion(); + if (EU_WEST_2.equals(region)) { + return; + } + // disable cross region access + newConf.setBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, false); + 
newConf.set(AWS_REGION, EU_WEST_2); + S3AFileSystem fs = new S3AFileSystem(); + fs.initialize(getFileSystem().getUri(), newConf); + intercept(AWSRedirectException.class, + "does not match the AWS region containing the bucket", + () -> fs.exists(getFileSystem().getWorkingDirectory())); + } + + @Test + public void testWithCrossRegionAccess() throws Exception { + describe("Verify cross region access succeed when enabled"); + final Configuration newConf = new Configuration(getConfiguration()); + // enable cross region access + newConf.setBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, true); + newConf.set(AWS_REGION, EU_WEST_2); + S3AFileSystem fs = new S3AFileSystem(); + fs.initialize(getFileSystem().getUri(), newConf); + fs.exists(getFileSystem().getWorkingDirectory()); + } + @Test public void testCentralEndpointAndSameRegionAsBucket() throws Throwable { describe("Access public bucket using central endpoint and region " From f5f0bc519597e43209ef64f4dea45f2b1e8ea837 Mon Sep 17 00:00:00 2001 From: Syed Shameerur Rahman Date: Wed, 25 Sep 2024 12:46:51 +0530 Subject: [PATCH 2/5] Change the cross region access to sa-east-1 --- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 44 ++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index 64d7ece05ba38..5517d65f6bc8d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -44,6 +44,7 @@ import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; import org.apache.hadoop.fs.s3a.test.PublicDatasetTestUtils; +import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; import static org.apache.hadoop.fs.s3a.Constants.ALLOW_REQUESTER_PAYS; import static 
org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED; @@ -72,6 +73,8 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { private static final String US_WEST_2 = "us-west-2"; + private static final String SA_EAST_1 = "sa-east-1"; + private static final String EU_WEST_2 = "eu-west-2"; private static final String CN_NORTHWEST_1 = "cn-northwest-1"; @@ -350,32 +353,33 @@ public void testCentralEndpointAndDifferentRegionThanBucket() throws Throwable { @Test public void testWithOutCrossRegionAccess() throws Exception { describe("Verify cross region access fails when disabled"); + // skip the test if the region is sa-east-1 + skipCrossRegionTest(); final Configuration newConf = new Configuration(getConfiguration()); - // skip the test if the region is eu-west-2 - String region = getFileSystem().getS3AInternals().getBucketMetadata().bucketRegion(); - if (EU_WEST_2.equals(region)) { - return; - } // disable cross region access newConf.setBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, false); - newConf.set(AWS_REGION, EU_WEST_2); - S3AFileSystem fs = new S3AFileSystem(); - fs.initialize(getFileSystem().getUri(), newConf); - intercept(AWSRedirectException.class, - "does not match the AWS region containing the bucket", - () -> fs.exists(getFileSystem().getWorkingDirectory())); + newConf.set(AWS_REGION, SA_EAST_1); + try (S3AFileSystem fs = new S3AFileSystem()) { + fs.initialize(getFileSystem().getUri(), newConf); + intercept(AWSRedirectException.class, + "does not match the AWS region containing the bucket", + () -> fs.exists(getFileSystem().getWorkingDirectory())); + } } @Test public void testWithCrossRegionAccess() throws Exception { describe("Verify cross region access succeed when enabled"); + // skip the test if the region is sa-east-1 + skipCrossRegionTest(); final Configuration newConf = new Configuration(getConfiguration()); // enable cross region access 
newConf.setBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, true); - newConf.set(AWS_REGION, EU_WEST_2); - S3AFileSystem fs = new S3AFileSystem(); - fs.initialize(getFileSystem().getUri(), newConf); - fs.exists(getFileSystem().getWorkingDirectory()); + newConf.set(AWS_REGION, SA_EAST_1); + try (S3AFileSystem fs = new S3AFileSystem()) { + fs.initialize(getFileSystem().getUri(), newConf); + fs.exists(getFileSystem().getWorkingDirectory()); + } } @Test @@ -510,6 +514,16 @@ public void testCentralEndpointAndNullRegionFipsWithCRUD() throws Throwable { assertOpsUsingNewFs(); } + /** + * Skip the test if the region is sa-east-1. + */ + private void skipCrossRegionTest() throws IOException { + String region = getFileSystem().getS3AInternals().getBucketMetadata().bucketRegion(); + if (SA_EAST_1.equals(region)) { + skip("Skipping test as sa-east-1 is in use"); + } + } + private void assertOpsUsingNewFs() throws IOException { final String file = getMethodName(); final Path basePath = methodPath(); From 2c541df5740b23f88e21e43b21db09f73f081850 Mon Sep 17 00:00:00 2001 From: Syed Shameerur Rahman Date: Mon, 30 Sep 2024 11:32:40 +0530 Subject: [PATCH 3/5] Skip the cross region tests for third party stores --- .../site/markdown/tools/hadoop-aws/third_party_stores.md | 2 +- .../org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java | 7 +++++-- .../java/org/apache/hadoop/fs/s3a/S3ATestConstants.java | 5 +++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/third_party_stores.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/third_party_stores.md index 1018ec9e7d6c2..5f71bf30fde65 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/third_party_stores.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/third_party_stores.md @@ -82,7 +82,7 @@ then these must be set, either in XML or (preferred) in a JCEKS file. 
fs.s3a.endpoint.region - anything + non-aws diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index 5517d65f6bc8d..9bd2f29b8c977 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -373,6 +373,9 @@ public void testWithCrossRegionAccess() throws Exception { // skip the test if the region is sa-east-1 skipCrossRegionTest(); final Configuration newConf = new Configuration(getConfiguration()); + removeBaseAndBucketOverrides(newConf, + AWS_S3_CROSS_REGION_ACCESS_ENABLED, + AWS_REGION); // enable cross region access newConf.setBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED, true); newConf.set(AWS_REGION, SA_EAST_1); @@ -519,8 +522,8 @@ public void testCentralEndpointAndNullRegionFipsWithCRUD() throws Throwable { */ private void skipCrossRegionTest() throws IOException { String region = getFileSystem().getS3AInternals().getBucketMetadata().bucketRegion(); - if (SA_EAST_1.equals(region)) { - skip("Skipping test as sa-east-1 is in use"); + if (SA_EAST_1.equals(region) || NON_AWS_REGION.equals(region)) { + skip("Skipping test since sa-east-1 or non-aws region is in use"); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java index 9ab1768b2aba1..84707799270a7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java @@ -276,6 +276,11 @@ public interface S3ATestConstants { */ String EU_WEST_1 = "eu-west-1"; + /** + * Region name for testing S3A third party stores: {@value}. 
+ */ + String NON_AWS_REGION = "non-aws"; + /** * System property for root tests being enabled: {@value}. */ From 1f41548d24788b1f5d2ecce30da0bd315961f73a Mon Sep 17 00:00:00 2001 From: Syed Shameerur Rahman Date: Tue, 1 Oct 2024 11:19:46 +0530 Subject: [PATCH 4/5] null check to skip the test and remove non-aws region probe --- .../site/markdown/tools/hadoop-aws/third_party_stores.md | 2 +- .../org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java | 6 +++--- .../java/org/apache/hadoop/fs/s3a/S3ATestConstants.java | 5 ----- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/third_party_stores.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/third_party_stores.md index 5f71bf30fde65..1018ec9e7d6c2 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/third_party_stores.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/third_party_stores.md @@ -82,7 +82,7 @@ then these must be set, either in XML or (preferred) in a JCEKS file. fs.s3a.endpoint.region - non-aws + anything diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index 9bd2f29b8c977..18cb985065f3f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -518,12 +518,12 @@ public void testCentralEndpointAndNullRegionFipsWithCRUD() throws Throwable { } /** - * Skip the test if the region is sa-east-1. + * Skip the test if the region is null or sa-east-1. 
*/ private void skipCrossRegionTest() throws IOException { String region = getFileSystem().getS3AInternals().getBucketMetadata().bucketRegion(); - if (SA_EAST_1.equals(region) || NON_AWS_REGION.equals(region)) { - skip("Skipping test since sa-east-1 or non-aws region is in use"); + if (region == null || SA_EAST_1.equals(region)) { + skip("Skipping test since region is null or it is set to sa-east-1"); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java index 84707799270a7..9ab1768b2aba1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java @@ -276,11 +276,6 @@ public interface S3ATestConstants { */ String EU_WEST_1 = "eu-west-1"; - /** - * Region name for testing S3A third party stores: {@value}. - */ - String NON_AWS_REGION = "non-aws"; - /** * System property for root tests being enabled: {@value}. */ From d3a4e61900497482e2f1b527dec1055ed5224fdf Mon Sep 17 00:00:00 2001 From: Syed Shameerur Rahman Date: Tue, 1 Oct 2024 19:46:15 +0530 Subject: [PATCH 5/5] Add documentation changes --- .../src/site/markdown/tools/hadoop-aws/connecting.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md index d39c480b7cc5a..6fa37750ded8c 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md @@ -48,6 +48,16 @@ There are multiple ways to connect to an S3 bucket The S3A connector supports all these; S3 Endpoints are the primary mechanism used -either explicitly declared or automatically determined from the declared region of the bucket. 
+The S3A connector supports S3 cross-region access via the AWS SDK, which is enabled by default. This allows users to access S3 buckets in a different region than the one defined in the S3 endpoint/region configuration, as long as they are within the same AWS partition. However, S3 cross-region access can be disabled by setting: +```xml +<property> + <name>fs.s3a.cross.region.access.enabled</name> + <value>false</value> + <description>S3 cross region access</description> +</property> +``` + + Not supported: * AWS [Snowball](https://aws.amazon.com/snowball/).